From 8ecfd17973a07f2dc6a71bd8095f3113c9dab334 Mon Sep 17 00:00:00 2001 From: Felipe Tuffani <8229714+fetuffani@users.noreply.github.com> Date: Tue, 17 Sep 2024 06:22:54 -0300 Subject: [PATCH 1/3] Restock/Price detection - Fix duplicated prices with different data type on single page product #2636 (#2638) --- .../processors/restock_diff/processor.py | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/changedetectionio/processors/restock_diff/processor.py b/changedetectionio/processors/restock_diff/processor.py index 3d8e4349..88203b51 100644 --- a/changedetectionio/processors/restock_diff/processor.py +++ b/changedetectionio/processors/restock_diff/processor.py @@ -26,6 +26,25 @@ def _search_prop_by_value(matches, value): if value in prop[0]: return prop[1] # Yield the desired value and exit the function +def _deduplicate_prices(data): + seen = set() + unique_data = [] + + for datum in data: + # Convert 'value' to float if it can be a numeric string, otherwise leave it as is + try: + normalized_value = float(datum.value) if isinstance(datum.value, str) and datum.value.replace('.', '', 1).isdigit() else datum.value + except ValueError: + normalized_value = datum.value + + # If the normalized value hasn't been seen yet, add it to unique data + if normalized_value not in seen: + unique_data.append(datum) + seen.add(normalized_value) + + return unique_data + + # should return Restock() # add casting? def get_itemprop_availability(html_content) -> Restock: @@ -60,7 +79,7 @@ def get_itemprop_availability(html_content) -> Restock: pricecurrency_parse = parse('$..(pricecurrency|currency|priceCurrency )') availability_parse = parse('$..(availability|Availability)') - price_result = price_parse.find(data) + price_result = _deduplicate_prices(price_parse.find(data)) if price_result: # Right now, we just support single product items, maybe we will store the whole actual metadata seperately in teh future and # parse that for the UI? From 273adfa0a460dcb615d811d814c361930e7b45d1 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Tue, 17 Sep 2024 16:55:04 +0200 Subject: [PATCH 2/3] Testing - Fix false filter missing check alerts --- .../{apprise => apprise_plugin}/__init__.py | 1 - changedetectionio/flask_app.py | 2 +- changedetectionio/forms.py | 2 +- changedetectionio/notification.py | 3 +- .../tests/test_filter_failure_notification.py | 126 ++++++++++-------- changedetectionio/update_worker.py | 20 ++- 6 files changed, 86 insertions(+), 68 deletions(-) rename changedetectionio/{apprise => apprise_plugin}/__init__.py (99%) diff --git a/changedetectionio/apprise/__init__.py b/changedetectionio/apprise_plugin/__init__.py similarity index 99% rename from changedetectionio/apprise/__init__.py rename to changedetectionio/apprise_plugin/__init__.py index 130b1322..93c382fa 100644 --- a/changedetectionio/apprise/__init__.py +++ b/changedetectionio/apprise_plugin/__init__.py @@ -1,4 +1,3 @@ - # include the decorator from apprise.decorators import notify diff --git a/changedetectionio/flask_app.py b/changedetectionio/flask_app.py index 6324b58b..eb9d7799 100644 --- a/changedetectionio/flask_app.py +++ b/changedetectionio/flask_app.py @@ -538,7 +538,7 @@ def changedetection_app(config=None, datastore_o=None): from .apprise_asset import asset apobj = apprise.Apprise(asset=asset) # so that the custom endpoints are registered - from changedetectionio.apprise import apprise_custom_api_call_wrapper + from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper is_global_settings_form = request.args.get('mode', '') == 'global-settings' is_group_settings_form = request.args.get('mode', '') == 'group-settings' diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py index ce2841de..1b718cfe 100644 --- a/changedetectionio/forms.py +++ b/changedetectionio/forms.py @@ -222,7 +222,7 @@ class ValidateAppRiseServers(object): import apprise apobj = apprise.Apprise() # so that the custom endpoints are registered - from changedetectionio.apprise import apprise_custom_api_call_wrapper + from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper for server_url in field.data: if not apobj.add(server_url): message = field.gettext('\'%s\' is not a valid AppRise URL.' % (server_url)) diff --git a/changedetectionio/notification.py b/changedetectionio/notification.py index 54e682ae..a7328857 100644 --- a/changedetectionio/notification.py +++ b/changedetectionio/notification.py @@ -39,7 +39,8 @@ valid_notification_formats = { def process_notification(n_object, datastore): # so that the custom endpoints are registered - from changedetectionio.apprise import apprise_custom_api_call_wrapper + from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper + from .safe_jinja import render as jinja_render now = time.time() if n_object.get('notification_timestamp'): diff --git a/changedetectionio/tests/test_filter_failure_notification.py b/changedetectionio/tests/test_filter_failure_notification.py index 24ae405f..7d3a8bd0 100644 --- a/changedetectionio/tests/test_filter_failure_notification.py +++ b/changedetectionio/tests/test_filter_failure_notification.py @@ -1,5 +1,6 @@ import os import time +from loguru import logger from flask import url_for from .util import set_original_response, live_server_setup, extract_UUID_from_client, wait_for_all_checks, \ wait_for_notification_endpoint_output @@ -27,6 +28,12 @@ def run_filter_test(client, live_server, content_filter): # Response WITHOUT the filter ID element set_original_response() + # Goto the edit page, add our ignore text + notification_url = url_for('test_notification_endpoint', _external=True).replace('http', 'json') + + # Add our URL to the import page + test_url = url_for('test_endpoint', _external=True) + # cleanup for the next client.get( url_for("form_delete", uuid="all"), @@ -35,84 +42,90 @@ def run_filter_test(client, live_server, content_filter): if os.path.isfile("test-datastore/notification.txt"): os.unlink("test-datastore/notification.txt") - # Add our URL to the import page - test_url = url_for('test_endpoint', _external=True) res = client.post( - url_for("form_quick_watch_add"), - data={"url": test_url, "tags": ''}, + url_for("import_page"), + data={"urls": test_url}, follow_redirects=True ) - assert b"Watch added" in res.data - - # Give the thread time to pick up the first version + assert b"1 Imported" in res.data wait_for_all_checks(client) - # Goto the edit page, add our ignore text - # Add our URL to the import page - url = url_for('test_notification_endpoint', _external=True) - notification_url = url.replace('http', 'json') - - print(">>>> Notification URL: " + notification_url) - - # Just a regular notification setting, this will be used by the special 'filter not found' notification - notification_form_data = {"notification_urls": notification_url, - "notification_title": "New ChangeDetection.io Notification - {{watch_url}}", - "notification_body": "BASE URL: {{base_url}}\n" - "Watch URL: {{watch_url}}\n" - "Watch UUID: {{watch_uuid}}\n" - "Watch title: {{watch_title}}\n" - "Watch tag: {{watch_tag}}\n" - "Preview: {{preview_url}}\n" - "Diff URL: {{diff_url}}\n" - "Snapshot: {{current_snapshot}}\n" - "Diff: {{diff}}\n" - "Diff Full: {{diff_full}}\n" - "Diff as Patch: {{diff_patch}}\n" - ":-)", - "notification_format": "Text"} - - notification_form_data.update({ - "url": test_url, - "tags": "my tag", - "title": "my title 123", - "headers": "", - "filter_failure_notification_send": 'y', - "include_filters": content_filter, - "fetch_backend": "html_requests"}) - - # A POST here will also reset the filter failure counter (filter_failure_notification_threshold_attempts) + uuid = extract_UUID_from_client(client) + + assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 0, "No filter = No filter failure" + + watch_data = {"notification_urls": notification_url, + "notification_title": "New ChangeDetection.io Notification - {{watch_url}}", + "notification_body": "BASE URL: {{base_url}}\n" + "Watch URL: {{watch_url}}\n" + "Watch UUID: {{watch_uuid}}\n" + "Watch title: {{watch_title}}\n" + "Watch tag: {{watch_tag}}\n" + "Preview: {{preview_url}}\n" + "Diff URL: {{diff_url}}\n" + "Snapshot: {{current_snapshot}}\n" + "Diff: {{diff}}\n" + "Diff Full: {{diff_full}}\n" + "Diff as Patch: {{diff_patch}}\n" + ":-)", + "notification_format": "Text", + "fetch_backend": "html_requests", + "filter_failure_notification_send": 'y', + "headers": "", + "tags": "my tag", + "title": "my title 123", + "time_between_check-hours": 5, # So that the queue runner doesnt also put it in + "url": test_url, + } + res = client.post( - url_for("edit_page", uuid="first"), - data=notification_form_data, + url_for("edit_page", uuid=uuid), + data=watch_data, follow_redirects=True ) - assert b"Updated watch." in res.data wait_for_all_checks(client) + assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 0, "No filter = No filter failure" + + # Now add a filter, because recheck hours == 5, ONLY pressing of the [edit] or [recheck all] should trigger + watch_data['include_filters'] = content_filter + res = client.post( + url_for("edit_page", uuid=uuid), + data=watch_data, + follow_redirects=True + ) + assert b"Updated watch." in res.data + + # It should have checked once so far and given this error (because we hit SAVE) - # Now the notification should not exist, because we didnt reach the threshold + wait_for_all_checks(client) assert not os.path.isfile("test-datastore/notification.txt") + # Hitting [save] would have triggered a recheck, and we have a filter, so this would be ONE failure + assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 1, "Should have been checked once" + # recheck it up to just before the threshold, including the fact that in the previous POST it would have rechecked (and incremented) - for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT-2): + # Add 4 more checks + checked = 0 + ATTEMPT_THRESHOLD_SETTING = live_server.app.config['DATASTORE'].data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0) + for i in range(0, ATTEMPT_THRESHOLD_SETTING - 2): + checked += 1 client.get(url_for("form_watch_checknow"), follow_redirects=True) wait_for_all_checks(client) - time.sleep(2) # delay for apprise to fire - assert not os.path.isfile("test-datastore/notification.txt"), f"test-datastore/notification.txt should not exist - Attempt {i} when threshold is {App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT}" + res = client.get(url_for("index")) + assert b'Warning, no filters were found' in res.data + assert not os.path.isfile("test-datastore/notification.txt") - # We should see something in the frontend - res = client.get(url_for("index")) - assert b'Warning, no filters were found' in res.data + assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 5 # One more check should trigger the _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT threshold client.get(url_for("form_watch_checknow"), follow_redirects=True) wait_for_all_checks(client) - wait_for_notification_endpoint_output() + # Now it should exist and contain our "filter not found" alert assert os.path.isfile("test-datastore/notification.txt") - with open("test-datastore/notification.txt", 'r') as f: notification = f.read() @@ -125,7 +138,7 @@ def run_filter_test(client, live_server, content_filter): set_response_with_filter() # Try several times, it should NOT have 'filter not found' - for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT): + for i in range(0, ATTEMPT_THRESHOLD_SETTING + 2): client.get(url_for("form_watch_checknow"), follow_redirects=True) wait_for_all_checks(client) @@ -138,9 +151,6 @@ def run_filter_test(client, live_server, content_filter): assert not 'CSS/xPath filter was not present in the page' in notification # Re #1247 - All tokens got replaced correctly in the notification - res = client.get(url_for("index")) - uuid = extract_UUID_from_client(client) - # UUID is correct, but notification contains tag uuid as UUIID wtf assert uuid in notification # cleanup for the next @@ -155,9 +165,11 @@ def test_setup(live_server): live_server_setup(live_server) def test_check_include_filters_failure_notification(client, live_server, measure_memory_usage): +# live_server_setup(live_server) run_filter_test(client, live_server,'#nope-doesnt-exist') def test_check_xpath_filter_failure_notification(client, live_server, measure_memory_usage): +# live_server_setup(live_server) run_filter_test(client, live_server, '//*[@id="nope-doesnt-exist"]') # Test that notification is never sent diff --git a/changedetectionio/update_worker.py b/changedetectionio/update_worker.py index ba183848..fab27548 100644 --- a/changedetectionio/update_worker.py +++ b/changedetectionio/update_worker.py @@ -189,7 +189,9 @@ class update_worker(threading.Thread): 'screenshot': None }) self.notification_q.put(n_object) - logger.error(f"Sent filter not found notification for {watch_uuid}") + logger.debug(f"Sent filter not found notification for {watch_uuid}") + else: + logger.debug(f"NOT sending filter not found notification for {watch_uuid} - no notification URLs") def send_step_failure_notification(self, watch_uuid, step_n): watch = self.datastore.data['watching'].get(watch_uuid, False) @@ -364,18 +366,22 @@ class update_worker(threading.Thread): # Only when enabled, send the notification if watch.get('filter_failure_notification_send', False): - c = watch.get('consecutive_filter_failures', 5) + c = watch.get('consecutive_filter_failures', 0) c += 1 # Send notification if we reached the threshold? - threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', - 0) - logger.warning(f"Filter for {uuid} not found, consecutive_filter_failures: {c}") - if threshold > 0 and c >= threshold: + threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0) + logger.debug(f"Filter for {uuid} not found, consecutive_filter_failures: {c} of threshold {threshold}") + if c >= threshold: if not watch.get('notification_muted'): + logger.debug(f"Sending filter failed notification for {uuid}") self.send_filter_failure_notification(uuid) c = 0 + logger.debug(f"Reset filter failure count back to zero") self.datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c}) + else: + logger.trace(f"{uuid} - filter_failure_notification_send not enabled, skipping") + process_changedetection_results = False @@ -422,7 +428,7 @@ class update_worker(threading.Thread): ) if watch.get('filter_failure_notification_send', False): - c = watch.get('consecutive_filter_failures', 5) + c = watch.get('consecutive_filter_failures', 0) c += 1 # Send notification if we reached the threshold? threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', From 543cb205d2d1276bd7e71c1f943acf2da67b9f04 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Tue, 17 Sep 2024 18:29:12 +0200 Subject: [PATCH 3/3] Testing - Fixing Restock test #2641 --- changedetectionio/tests/test_restock_itemprop.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/changedetectionio/tests/test_restock_itemprop.py b/changedetectionio/tests/test_restock_itemprop.py index c873aa22..eb2e731b 100644 --- a/changedetectionio/tests/test_restock_itemprop.py +++ b/changedetectionio/tests/test_restock_itemprop.py @@ -146,14 +146,13 @@ def _run_test_minmax_limit(client, extra_watch_edit_form): data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'}, follow_redirects=True ) - - # A change in price, should trigger a change by default wait_for_all_checks(client) data = { "tags": "", "url": test_url, "headers": "", + "time_between_check-hours": 5, 'fetch_backend': "html_requests" } data.update(extra_watch_edit_form) @@ -178,12 +177,9 @@ def _run_test_minmax_limit(client, extra_watch_edit_form): assert b'1,000.45' or b'1000.45' in res.data #depending on locale assert b'unviewed' not in res.data - # price changed to something LESS than min (900), SHOULD be a change set_original_response(props_markup=instock_props[0], price='890.45') - # let previous runs wait - time.sleep(2) - + res = client.get(url_for("form_watch_checknow"), follow_redirects=True) assert b'1 watches queued for rechecking.' in res.data wait_for_all_checks(client)