From daae43e9f98d25cde86d829d600110176db2414f Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Fri, 29 Jul 2022 10:11:49 +0200 Subject: [PATCH] Bug fix: Filter failure detection notification was interfering with change-detection results, added test case (#786) --- .../tests/test_filter_exist_changes.py | 134 ++++++++++++++++++ .../tests/test_filter_failure_notification.py | 12 +- changedetectionio/update_worker.py | 52 ++++--- 3 files changed, 179 insertions(+), 19 deletions(-) create mode 100644 changedetectionio/tests/test_filter_exist_changes.py diff --git a/changedetectionio/tests/test_filter_exist_changes.py b/changedetectionio/tests/test_filter_exist_changes.py new file mode 100644 index 00000000..261686ed --- /dev/null +++ b/changedetectionio/tests/test_filter_exist_changes.py @@ -0,0 +1,134 @@ +#!/usr/bin/python3 + +# https://www.reddit.com/r/selfhosted/comments/wa89kp/comment/ii3a4g7/?context=3 +import os +import time +from flask import url_for +from .util import set_original_response, live_server_setup +from changedetectionio.model import App + + +def set_response_without_filter(): + test_return_data = """ + + Some initial text
+

Which is across multiple lines

+
+ So let's see what happens.
+
Some text thats the same
+ + + """ + + with open("test-datastore/endpoint-content.txt", "w") as f: + f.write(test_return_data) + return None + + +def set_response_with_filter(): + test_return_data = """ + + Some initial text
+

Which is across multiple lines

+
+ So let's see what happens.
+
Ticket now on sale!
+ + + """ + + with open("test-datastore/endpoint-content.txt", "w") as f: + f.write(test_return_data) + return None + +def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_server): +# Filter knowingly doesn't exist, like someone setting up a known filter to see if some cinema tickets are on sale again +# And the page has that filter available +# Then I should get a notification + + live_server_setup(live_server) + + # Give the endpoint time to spin up + time.sleep(1) + set_response_without_filter() + + # Add our URL to the import page + test_url = url_for('test_endpoint', _external=True) + res = client.post( + url_for("form_quick_watch_add"), + data={"url": test_url, "tag": 'cinema'}, + follow_redirects=True + ) + assert b"Watch added" in res.data + + # Give the thread time to pick up the first version + time.sleep(3) + + # Goto the edit page, add our ignore text + # Add our URL to the import page + url = url_for('test_notification_endpoint', _external=True) + notification_url = url.replace('http', 'json') + + print(">>>> Notification URL: " + notification_url) + + # Just a regular notification setting, this will be used by the special 'filter not found' notification + notification_form_data = {"notification_urls": notification_url, + "notification_title": "New ChangeDetection.io Notification - {watch_url}", + "notification_body": "BASE URL: {base_url}\n" + "Watch URL: {watch_url}\n" + "Watch UUID: {watch_uuid}\n" + "Watch title: {watch_title}\n" + "Watch tag: {watch_tag}\n" + "Preview: {preview_url}\n" + "Diff URL: {diff_url}\n" + "Snapshot: {current_snapshot}\n" + "Diff: {diff}\n" + "Diff Full: {diff_full}\n" + ":-)", + "notification_format": "Text"} + + notification_form_data.update({ + "url": test_url, + "tag": "my tag", + "title": "my title", + "headers": "", + "css_filter": '.ticket-available', + "fetch_backend": "html_requests"}) + + res = client.post( + url_for("edit_page", uuid="first"), + data=notification_form_data, + follow_redirects=True + ) + assert b"Updated watch." in res.data + time.sleep(3) + + # Shouldn't exist, shouldn't have fired + assert not os.path.isfile("test-datastore/notification.txt") + # Now the filter should exist + set_response_with_filter() + client.get(url_for("form_watch_checknow"), follow_redirects=True) + time.sleep(3) + + assert os.path.isfile("test-datastore/notification.txt") + + with open("test-datastore/notification.txt", 'r') as f: + notification = f.read() + + assert 'Ticket now on sale' in notification + os.unlink("test-datastore/notification.txt") + + + # Test that if it gets removed, then re-added, we get a notification + # Remove the target and re-add it, we should get a new notification + set_response_without_filter() + client.get(url_for("form_watch_checknow"), follow_redirects=True) + time.sleep(3) + assert not os.path.isfile("test-datastore/notification.txt") + + set_response_with_filter() + client.get(url_for("form_watch_checknow"), follow_redirects=True) + time.sleep(3) + assert os.path.isfile("test-datastore/notification.txt") + +# Also test that the filter was updated after the first one was requested diff --git a/changedetectionio/tests/test_filter_failure_notification.py b/changedetectionio/tests/test_filter_failure_notification.py index da4c9720..812f288b 100644 --- a/changedetectionio/tests/test_filter_failure_notification.py +++ b/changedetectionio/tests/test_filter_failure_notification.py @@ -26,6 +26,13 @@ def run_filter_test(client, content_filter): # Give the endpoint time to spin up time.sleep(1) + # cleanup for the next + client.get( + url_for("form_delete", uuid="all"), + follow_redirects=True + ) + if os.path.isfile("test-datastore/notification.txt"): + os.unlink("test-datastore/notification.txt") # Add our URL to the import page test_url = url_for('test_endpoint', _external=True) @@ -34,6 +41,7 @@ def run_filter_test(client, content_filter): data={"url": test_url, "tag": ''}, follow_redirects=True ) + assert b"Watch added" in res.data # Give the thread time to pick up the first version @@ -67,6 +75,7 @@ def run_filter_test(client, content_filter): "tag": "my tag", "title": "my title", "headers": "", + "filter_failure_notification_send": 'y', "css_filter": content_filter, "fetch_backend": "html_requests"}) @@ -86,7 +95,7 @@ def run_filter_test(client, content_filter): time.sleep(3) # We should see something in the frontend - assert b'Did the page change its layout' in res.data + assert b'Warning, filter' in res.data # Now it should exist and contain our "filter not found" alert assert os.path.isfile("test-datastore/notification.txt") @@ -132,3 +141,4 @@ def test_check_xpath_filter_failure_notification(client, live_server): time.sleep(1) run_filter_test(client, '//*[@id="nope-doesnt-exist"]') +# Test that notification is never sent \ No newline at end of file diff --git a/changedetectionio/update_worker.py b/changedetectionio/update_worker.py index 41b1ff91..02ea036c 100644 --- a/changedetectionio/update_worker.py +++ b/changedetectionio/update_worker.py @@ -65,10 +65,12 @@ class update_worker(threading.Thread): if uuid in list(self.datastore.data['watching'].keys()): changed_detected = False - contents = "" + contents = b'' screenshot = False update_obj= {} xpath_data = False + process_changedetection_results = True + now = time.time() try: @@ -80,26 +82,35 @@ class update_worker(threading.Thread): raise Exception("Error - returned data from the fetch handler SHOULD be bytes") except PermissionError as e: self.app.logger.error("File permission error updating", uuid, str(e)) + process_changedetection_results = False except content_fetcher.ReplyWithContentButNoText as e: # Totally fine, it's by choice - just continue on, nothing more to care about # Page had elements/content but no renderable text + # Backend (not filters) gave zero output self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found."}) + process_changedetection_results = False + except FilterNotFoundInResponse as e: - err_text = "Filter '{}' not found - Did the page change its layout?".format(str(e)) - c = 0 - if self.datastore.data['watching'].get(uuid, False): - c = self.datastore.data['watching'][uuid].get('consecutive_filter_failures', 5) - c += 1 + err_text = "Warning, filter '{}' not found".format(str(e)) + self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, + # So that we get a trigger when the content is added again + 'previous_md5': ''}) - # Send notification if we reached the threshold? - threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0) - print("Filter for {} not found, consecutive_filter_failures: {}".format(uuid, c)) - if threshold >0 and c >= threshold: - self.send_filter_failure_notification(uuid) - c = 0 + # Only when enabled, send the notification + if self.datastore.data['watching'][uuid].get('filter_failure_notification_send', False): + c = self.datastore.data['watching'][uuid].get('consecutive_filter_failures', 5) + c += 1 + # Send notification if we reached the threshold? + threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', + 0) + print("Filter for {} not found, consecutive_filter_failures: {}".format(uuid, c)) + if threshold > 0 and c >= threshold: + self.send_filter_failure_notification(uuid) + c = 0 + self.datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c}) + + process_changedetection_results = True - self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, - 'consecutive_filter_failures': c}) except content_fetcher.EmptyReply as e: # Some kind of custom to-str handler in the exception handler that does this? err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code) @@ -109,6 +120,7 @@ class update_worker(threading.Thread): err_text = "Screenshot unavailable, page did not render fully in the expected time - try increasing 'Wait seconds before extracting text'" self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, 'last_check_status': e.status_code}) + process_changedetection_results = False except content_fetcher.PageUnloadable as e: err_text = "Page request from server didnt respond correctly" self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, @@ -116,8 +128,14 @@ class update_worker(threading.Thread): except Exception as e: self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e)) self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)}) - + # Other serious error + process_changedetection_results = False else: + # Mark that we never had any failures + update_obj['consecutive_filter_failures'] = 0 + + # Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc + if process_changedetection_results: try: watch = self.datastore.data['watching'][uuid] fname = "" # Saved history text filename @@ -127,8 +145,6 @@ class update_worker(threading.Thread): # A change was detected fname = watch.save_history_text(contents=contents, timestamp=str(round(time.time()))) - # Generally update anything interesting returned - update_obj['consecutive_filter_failures'] = 0 self.datastore.update_watch(uuid=uuid, update_obj=update_obj) # A change was detected @@ -194,7 +210,7 @@ class update_worker(threading.Thread): self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e)) self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)}) - finally: + # Always record that we atleast tried self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3), 'last_checked': round(time.time())})