From 66fb05527b6112f5d8b184c3dc14f82117f9fcbe Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Mon, 6 Jan 2025 20:38:50 +0100 Subject: [PATCH] Improve `last_checked` vs `last_changed` time information precision (#2883) --- changedetectionio/api/api_v1.py | 1 + changedetectionio/tests/test_api.py | 7 ++-- changedetectionio/tests/test_backend.py | 1 - .../tests/test_nonrenderable_pages.py | 14 +++++++- changedetectionio/tests/util.py | 8 +++++ changedetectionio/update_worker.py | 35 +++++++++++-------- 6 files changed, 47 insertions(+), 19 deletions(-) diff --git a/changedetectionio/api/api_v1.py b/changedetectionio/api/api_v1.py index 97e58abb..fc09bb1b 100644 --- a/changedetectionio/api/api_v1.py +++ b/changedetectionio/api/api_v1.py @@ -76,6 +76,7 @@ class Watch(Resource): # Return without history, get that via another API call # Properties are not returned as a JSON, so add the required props manually watch['history_n'] = watch.history_n + # attr .last_changed will check for the last written text snapshot on change watch['last_changed'] = watch.last_changed watch['viewed'] = watch.viewed return watch diff --git a/changedetectionio/tests/test_api.py b/changedetectionio/tests/test_api.py index 55ea4635..6b39822b 100644 --- a/changedetectionio/tests/test_api.py +++ b/changedetectionio/tests/test_api.py @@ -44,7 +44,6 @@ def set_modified_response(): return None - def is_valid_uuid(val): try: uuid.UUID(str(val)) @@ -56,8 +55,9 @@ def is_valid_uuid(val): def test_setup(client, live_server, measure_memory_usage): live_server_setup(live_server) + def test_api_simple(client, live_server, measure_memory_usage): - #live_server_setup(live_server) +# live_server_setup(live_server) api_key = extract_api_key_from_UI(client) @@ -129,6 +129,9 @@ def test_api_simple(client, live_server, measure_memory_usage): assert after_recheck_info['last_checked'] != before_recheck_info['last_checked'] assert after_recheck_info['last_changed'] != 0 + # #2877 When run in a slow fetcher like playwright etc + assert after_recheck_info['last_changed'] == after_recheck_info['last_checked'] + # Check history index list res = client.get( url_for("watchhistory", uuid=watch_uuid), diff --git a/changedetectionio/tests/test_backend.py b/changedetectionio/tests/test_backend.py index 4a1d66fb..69100ff7 100644 --- a/changedetectionio/tests/test_backend.py +++ b/changedetectionio/tests/test_backend.py @@ -2,7 +2,6 @@ import time from flask import url_for -from urllib.request import urlopen from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, \ extract_UUID_from_client diff --git a/changedetectionio/tests/test_nonrenderable_pages.py b/changedetectionio/tests/test_nonrenderable_pages.py index 2d8d4425..ceb4791e 100644 --- a/changedetectionio/tests/test_nonrenderable_pages.py +++ b/changedetectionio/tests/test_nonrenderable_pages.py @@ -48,7 +48,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure ##################### client.post( url_for("settings_page"), - data={"application-empty_pages_are_a_change": "", + data={"application-empty_pages_are_a_change": "", # default, OFF, they are NOT a change "requests-time_between_check-minutes": 180, 'application-fetch_backend': "html_requests"}, follow_redirects=True @@ -66,6 +66,14 @@ def test_check_basic_change_detection_functionality(client, live_server, measure res = client.get(url_for("index")) assert b'unviewed' not in res.data + uuid = next(iter(live_server.app.config['DATASTORE'].data['watching'])) + watch = live_server.app.config['DATASTORE'].data['watching'][uuid] + + assert watch.last_changed == 0 + assert watch['last_checked'] != 0 + + + # ok now do the opposite @@ -92,6 +100,10 @@ def test_check_basic_change_detection_functionality(client, live_server, measure # A totally zero byte (#2528) response should also not trigger an error set_zero_byte_response() client.get(url_for("form_watch_checknow"), follow_redirects=True) + + # 2877 + assert watch.last_changed == watch['last_checked'] + wait_for_all_checks(client) res = client.get(url_for("index")) assert b'unviewed' in res.data # A change should have registered because empty_pages_are_a_change is ON diff --git a/changedetectionio/tests/util.py b/changedetectionio/tests/util.py index bf5305b1..e501c2f5 100644 --- a/changedetectionio/tests/util.py +++ b/changedetectionio/tests/util.py @@ -76,6 +76,14 @@ def set_more_modified_response(): return None +def set_empty_text_response(): + test_return_data = """""" + + with open("test-datastore/endpoint-content.txt", "w") as f: + f.write(test_return_data) + + return None + def wait_for_notification_endpoint_output(): '''Apprise can take a few seconds to fire''' #@todo - could check the apprise object directly instead of looking for this file diff --git a/changedetectionio/update_worker.py b/changedetectionio/update_worker.py index 38de3d88..28647bad 100644 --- a/changedetectionio/update_worker.py +++ b/changedetectionio/update_worker.py @@ -243,7 +243,6 @@ class update_worker(threading.Thread): os.unlink(full_path) def run(self): - now = time.time() while not self.app.config.exit.is_set(): update_handler = None @@ -254,6 +253,7 @@ class update_worker(threading.Thread): pass else: + fetch_start_time = time.time() uuid = queued_item_data.item.get('uuid') self.current_uuid = uuid if uuid in list(self.datastore.data['watching'].keys()) and self.datastore.data['watching'][uuid].get('url'): @@ -268,7 +268,6 @@ class update_worker(threading.Thread): watch = self.datastore.data['watching'].get(uuid) logger.info(f"Processing watch UUID {uuid} Priority {queued_item_data.priority} URL {watch['url']}") - now = time.time() try: # Processor is what we are using for detecting the "Change" @@ -288,6 +287,10 @@ class update_worker(threading.Thread): update_handler.call_browser() + # In reality, the actual time of when the change was detected could be a few seconds after this + # For example it should include when the page stopped rendering if using a playwright/chrome type fetch + fetch_start_time = time.time() + changed_detected, update_obj, contents = update_handler.run_changedetection(watch=watch) # Re #342 @@ -512,7 +515,7 @@ class update_worker(threading.Thread): if not self.datastore.data['watching'].get(uuid): continue - # + # Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc if process_changedetection_results: @@ -525,8 +528,6 @@ class update_worker(threading.Thread): except Exception as e: logger.warning(f"UUID: {uuid} Extract as watch title was enabled, but couldn't find a <title>.") - # Now update after running everything - timestamp = round(time.time()) try: self.datastore.update_watch(uuid=uuid, update_obj=update_obj) @@ -542,24 +543,28 @@ class update_worker(threading.Thread): # Small hack so that we sleep just enough to allow 1 second between history snapshots # this is because history.txt indexes/keys snapshots by epoch seconds and we dont want dupe keys - - if watch.newest_history_key and int(timestamp) == int(watch.newest_history_key): + # @also - the keys are one per second at the most (for now) + if watch.newest_history_key and int(fetch_start_time) == int(watch.newest_history_key): logger.warning( - f"Timestamp {timestamp} already exists, waiting 1 seconds so we have a unique key in history.txt") - timestamp = str(int(timestamp) + 1) + f"Timestamp {fetch_start_time} already exists, waiting 1 seconds so we have a unique key in history.txt") + fetch_start_time += 1 time.sleep(1) watch.save_history_text(contents=contents, - timestamp=timestamp, + timestamp=int(fetch_start_time), snapshot_id=update_obj.get('previous_md5', 'none')) - if update_handler.fetcher.content: - watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=timestamp) + + empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False) + if update_handler.fetcher.content or (not update_handler.fetcher.content and empty_pages_are_a_change): + # attribute .last_changed is then based on this data + watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=int(fetch_start_time)) # Notifications should only trigger on the second time (first time, we gather the initial snapshot) if watch.history_n >= 2: logger.info(f"Change detected in UUID {uuid} - {watch['url']}") if not watch.get('notification_muted'): + # @todo only run this if notifications exist self.send_content_changed_notification(watch_uuid=uuid) except Exception as e: @@ -581,15 +586,15 @@ class update_worker(threading.Thread): except Exception as e: pass - self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3), - 'last_checked': round(time.time()), + self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - fetch_start_time, 3), + 'last_checked': int(fetch_start_time), 'check_count': count }) self.current_uuid = None # Done self.q.task_done() - logger.debug(f"Watch {uuid} done in {time.time()-now:.2f}s") + logger.debug(f"Watch {uuid} done in {time.time()-fetch_start_time:.2f}s") # Give the CPU time to interrupt time.sleep(0.1)