From 51fc81ad3e99f7106118b74cdec31c949be1f8bd Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Mon, 7 Oct 2024 17:48:40 +0200 Subject: [PATCH] Always process, dont skip when checksums were the same, saved a small amount of CPU but added complexity and issues --- changedetectionio/api/api_v1.py | 6 ++--- .../blueprint/price_data_follower/__init__.py | 2 +- changedetectionio/flask_app.py | 23 +++++++++---------- changedetectionio/processors/__init__.py | 2 +- .../processors/restock_diff/processor.py | 2 +- .../processors/text_json_diff/processor.py | 5 +--- changedetectionio/update_worker.py | 6 +---- 7 files changed, 19 insertions(+), 27 deletions(-) diff --git a/changedetectionio/api/api_v1.py b/changedetectionio/api/api_v1.py index 9b3eb440..97e58abb 100644 --- a/changedetectionio/api/api_v1.py +++ b/changedetectionio/api/api_v1.py @@ -58,7 +58,7 @@ class Watch(Resource): abort(404, message='No watch exists with the UUID of {}'.format(uuid)) if request.args.get('recheck'): - self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) + self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) return "OK", 200 if request.args.get('paused', '') == 'paused': self.datastore.data['watching'].get(uuid).pause() @@ -246,7 +246,7 @@ class CreateWatch(Resource): new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags) if new_uuid: - self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True})) + self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid})) return {'uuid': new_uuid}, 201 else: return "Invalid or unsupported URL", 400 @@ -303,7 +303,7 @@ class CreateWatch(Resource): if request.args.get('recheck_all'): for uuid in self.datastore.data['watching'].keys(): - self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) + self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) return {'status': "OK"}, 200 return list, 200 diff --git a/changedetectionio/blueprint/price_data_follower/__init__.py b/changedetectionio/blueprint/price_data_follower/__init__.py index a41552d8..6011303a 100644 --- a/changedetectionio/blueprint/price_data_follower/__init__.py +++ b/changedetectionio/blueprint/price_data_follower/__init__.py @@ -19,7 +19,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT datastore.data['watching'][uuid]['processor'] = 'restock_diff' datastore.data['watching'][uuid].clear_watch() - update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False})) + update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) return redirect(url_for("index")) @login_required diff --git a/changedetectionio/flask_app.py b/changedetectionio/flask_app.py index 32c2b316..d9a953ee 100644 --- a/changedetectionio/flask_app.py +++ b/changedetectionio/flask_app.py @@ -799,7 +799,7 @@ def changedetection_app(config=None, datastore_o=None): datastore.needs_write_urgent = True # Queue the watch for immediate recheck, with a higher priority - update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False})) + update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) # Diff page [edit] link should go back to diff page if request.args.get("next") and request.args.get("next") == 'diff': @@ -980,7 +980,7 @@ def changedetection_app(config=None, datastore_o=None): importer = import_url_list() importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff')) for uuid in importer.new_uuids: - update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) + update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) if len(importer.remaining_data) == 0: return redirect(url_for('index')) @@ -993,7 +993,7 @@ def changedetection_app(config=None, datastore_o=None): d_importer = import_distill_io_json() d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore) for uuid in d_importer.new_uuids: - update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) + update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) # XLSX importer if request.files and request.files.get('xlsx_file'): @@ -1017,7 +1017,7 @@ def changedetection_app(config=None, datastore_o=None): w_importer.run(data=file, flash=flash, datastore=datastore) for uuid in w_importer.new_uuids: - update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) + update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) # Could be some remaining, or we could be on GET form = forms.importForm(formdata=request.form if request.method == 'POST' else None) @@ -1414,8 +1414,7 @@ def changedetection_app(config=None, datastore_o=None): update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type') try: changed_detected, update_obj, text_after_filter = update_handler.run_changedetection( - watch=tmp_watch, - skip_when_checksum_same=False, + watch=tmp_watch ) except FilterNotFoundInResponse as e: text_after_filter = f"Filter not found in HTML: {str(e)}" @@ -1515,7 +1514,7 @@ def changedetection_app(config=None, datastore_o=None): new_uuid = datastore.clone(uuid) if new_uuid: if not datastore.data['watching'].get(uuid).get('paused'): - update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True})) + update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid})) flash('Cloned.') return redirect(url_for('index')) @@ -1536,7 +1535,7 @@ def changedetection_app(config=None, datastore_o=None): if uuid: if uuid not in running_uuids: - update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False})) + update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) i = 1 elif tag: @@ -1547,7 +1546,7 @@ def changedetection_app(config=None, datastore_o=None): continue if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']: update_q.put( - queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False}) + queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}) ) i += 1 @@ -1557,7 +1556,7 @@ def changedetection_app(config=None, datastore_o=None): if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']: if with_errors and not watch.get('last_error'): continue - update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False})) + update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})) i += 1 flash(f"{i} watches queued for rechecking.") @@ -1616,7 +1615,7 @@ def changedetection_app(config=None, datastore_o=None): uuid = uuid.strip() if datastore.data['watching'].get(uuid): # Recheck and require a full reprocessing - update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False})) + update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) flash("{} watches queued for rechecking".format(len(uuids))) elif (op == 'clear-errors'): @@ -1940,7 +1939,7 @@ def ticker_thread_check_time_launch_checks(): f"{now - watch['last_checked']:0.2f}s since last checked") # Into the queue with you - update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid, 'skip_when_checksum_same': True})) + update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid})) # Reset for next time watch.jitter_seconds = 0 diff --git a/changedetectionio/processors/__init__.py b/changedetectionio/processors/__init__.py index 54ffcea7..72f237d7 100644 --- a/changedetectionio/processors/__init__.py +++ b/changedetectionio/processors/__init__.py @@ -155,7 +155,7 @@ class difference_detection_processor(): # After init, call run_changedetection() which will do the actual change-detection @abstractmethod - def run_changedetection(self, watch, skip_when_checksum_same: bool = True): + def run_changedetection(self, watch): update_obj = {'last_notification_error': False, 'last_error': False} some_data = 'xxxxx' update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest() diff --git a/changedetectionio/processors/restock_diff/processor.py b/changedetectionio/processors/restock_diff/processor.py index 8a87aab2..b4b7901c 100644 --- a/changedetectionio/processors/restock_diff/processor.py +++ b/changedetectionio/processors/restock_diff/processor.py @@ -140,7 +140,7 @@ class perform_site_check(difference_detection_processor): screenshot = None xpath_data = None - def run_changedetection(self, watch, skip_when_checksum_same=True): + def run_changedetection(self, watch): import hashlib if not watch: diff --git a/changedetectionio/processors/text_json_diff/processor.py b/changedetectionio/processors/text_json_diff/processor.py index a35724b5..e62016ca 100644 --- a/changedetectionio/processors/text_json_diff/processor.py +++ b/changedetectionio/processors/text_json_diff/processor.py @@ -35,7 +35,7 @@ class PDFToHTMLToolNotFound(ValueError): # (set_proxy_from_list) class perform_site_check(difference_detection_processor): - def run_changedetection(self, watch, skip_when_checksum_same=True): + def run_changedetection(self, watch): changed_detected = False html_content = "" @@ -59,9 +59,6 @@ class perform_site_check(difference_detection_processor): # Watches added automatically in the queue manager will skip if its the same checksum as the previous run # Saves a lot of CPU update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest() - if skip_when_checksum_same: - if update_obj['previous_md5_before_filters'] == watch.get('previous_md5_before_filters'): - raise content_fetchers.exceptions.checksumFromPreviousCheckWasTheSame() # Fetching complete, now filters diff --git a/changedetectionio/update_worker.py b/changedetectionio/update_worker.py index 97e1ec27..22be5128 100644 --- a/changedetectionio/update_worker.py +++ b/changedetectionio/update_worker.py @@ -260,9 +260,6 @@ class update_worker(threading.Thread): try: # Processor is what we are using for detecting the "Change" processor = watch.get('processor', 'text_json_diff') - # Abort processing when the content was the same as the last fetch - skip_when_same_checksum = queued_item_data.item.get('skip_when_checksum_same') - # Init a new 'difference_detection_processor', first look in processors processor_module_name = f"changedetectionio.processors.{processor}.processor" @@ -279,8 +276,7 @@ class update_worker(threading.Thread): update_handler.call_browser() changed_detected, update_obj, contents = update_handler.run_changedetection( - watch=watch, - skip_when_checksum_same=skip_when_same_checksum, + watch=watch ) # Re #342