From c20d900ffbc8446c6d1882e3da211875bfc08d86 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Fri, 11 Oct 2024 15:54:52 +0200 Subject: [PATCH] Remove skip_when_checksum_same --- changedetectionio/api/api_v1.py | 6 +++--- .../blueprint/price_data_follower/__init__.py | 2 +- changedetectionio/flask_app.py | 20 +++++++++---------- changedetectionio/processors/__init__.py | 2 +- .../processors/restock_diff/processor.py | 2 +- .../processors/text_json_diff/__init__.py | 5 +---- .../processors/text_json_diff/processor.py | 5 +---- changedetectionio/update_worker.py | 8 +------- 8 files changed, 19 insertions(+), 31 deletions(-) diff --git a/changedetectionio/api/api_v1.py b/changedetectionio/api/api_v1.py index 9b3eb440..97e58abb 100644 --- a/changedetectionio/api/api_v1.py +++ b/changedetectionio/api/api_v1.py @@ -58,7 +58,7 @@ class Watch(Resource): abort(404, message='No watch exists with the UUID of {}'.format(uuid)) if request.args.get('recheck'): - self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) + self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) return "OK", 200 if request.args.get('paused', '') == 'paused': self.datastore.data['watching'].get(uuid).pause() @@ -246,7 +246,7 @@ class CreateWatch(Resource): new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags) if new_uuid: - self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True})) + self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid})) return {'uuid': new_uuid}, 201 else: return "Invalid or unsupported URL", 400 @@ -303,7 +303,7 @@ class CreateWatch(Resource): if request.args.get('recheck_all'): for uuid in self.datastore.data['watching'].keys(): - self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) + self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) return {'status': "OK"}, 200 return list, 200 diff --git a/changedetectionio/blueprint/price_data_follower/__init__.py b/changedetectionio/blueprint/price_data_follower/__init__.py index a41552d8..6011303a 100644 --- a/changedetectionio/blueprint/price_data_follower/__init__.py +++ b/changedetectionio/blueprint/price_data_follower/__init__.py @@ -19,7 +19,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT datastore.data['watching'][uuid]['processor'] = 'restock_diff' datastore.data['watching'][uuid].clear_watch() - update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False})) + update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) return redirect(url_for("index")) @login_required diff --git a/changedetectionio/flask_app.py b/changedetectionio/flask_app.py index 94cd4c93..705ea20d 100644 --- a/changedetectionio/flask_app.py +++ b/changedetectionio/flask_app.py @@ -795,7 +795,7 @@ def changedetection_app(config=None, datastore_o=None): datastore.needs_write_urgent = True # Queue the watch for immediate recheck, with a higher priority - update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False})) + update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) # Diff page [edit] link should go back to diff page if request.args.get("next") and request.args.get("next") == 'diff': @@ -976,7 +976,7 @@ def changedetection_app(config=None, datastore_o=None): importer = import_url_list() importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff')) for uuid in importer.new_uuids: - update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) + update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) if len(importer.remaining_data) == 0: return redirect(url_for('index')) @@ -989,7 +989,7 @@ def changedetection_app(config=None, datastore_o=None): d_importer = import_distill_io_json() d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore) for uuid in d_importer.new_uuids: - update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) + update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) # XLSX importer if request.files and request.files.get('xlsx_file'): @@ -1013,7 +1013,7 @@ def changedetection_app(config=None, datastore_o=None): w_importer.run(data=file, flash=flash, datastore=datastore) for uuid in w_importer.new_uuids: - update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) + update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) # Could be some remaining, or we could be on GET form = forms.importForm(formdata=request.form if request.method == 'POST' else None) @@ -1442,7 +1442,7 @@ def changedetection_app(config=None, datastore_o=None): new_uuid = datastore.clone(uuid) if new_uuid: if not datastore.data['watching'].get(uuid).get('paused'): - update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True})) + update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid})) flash('Cloned.') return redirect(url_for('index')) @@ -1463,7 +1463,7 @@ def changedetection_app(config=None, datastore_o=None): if uuid: if uuid not in running_uuids: - update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False})) + update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) i = 1 elif tag: @@ -1474,7 +1474,7 @@ def changedetection_app(config=None, datastore_o=None): continue if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']: update_q.put( - queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False}) + queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}) ) i += 1 @@ -1484,7 +1484,7 @@ def changedetection_app(config=None, datastore_o=None): if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']: if with_errors and not watch.get('last_error'): continue - update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False})) + update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})) i += 1 flash(f"{i} watches queued for rechecking.") return redirect(url_for('index', tag=tag)) @@ -1542,7 +1542,7 @@ def changedetection_app(config=None, datastore_o=None): uuid = uuid.strip() if datastore.data['watching'].get(uuid): # Recheck and require a full reprocessing - update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False})) + update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) flash("{} watches queued for rechecking".format(len(uuids))) elif (op == 'clear-errors'): @@ -1866,7 +1866,7 @@ def ticker_thread_check_time_launch_checks(): f"{now - watch['last_checked']:0.2f}s since last checked") # Into the queue with you - update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid, 'skip_when_checksum_same': True})) + update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid})) # Reset for next time watch.jitter_seconds = 0 diff --git a/changedetectionio/processors/__init__.py b/changedetectionio/processors/__init__.py index c243f07d..e0682a25 100644 --- a/changedetectionio/processors/__init__.py +++ b/changedetectionio/processors/__init__.py @@ -157,7 +157,7 @@ class difference_detection_processor(): # After init, call run_changedetection() which will do the actual change-detection @abstractmethod - def run_changedetection(self, watch, skip_when_checksum_same: bool = True): + def run_changedetection(self, watch): update_obj = {'last_notification_error': False, 'last_error': False} some_data = 'xxxxx' update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest() diff --git a/changedetectionio/processors/restock_diff/processor.py b/changedetectionio/processors/restock_diff/processor.py index ce55f5d2..f3ee2be9 100644 --- a/changedetectionio/processors/restock_diff/processor.py +++ b/changedetectionio/processors/restock_diff/processor.py @@ -144,7 +144,7 @@ class perform_site_check(difference_detection_processor): screenshot = None xpath_data = None - def run_changedetection(self, watch, skip_when_checksum_same=True): + def run_changedetection(self, watch): import hashlib if not watch: diff --git a/changedetectionio/processors/text_json_diff/__init__.py b/changedetectionio/processors/text_json_diff/__init__.py index 6a5efad9..a6e018fd 100644 --- a/changedetectionio/processors/text_json_diff/__init__.py +++ b/changedetectionio/processors/text_json_diff/__init__.py @@ -11,10 +11,7 @@ def _task(watch, update_handler): try: # The slow process (we run 2 of these in parallel) - changed_detected, update_obj, text_after_filter = update_handler.run_changedetection( - watch=watch, - skip_when_checksum_same=False, - ) + changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(watch=watch) except FilterNotFoundInResponse as e: text_after_filter = f"Filter not found in HTML: {str(e)}" except ReplyWithContentButNoText as e: diff --git a/changedetectionio/processors/text_json_diff/processor.py b/changedetectionio/processors/text_json_diff/processor.py index c3752956..42c2dd59 100644 --- a/changedetectionio/processors/text_json_diff/processor.py +++ b/changedetectionio/processors/text_json_diff/processor.py @@ -35,7 +35,7 @@ class PDFToHTMLToolNotFound(ValueError): # (set_proxy_from_list) class perform_site_check(difference_detection_processor): - def run_changedetection(self, watch, skip_when_checksum_same=True): + def run_changedetection(self, watch): changed_detected = False html_content = "" screenshot = False # as bytes @@ -58,9 +58,6 @@ class perform_site_check(difference_detection_processor): # Watches added automatically in the queue manager will skip if its the same checksum as the previous run # Saves a lot of CPU update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest() - if skip_when_checksum_same: - if update_obj['previous_md5_before_filters'] == watch.get('previous_md5_before_filters'): - raise content_fetchers.exceptions.checksumFromPreviousCheckWasTheSame() # Fetching complete, now filters diff --git a/changedetectionio/update_worker.py b/changedetectionio/update_worker.py index ebb3ada7..9dd460d3 100644 --- a/changedetectionio/update_worker.py +++ b/changedetectionio/update_worker.py @@ -260,9 +260,6 @@ class update_worker(threading.Thread): try: # Processor is what we are using for detecting the "Change" processor = watch.get('processor', 'text_json_diff') - # Abort processing when the content was the same as the last fetch - skip_when_same_checksum = queued_item_data.item.get('skip_when_checksum_same') - # Init a new 'difference_detection_processor', first look in processors processor_module_name = f"changedetectionio.processors.{processor}.processor" @@ -278,10 +275,7 @@ class update_worker(threading.Thread): update_handler.call_browser() - changed_detected, update_obj, contents = update_handler.run_changedetection( - watch=watch, - skip_when_checksum_same=skip_when_same_checksum, - ) + changed_detected, update_obj, contents = update_handler.run_changedetection(watch=watch) # Re #342 # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.