From 51fc81ad3e99f7106118b74cdec31c949be1f8bd Mon Sep 17 00:00:00 2001
From: dgtlmoon <dgtlmoon@gmail.com>
Date: Mon, 7 Oct 2024 17:48:40 +0200
Subject: [PATCH] Always process, dont skip when checksums were the same, saved
 a small amount of CPU but added complexity and issues

---
 changedetectionio/api/api_v1.py               |  6 ++---
 .../blueprint/price_data_follower/__init__.py |  2 +-
 changedetectionio/flask_app.py                | 23 +++++++++----------
 changedetectionio/processors/__init__.py      |  2 +-
 .../processors/restock_diff/processor.py      |  2 +-
 .../processors/text_json_diff/processor.py    |  5 +---
 changedetectionio/update_worker.py            |  6 +----
 7 files changed, 19 insertions(+), 27 deletions(-)

diff --git a/changedetectionio/api/api_v1.py b/changedetectionio/api/api_v1.py
index 9b3eb440..97e58abb 100644
--- a/changedetectionio/api/api_v1.py
+++ b/changedetectionio/api/api_v1.py
@@ -58,7 +58,7 @@ class Watch(Resource):
             abort(404, message='No watch exists with the UUID of {}'.format(uuid))
 
         if request.args.get('recheck'):
-            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
+            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
             return "OK", 200
         if request.args.get('paused', '') == 'paused':
             self.datastore.data['watching'].get(uuid).pause()
@@ -246,7 +246,7 @@ class CreateWatch(Resource):
 
         new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags)
         if new_uuid:
-            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
+            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
             return {'uuid': new_uuid}, 201
         else:
             return "Invalid or unsupported URL", 400
@@ -303,7 +303,7 @@ class CreateWatch(Resource):
 
         if request.args.get('recheck_all'):
             for uuid in self.datastore.data['watching'].keys():
-                self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
+                self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
             return {'status': "OK"}, 200
 
         return list, 200
diff --git a/changedetectionio/blueprint/price_data_follower/__init__.py b/changedetectionio/blueprint/price_data_follower/__init__.py
index a41552d8..6011303a 100644
--- a/changedetectionio/blueprint/price_data_follower/__init__.py
+++ b/changedetectionio/blueprint/price_data_follower/__init__.py
@@ -19,7 +19,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
         datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
         datastore.data['watching'][uuid]['processor'] = 'restock_diff'
         datastore.data['watching'][uuid].clear_watch()
-        update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
+        update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
         return redirect(url_for("index"))
 
     @login_required
diff --git a/changedetectionio/flask_app.py b/changedetectionio/flask_app.py
index 32c2b316..d9a953ee 100644
--- a/changedetectionio/flask_app.py
+++ b/changedetectionio/flask_app.py
@@ -799,7 +799,7 @@ def changedetection_app(config=None, datastore_o=None):
             datastore.needs_write_urgent = True
 
             # Queue the watch for immediate recheck, with a higher priority
-            update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
+            update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
 
             # Diff page [edit] link should go back to diff page
             if request.args.get("next") and request.args.get("next") == 'diff':
@@ -980,7 +980,7 @@ def changedetection_app(config=None, datastore_o=None):
                 importer = import_url_list()
                 importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff'))
                 for uuid in importer.new_uuids:
-                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
 
                 if len(importer.remaining_data) == 0:
                     return redirect(url_for('index'))
@@ -993,7 +993,7 @@ def changedetection_app(config=None, datastore_o=None):
                 d_importer = import_distill_io_json()
                 d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
                 for uuid in d_importer.new_uuids:
-                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
 
             # XLSX importer
             if request.files and request.files.get('xlsx_file'):
@@ -1017,7 +1017,7 @@ def changedetection_app(config=None, datastore_o=None):
                     w_importer.run(data=file, flash=flash, datastore=datastore)
 
                 for uuid in w_importer.new_uuids:
-                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
 
         # Could be some remaining, or we could be on GET
         form = forms.importForm(formdata=request.form if request.method == 'POST' else None)
@@ -1414,8 +1414,7 @@ def changedetection_app(config=None, datastore_o=None):
                 update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type')
                 try:
                     changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(
-                        watch=tmp_watch,
-                        skip_when_checksum_same=False,
+                        watch=tmp_watch
                     )
                 except FilterNotFoundInResponse as e:
                     text_after_filter = f"Filter not found in HTML: {str(e)}"
@@ -1515,7 +1514,7 @@ def changedetection_app(config=None, datastore_o=None):
         new_uuid = datastore.clone(uuid)
         if new_uuid:
             if not datastore.data['watching'].get(uuid).get('paused'):
-                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
+                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
             flash('Cloned.')
 
         return redirect(url_for('index'))
@@ -1536,7 +1535,7 @@ def changedetection_app(config=None, datastore_o=None):
 
         if uuid:
             if uuid not in running_uuids:
-                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
+                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
             i = 1
 
         elif tag:
@@ -1547,7 +1546,7 @@ def changedetection_app(config=None, datastore_o=None):
                         continue
                     if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
                         update_q.put(
-                            queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False})
+                            queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
                         )
                         i += 1
 
@@ -1557,7 +1556,7 @@ def changedetection_app(config=None, datastore_o=None):
                 if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
                     if with_errors and not watch.get('last_error'):
                         continue
-                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False}))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
                     i += 1
 
         flash(f"{i} watches queued for rechecking.")
@@ -1616,7 +1615,7 @@ def changedetection_app(config=None, datastore_o=None):
                 uuid = uuid.strip()
                 if datastore.data['watching'].get(uuid):
                     # Recheck and require a full reprocessing
-                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
             flash("{} watches queued for rechecking".format(len(uuids)))
 
         elif (op == 'clear-errors'):
@@ -1940,7 +1939,7 @@ def ticker_thread_check_time_launch_checks():
                         f"{now - watch['last_checked']:0.2f}s since last checked")
 
                     # Into the queue with you
-                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid, 'skip_when_checksum_same': True}))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid}))
 
                     # Reset for next time
                     watch.jitter_seconds = 0
diff --git a/changedetectionio/processors/__init__.py b/changedetectionio/processors/__init__.py
index 54ffcea7..72f237d7 100644
--- a/changedetectionio/processors/__init__.py
+++ b/changedetectionio/processors/__init__.py
@@ -155,7 +155,7 @@ class difference_detection_processor():
         # After init, call run_changedetection() which will do the actual change-detection
 
     @abstractmethod
-    def run_changedetection(self, watch, skip_when_checksum_same: bool = True):
+    def run_changedetection(self, watch):
         update_obj = {'last_notification_error': False, 'last_error': False}
         some_data = 'xxxxx'
         update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
diff --git a/changedetectionio/processors/restock_diff/processor.py b/changedetectionio/processors/restock_diff/processor.py
index 8a87aab2..b4b7901c 100644
--- a/changedetectionio/processors/restock_diff/processor.py
+++ b/changedetectionio/processors/restock_diff/processor.py
@@ -140,7 +140,7 @@ class perform_site_check(difference_detection_processor):
     screenshot = None
     xpath_data = None
 
-    def run_changedetection(self, watch, skip_when_checksum_same=True):
+    def run_changedetection(self, watch):
         import hashlib
 
         if not watch:
diff --git a/changedetectionio/processors/text_json_diff/processor.py b/changedetectionio/processors/text_json_diff/processor.py
index a35724b5..e62016ca 100644
--- a/changedetectionio/processors/text_json_diff/processor.py
+++ b/changedetectionio/processors/text_json_diff/processor.py
@@ -35,7 +35,7 @@ class PDFToHTMLToolNotFound(ValueError):
 # (set_proxy_from_list)
 class perform_site_check(difference_detection_processor):
 
-    def run_changedetection(self, watch, skip_when_checksum_same=True):
+    def run_changedetection(self, watch):
 
         changed_detected = False
         html_content = ""
@@ -59,9 +59,6 @@ class perform_site_check(difference_detection_processor):
         # Watches added automatically in the queue manager will skip if its the same checksum as the previous run
         # Saves a lot of CPU
         update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()
-        if skip_when_checksum_same:
-            if update_obj['previous_md5_before_filters'] == watch.get('previous_md5_before_filters'):
-                raise content_fetchers.exceptions.checksumFromPreviousCheckWasTheSame()
 
         # Fetching complete, now filters
 
diff --git a/changedetectionio/update_worker.py b/changedetectionio/update_worker.py
index 97e1ec27..22be5128 100644
--- a/changedetectionio/update_worker.py
+++ b/changedetectionio/update_worker.py
@@ -260,9 +260,6 @@ class update_worker(threading.Thread):
                     try:
                         # Processor is what we are using for detecting the "Change"
                         processor = watch.get('processor', 'text_json_diff')
-                        # Abort processing when the content was the same as the last fetch
-                        skip_when_same_checksum = queued_item_data.item.get('skip_when_checksum_same')
-
 
                         # Init a new 'difference_detection_processor', first look in processors
                         processor_module_name = f"changedetectionio.processors.{processor}.processor"
@@ -279,8 +276,7 @@ class update_worker(threading.Thread):
                         update_handler.call_browser()
 
                         changed_detected, update_obj, contents = update_handler.run_changedetection(
-                            watch=watch,
-                            skip_when_checksum_same=skip_when_same_checksum,
+                            watch=watch
                         )
 
                         # Re #342