Remove same checksum skip check - saved a little CPU but added a lot of complexity (#2700)

3 months ago · 5bb47e47db
parent 03151da68e
commit 5bb47e47db
10 changed files with 21 additions and 31 deletions
--- a/changedetectionio/api/api_v1.py
+++ b/changedetectionio/api/api_v1.py
@@ -58,7 +58,7 @@ class Watch(Resource):
            abort(404, message='No watch exists with the UUID of {}'.format(uuid))
        if request.args.get('recheck'):
-            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
+            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
            return "OK", 200
        if request.args.get('paused', '') == 'paused':
            self.datastore.data['watching'].get(uuid).pause()
@@ -246,7 +246,7 @@ class CreateWatch(Resource):
        new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags)
        if new_uuid:
-            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
+            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
            return {'uuid': new_uuid}, 201
        else:
            return "Invalid or unsupported URL", 400
@@ -303,7 +303,7 @@ class CreateWatch(Resource):
        if request.args.get('recheck_all'):
            for uuid in self.datastore.data['watching'].keys():
-                self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
+                self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
            return {'status': "OK"}, 200
        return list, 200
--- a/changedetectionio/blueprint/price_data_follower/__init__.py
+++ b/changedetectionio/blueprint/price_data_follower/__init__.py
@@ -19,7 +19,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
        datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
        datastore.data['watching'][uuid]['processor'] = 'restock_diff'
        datastore.data['watching'][uuid].clear_watch()
-        update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
+        update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
        return redirect(url_for("index"))
    @login_required
--- a/changedetectionio/flask_app.py
+++ b/changedetectionio/flask_app.py
@@ -795,7 +795,7 @@ def changedetection_app(config=None, datastore_o=None):
            datastore.needs_write_urgent = True
            # Queue the watch for immediate recheck, with a higher priority
-            update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
+            update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
            # Diff page [edit] link should go back to diff page
            if request.args.get("next") and request.args.get("next") == 'diff':
@@ -976,7 +976,7 @@ def changedetection_app(config=None, datastore_o=None):
                importer = import_url_list()
                importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff'))
                for uuid in importer.new_uuids:
-                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
                if len(importer.remaining_data) == 0:
                    return redirect(url_for('index'))
@@ -989,7 +989,7 @@ def changedetection_app(config=None, datastore_o=None):
                d_importer = import_distill_io_json()
                d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
                for uuid in d_importer.new_uuids:
-                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
            # XLSX importer
            if request.files and request.files.get('xlsx_file'):
@@ -1013,7 +1013,7 @@ def changedetection_app(config=None, datastore_o=None):
                    w_importer.run(data=file, flash=flash, datastore=datastore)
                for uuid in w_importer.new_uuids:
-                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
        # Could be some remaining, or we could be on GET
        form = forms.importForm(formdata=request.form if request.method == 'POST' else None)
@@ -1442,7 +1442,7 @@ def changedetection_app(config=None, datastore_o=None):
        new_uuid = datastore.clone(uuid)
        if new_uuid:
            if not datastore.data['watching'].get(uuid).get('paused'):
-                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
+                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
            flash('Cloned.')
        return redirect(url_for('index'))
@@ -1463,7 +1463,7 @@ def changedetection_app(config=None, datastore_o=None):
        if uuid:
            if uuid not in running_uuids:
-                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
+                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
            i = 1
        elif tag:
@@ -1474,7 +1474,7 @@ def changedetection_app(config=None, datastore_o=None):
                        continue
                    if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
                        update_q.put(
-                            queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False})
+                            queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
                        )
                        i += 1
@@ -1484,7 +1484,7 @@ def changedetection_app(config=None, datastore_o=None):
                if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
                    if with_errors and not watch.get('last_error'):
                        continue
-                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False}))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
                    i += 1
        flash(f"{i} watches queued for rechecking.")
        return redirect(url_for('index', tag=tag))
@@ -1542,7 +1542,7 @@ def changedetection_app(config=None, datastore_o=None):
                uuid = uuid.strip()
                if datastore.data['watching'].get(uuid):
                    # Recheck and require a full reprocessing
-                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
            flash("{} watches queued for rechecking".format(len(uuids)))
        elif (op == 'clear-errors'):
@@ -1866,7 +1866,7 @@ def ticker_thread_check_time_launch_checks():
                        f"{now - watch['last_checked']:0.2f}s since last checked")
                    # Into the queue with you
-                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid, 'skip_when_checksum_same': True}))
+                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid}))
                    # Reset for next time
                    watch.jitter_seconds = 0
--- a/changedetectionio/processors/__init__.py
+++ b/changedetectionio/processors/__init__.py
@@ -157,7 +157,7 @@ class difference_detection_processor():
        # After init, call run_changedetection() which will do the actual change-detection
    @abstractmethod
-    def run_changedetection(self, watch, skip_when_checksum_same: bool = True):
+    def run_changedetection(self, watch):
        update_obj = {'last_notification_error': False, 'last_error': False}
        some_data = 'xxxxx'
        update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
--- a/changedetectionio/processors/restock_diff/processor.py
+++ b/changedetectionio/processors/restock_diff/processor.py
@@ -144,7 +144,7 @@ class perform_site_check(difference_detection_processor):
    screenshot = None
    xpath_data = None
-    def run_changedetection(self, watch, skip_when_checksum_same=True):
+    def run_changedetection(self, watch):
        import hashlib
        if not watch:
--- a/changedetectionio/processors/text_json_diff/__init__.py
+++ b/changedetectionio/processors/text_json_diff/__init__.py
@@ -11,10 +11,7 @@ def _task(watch, update_handler):
    try:
        # The slow process (we run 2 of these in parallel)
-        changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(
+        changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(watch=watch)
-            watch=watch,
-            skip_when_checksum_same=False,
-        )
    except FilterNotFoundInResponse as e:
        text_after_filter = f"Filter not found in HTML: {str(e)}"
    except ReplyWithContentButNoText as e:
--- a/changedetectionio/processors/text_json_diff/processor.py
+++ b/changedetectionio/processors/text_json_diff/processor.py
@@ -35,7 +35,7 @@ class PDFToHTMLToolNotFound(ValueError):
 # (set_proxy_from_list)
 class perform_site_check(difference_detection_processor):
-    def run_changedetection(self, watch, skip_when_checksum_same=True):
+    def run_changedetection(self, watch):
        changed_detected = False
        html_content = ""
        screenshot = False  # as bytes
@@ -58,9 +58,6 @@ class perform_site_check(difference_detection_processor):
        # Watches added automatically in the queue manager will skip if its the same checksum as the previous run
        # Saves a lot of CPU
        update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()
-        if skip_when_checksum_same:
-            if update_obj['previous_md5_before_filters'] == watch.get('previous_md5_before_filters'):
-                raise content_fetchers.exceptions.checksumFromPreviousCheckWasTheSame()
        # Fetching complete, now filters
--- a/changedetectionio/static/styles/scss/styles.scss
+++ b/changedetectionio/static/styles/scss/styles.scss
@@ -147,6 +147,7 @@ body.spinner-active {
  }
 }
 .tabs ul li a {
  // .tab-pane-inner will have the #id that the tab button jumps/anchors to
  scroll-margin-top: 200px;
--- a/changedetectionio/static/styles/styles.css
+++ b/changedetectionio/static/styles/styles.css
@@ -605,6 +605,7 @@ body.spinner-active #pure-menu-horizontal-spinner {
    background-color: var(--color-background-menu-link-hover);
    color: var(--color-text-menu-link-hover); }
 .tabs ul li a {
  scroll-margin-top: 200px; }
--- a/changedetectionio/update_worker.py
+++ b/changedetectionio/update_worker.py
@@ -260,9 +260,6 @@ class update_worker(threading.Thread):
                    try:
                        # Processor is what we are using for detecting the "Change"
                        processor = watch.get('processor', 'text_json_diff')
-                        # Abort processing when the content was the same as the last fetch
-                        skip_when_same_checksum = queued_item_data.item.get('skip_when_checksum_same')
                        # Init a new 'difference_detection_processor', first look in processors
                        processor_module_name = f"changedetectionio.processors.{processor}.processor"
@@ -278,10 +275,7 @@ class update_worker(threading.Thread):
                        update_handler.call_browser()
-                        changed_detected, update_obj, contents = update_handler.run_changedetection(
+                        changed_detected, update_obj, contents = update_handler.run_changedetection(watch=watch)
-                            watch=watch,
-                            skip_when_checksum_same=skip_when_same_checksum,
-                        )
                        # Re #342
                        # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.