Always process; don't skip when the checksums are the same. Skipping saved a small amount of CPU but added complexity and issues.

skip_when_checksum_same-disable
dgtlmoon 3 months ago
parent ec4e2f5649
commit 51fc81ad3e

@ -58,7 +58,7 @@ class Watch(Resource):
abort(404, message='No watch exists with the UUID of {}'.format(uuid)) abort(404, message='No watch exists with the UUID of {}'.format(uuid))
if request.args.get('recheck'): if request.args.get('recheck'):
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
return "OK", 200 return "OK", 200
if request.args.get('paused', '') == 'paused': if request.args.get('paused', '') == 'paused':
self.datastore.data['watching'].get(uuid).pause() self.datastore.data['watching'].get(uuid).pause()
@ -246,7 +246,7 @@ class CreateWatch(Resource):
new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags) new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags)
if new_uuid: if new_uuid:
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True})) self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
return {'uuid': new_uuid}, 201 return {'uuid': new_uuid}, 201
else: else:
return "Invalid or unsupported URL", 400 return "Invalid or unsupported URL", 400
@ -303,7 +303,7 @@ class CreateWatch(Resource):
if request.args.get('recheck_all'): if request.args.get('recheck_all'):
for uuid in self.datastore.data['watching'].keys(): for uuid in self.datastore.data['watching'].keys():
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
return {'status': "OK"}, 200 return {'status': "OK"}, 200
return list, 200 return list, 200

@ -19,7 +19,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
datastore.data['watching'][uuid]['processor'] = 'restock_diff' datastore.data['watching'][uuid]['processor'] = 'restock_diff'
datastore.data['watching'][uuid].clear_watch() datastore.data['watching'][uuid].clear_watch()
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False})) update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
return redirect(url_for("index")) return redirect(url_for("index"))
@login_required @login_required

@ -799,7 +799,7 @@ def changedetection_app(config=None, datastore_o=None):
datastore.needs_write_urgent = True datastore.needs_write_urgent = True
# Queue the watch for immediate recheck, with a higher priority # Queue the watch for immediate recheck, with a higher priority
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False})) update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
# Diff page [edit] link should go back to diff page # Diff page [edit] link should go back to diff page
if request.args.get("next") and request.args.get("next") == 'diff': if request.args.get("next") and request.args.get("next") == 'diff':
@ -980,7 +980,7 @@ def changedetection_app(config=None, datastore_o=None):
importer = import_url_list() importer = import_url_list()
importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff')) importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff'))
for uuid in importer.new_uuids: for uuid in importer.new_uuids:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
if len(importer.remaining_data) == 0: if len(importer.remaining_data) == 0:
return redirect(url_for('index')) return redirect(url_for('index'))
@ -993,7 +993,7 @@ def changedetection_app(config=None, datastore_o=None):
d_importer = import_distill_io_json() d_importer = import_distill_io_json()
d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore) d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
for uuid in d_importer.new_uuids: for uuid in d_importer.new_uuids:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
# XLSX importer # XLSX importer
if request.files and request.files.get('xlsx_file'): if request.files and request.files.get('xlsx_file'):
@ -1017,7 +1017,7 @@ def changedetection_app(config=None, datastore_o=None):
w_importer.run(data=file, flash=flash, datastore=datastore) w_importer.run(data=file, flash=flash, datastore=datastore)
for uuid in w_importer.new_uuids: for uuid in w_importer.new_uuids:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
# Could be some remaining, or we could be on GET # Could be some remaining, or we could be on GET
form = forms.importForm(formdata=request.form if request.method == 'POST' else None) form = forms.importForm(formdata=request.form if request.method == 'POST' else None)
@ -1414,8 +1414,7 @@ def changedetection_app(config=None, datastore_o=None):
update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type') update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type')
try: try:
changed_detected, update_obj, text_after_filter = update_handler.run_changedetection( changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(
watch=tmp_watch, watch=tmp_watch
skip_when_checksum_same=False,
) )
except FilterNotFoundInResponse as e: except FilterNotFoundInResponse as e:
text_after_filter = f"Filter not found in HTML: {str(e)}" text_after_filter = f"Filter not found in HTML: {str(e)}"
@ -1515,7 +1514,7 @@ def changedetection_app(config=None, datastore_o=None):
new_uuid = datastore.clone(uuid) new_uuid = datastore.clone(uuid)
if new_uuid: if new_uuid:
if not datastore.data['watching'].get(uuid).get('paused'): if not datastore.data['watching'].get(uuid).get('paused'):
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True})) update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
flash('Cloned.') flash('Cloned.')
return redirect(url_for('index')) return redirect(url_for('index'))
@ -1536,7 +1535,7 @@ def changedetection_app(config=None, datastore_o=None):
if uuid: if uuid:
if uuid not in running_uuids: if uuid not in running_uuids:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False})) update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
i = 1 i = 1
elif tag: elif tag:
@ -1547,7 +1546,7 @@ def changedetection_app(config=None, datastore_o=None):
continue continue
if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']: if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
update_q.put( update_q.put(
queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False}) queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
) )
i += 1 i += 1
@ -1557,7 +1556,7 @@ def changedetection_app(config=None, datastore_o=None):
if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']: if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
if with_errors and not watch.get('last_error'): if with_errors and not watch.get('last_error'):
continue continue
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False})) update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
i += 1 i += 1
flash(f"{i} watches queued for rechecking.") flash(f"{i} watches queued for rechecking.")
@ -1616,7 +1615,7 @@ def changedetection_app(config=None, datastore_o=None):
uuid = uuid.strip() uuid = uuid.strip()
if datastore.data['watching'].get(uuid): if datastore.data['watching'].get(uuid):
# Recheck and require a full reprocessing # Recheck and require a full reprocessing
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False})) update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
flash("{} watches queued for rechecking".format(len(uuids))) flash("{} watches queued for rechecking".format(len(uuids)))
elif (op == 'clear-errors'): elif (op == 'clear-errors'):
@ -1940,7 +1939,7 @@ def ticker_thread_check_time_launch_checks():
f"{now - watch['last_checked']:0.2f}s since last checked") f"{now - watch['last_checked']:0.2f}s since last checked")
# Into the queue with you # Into the queue with you
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid, 'skip_when_checksum_same': True})) update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid}))
# Reset for next time # Reset for next time
watch.jitter_seconds = 0 watch.jitter_seconds = 0

@ -155,7 +155,7 @@ class difference_detection_processor():
# After init, call run_changedetection() which will do the actual change-detection # After init, call run_changedetection() which will do the actual change-detection
@abstractmethod @abstractmethod
def run_changedetection(self, watch, skip_when_checksum_same: bool = True): def run_changedetection(self, watch):
update_obj = {'last_notification_error': False, 'last_error': False} update_obj = {'last_notification_error': False, 'last_error': False}
some_data = 'xxxxx' some_data = 'xxxxx'
update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest() update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()

@ -140,7 +140,7 @@ class perform_site_check(difference_detection_processor):
screenshot = None screenshot = None
xpath_data = None xpath_data = None
def run_changedetection(self, watch, skip_when_checksum_same=True): def run_changedetection(self, watch):
import hashlib import hashlib
if not watch: if not watch:

@ -35,7 +35,7 @@ class PDFToHTMLToolNotFound(ValueError):
# (set_proxy_from_list) # (set_proxy_from_list)
class perform_site_check(difference_detection_processor): class perform_site_check(difference_detection_processor):
def run_changedetection(self, watch, skip_when_checksum_same=True): def run_changedetection(self, watch):
changed_detected = False changed_detected = False
html_content = "" html_content = ""
@ -59,9 +59,6 @@ class perform_site_check(difference_detection_processor):
# Watches added automatically in the queue manager will skip if its the same checksum as the previous run # Watches added automatically in the queue manager will skip if its the same checksum as the previous run
# Saves a lot of CPU # Saves a lot of CPU
update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest() update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()
if skip_when_checksum_same:
if update_obj['previous_md5_before_filters'] == watch.get('previous_md5_before_filters'):
raise content_fetchers.exceptions.checksumFromPreviousCheckWasTheSame()
# Fetching complete, now filters # Fetching complete, now filters

@ -260,9 +260,6 @@ class update_worker(threading.Thread):
try: try:
# Processor is what we are using for detecting the "Change" # Processor is what we are using for detecting the "Change"
processor = watch.get('processor', 'text_json_diff') processor = watch.get('processor', 'text_json_diff')
# Abort processing when the content was the same as the last fetch
skip_when_same_checksum = queued_item_data.item.get('skip_when_checksum_same')
# Init a new 'difference_detection_processor', first look in processors # Init a new 'difference_detection_processor', first look in processors
processor_module_name = f"changedetectionio.processors.{processor}.processor" processor_module_name = f"changedetectionio.processors.{processor}.processor"
@ -279,8 +276,7 @@ class update_worker(threading.Thread):
update_handler.call_browser() update_handler.call_browser()
changed_detected, update_obj, contents = update_handler.run_changedetection( changed_detected, update_obj, contents = update_handler.run_changedetection(
watch=watch, watch=watch
skip_when_checksum_same=skip_when_same_checksum,
) )
# Re #342 # Re #342

Loading…
Cancel
Save