From 601a20ea496120b598567674f13386f5717d0889 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Wed, 15 Jun 2022 19:11:20 +0200 Subject: [PATCH] Trigger filters improvement - it's possible some changes weren't getting detected because the previous checksum was only recorded when an event occurred (#697) --- changedetectionio/fetch_site_status.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/changedetectionio/fetch_site_status.py b/changedetectionio/fetch_site_status.py index 18441385..7a4f0a2e 100644 --- a/changedetectionio/fetch_site_status.py +++ b/changedetectionio/fetch_site_status.py @@ -224,11 +224,7 @@ class perform_site_check(): else: fetched_md5 = hashlib.md5(stripped_text_from_html).hexdigest() - # On the first run of a site, watch['previous_md5'] will be None, set it the current one. - if not watch.get('previous_md5'): - watch['previous_md5'] = fetched_md5 - update_obj["previous_md5"] = fetched_md5 - + ############ Blocking rules, after checksum ################# blocked_by_not_found_trigger_text = False if len(watch['trigger_text']): @@ -245,7 +241,7 @@ class perform_site_check(): if not blocked_by_not_found_trigger_text and watch['previous_md5'] != fetched_md5: changed_detected = True - update_obj["previous_md5"] = fetched_md5 + update_obj["last_changed"] = timestamp # Extract title as title @@ -254,4 +250,12 @@ class perform_site_check(): if not watch['title'] or not len(watch['title']): update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content) + # Always record the new checksum + update_obj["previous_md5"] = fetched_md5 + + # On the first run of a site, watch['previous_md5'] will be None, set it the current one. + if not watch.get('previous_md5'): + watch['previous_md5'] = fetched_md5 + + return changed_detected, update_obj, text_content_before_ignored_filter, fetcher.screenshot, fetcher.xpath_data