diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index 24718357..be1fc6be 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -405,7 +405,7 @@ def changedetection_app(config=None, datastore_o=None): # Get the most recent one newest_history_key = datastore.get_val(uuid, 'newest_history_key') - # 0 means that theres only one, so that there should be no 'unviewed' history availabe + # 0 means that theres only one, so that there should be no 'unviewed' history available if newest_history_key == 0: newest_history_key = list(datastore.data['watching'][uuid]['history'].keys())[0] @@ -418,7 +418,11 @@ def changedetection_app(config=None, datastore_o=None): stripped_content = handler.strip_ignore_text(raw_content, datastore.data['watching'][uuid]['ignore_text']) - checksum = hashlib.md5(stripped_content).hexdigest() + if datastore.data['settings']['application'].get('ignore_whitespace', False): + checksum = hashlib.md5(stripped_content.translate(None, b'\r\n\t ')).hexdigest() + else: + checksum = hashlib.md5(stripped_content).hexdigest() + return checksum return datastore.data['watching'][uuid]['previous_md5'] @@ -553,6 +557,7 @@ def changedetection_app(config=None, datastore_o=None): form.minutes_between_check.data = int(datastore.data['settings']['requests']['minutes_between_check']) form.notification_urls.data = datastore.data['settings']['application']['notification_urls'] form.global_ignore_text.data = datastore.data['settings']['application']['global_ignore_text'] + form.ignore_whitespace.data = datastore.data['settings']['application']['ignore_whitespace'] form.extract_title_as_title.data = datastore.data['settings']['application']['extract_title_as_title'] form.fetch_backend.data = datastore.data['settings']['application']['fetch_backend'] form.notification_title.data = datastore.data['settings']['application']['notification_title'] @@ -580,7 +585,8 @@ def changedetection_app(config=None, datastore_o=None): datastore.data['settings']['application']['notification_urls'] = form.notification_urls.data datastore.data['settings']['application']['base_url'] = form.base_url.data datastore.data['settings']['application']['global_ignore_text'] = form.global_ignore_text.data - + datastore.data['settings']['application']['ignore_whitespace'] = form.ignore_whitespace.data + if form.trigger_check.data: if len(form.notification_urls.data): n_object = {'watch_url': "Test from changedetection.io!", diff --git a/changedetectionio/fetch_site_status.py b/changedetectionio/fetch_site_status.py index dec73987..0a957114 100644 --- a/changedetectionio/fetch_site_status.py +++ b/changedetectionio/fetch_site_status.py @@ -58,8 +58,7 @@ class perform_site_check(): watch = self.datastore.data['watching'][uuid] - update_obj = {'previous_md5': self.datastore.data['watching'][uuid]['previous_md5'], - 'history': {}, + update_obj = { "last_checked": timestamp } @@ -137,8 +136,16 @@ class perform_site_check(): else: stripped_text_from_html = stripped_text_from_html.encode('utf8') + # Re #133 - if we should strip whitespaces from triggering the change detected comparison + if self.datastore.data['settings']['application'].get('ignore_whitespace', False): + fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest() + else: + fetched_md5 = hashlib.md5(stripped_text_from_html).hexdigest() - fetched_md5 = hashlib.md5(stripped_text_from_html).hexdigest() + # On the first run of a site, watch['previous_md5'] will be an empty string, set it the current one. + if not len(watch['previous_md5']): + watch['previous_md5'] = fetched_md5 + update_obj["previous_md5"] = fetched_md5 blocked_by_not_found_trigger_text = False @@ -160,16 +167,12 @@ class perform_site_check(): break - # could be None or False depending on JSON type - # On the first run of a site, watch['previous_md5'] will be an empty string + if not blocked_by_not_found_trigger_text and watch['previous_md5'] != fetched_md5: changed_detected = True - - # Don't confuse people by updating as last-changed, when it actually just changed from None.. - if self.datastore.get_val(uuid, 'previous_md5'): - update_obj["last_changed"] = timestamp - update_obj["previous_md5"] = fetched_md5 + update_obj["last_changed"] = timestamp + # Extract title as title if is_html: diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py index 586a27bb..020d9fa8 100644 --- a/changedetectionio/forms.py +++ b/changedetectionio/forms.py @@ -258,4 +258,5 @@ class globalSettingsForm(commonSettingsForm): [validators.NumberRange(min=1)]) extract_title_as_title = BooleanField('Extract
+ + + Which is across multiple lines
+Which is across multiple lines
+ + So let's see what happens. + + + + """ + + with open("test-datastore/endpoint-content.txt", "w") as f: + f.write(test_return_data) + + + +# If there was only a change in the whitespacing, then we shouldnt have a change detected +def test_check_ignore_whitespace(client, live_server): + sleep_time_for_fetch_thread = 3 + + # Give the endpoint time to spin up + time.sleep(1) + + set_original_ignore_response() + + # Goto the settings page, add our ignore text + res = client.post( + url_for("settings_page"), + data={ + "minutes_between_check": 180, + "ignore_whitespace": "y", + 'fetch_backend': "html_requests" + }, + follow_redirects=True + ) + assert b"Settings updated." in res.data + + # Add our URL to the import page + test_url = url_for('test_endpoint', _external=True) + res = client.post( + url_for("import_page"), + data={"urls": test_url}, + follow_redirects=True + ) + assert b"1 Imported" in res.data + + time.sleep(sleep_time_for_fetch_thread) + # Trigger a check + client.get(url_for("api_watch_checknow"), follow_redirects=True) + + set_original_ignore_response_but_with_whitespace() + time.sleep(sleep_time_for_fetch_thread) + # Trigger a check + client.get(url_for("api_watch_checknow"), follow_redirects=True) + + # Give the thread time to pick it up + time.sleep(sleep_time_for_fetch_thread) + + # It should report nothing found (no new 'unviewed' class) + res = client.get(url_for("index")) + assert b'unviewed' not in res.data + assert b'/test-endpoint' in res.data diff --git a/changedetectionio/update_worker.py b/changedetectionio/update_worker.py index a8992d96..a4181426 100644 --- a/changedetectionio/update_worker.py +++ b/changedetectionio/update_worker.py @@ -64,74 +64,77 @@ class update_worker(threading.Thread): self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)}) else: - if update_obj: - try: - self.datastore.update_watch(uuid=uuid, update_obj=update_obj) - if changed_detected: - n_object = {} - # A change was detected - fname = self.datastore.save_history_text(watch_uuid=uuid, contents=contents) - - # Update history with the stripped text for future reference, this will also mean we save the first - # Should always be keyed by string(timestamp) - self.datastore.update_watch(uuid, {"history": {str(update_obj["last_checked"]): fname}}) - - watch = self.datastore.data['watching'][uuid] - - print (">> Change detected in UUID {} - {}".format(uuid, watch['url'])) - - # Notifications should only trigger on the second time (first time, we gather the initial snapshot) - if len(watch['history']) > 1: - - dates = list(watch['history'].keys()) - # Convert to int, sort and back to str again - # @todo replace datastore getter that does this automatically - dates = [int(i) for i in dates] - dates.sort(reverse=True) - dates = [str(i) for i in dates] - - prev_fname = watch['history'][dates[1]] - - - # Did it have any notification alerts to hit? - if len(watch['notification_urls']): - print(">>> Notifications queued for UUID from watch {}".format(uuid)) - n_object['notification_urls'] = watch['notification_urls'] - n_object['notification_title'] = watch['notification_title'] - n_object['notification_body'] = watch['notification_body'] - n_object['notification_format'] = watch['notification_format'] - - # No? maybe theres a global setting, queue them all - elif len(self.datastore.data['settings']['application']['notification_urls']): - print(">>> Watch notification URLs were empty, using GLOBAL notifications for UUID: {}".format(uuid)) - n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls'] - n_object['notification_title'] = self.datastore.data['settings']['application']['notification_title'] - n_object['notification_body'] = self.datastore.data['settings']['application']['notification_body'] - n_object['notification_format'] = self.datastore.data['settings']['application']['notification_format'] + try: + watch = self.datastore.data['watching'][uuid] + + # For the FIRST time we check a site, or a change detected, save the snapshot. + if changed_detected or not watch['last_checked']: + # A change was detected + fname = self.datastore.save_history_text(watch_uuid=uuid, contents=contents) + # Should always be keyed by string(timestamp) + self.datastore.update_watch(uuid, {"history": {str(update_obj["last_checked"]): fname}}) + + # Generally update anything interesting returned + self.datastore.update_watch(uuid=uuid, update_obj=update_obj) + + # A change was detected + if changed_detected: + n_object = {} + print (">> Change detected in UUID {} - {}".format(uuid, watch['url'])) + + # Notifications should only trigger on the second time (first time, we gather the initial snapshot) + if len(watch['history']) > 1: + + dates = list(watch['history'].keys()) + # Convert to int, sort and back to str again + # @todo replace datastore getter that does this automatically + dates = [int(i) for i in dates] + dates.sort(reverse=True) + dates = [str(i) for i in dates] + + prev_fname = watch['history'][dates[1]] + + + # Did it have any notification alerts to hit? + if len(watch['notification_urls']): + print(">>> Notifications queued for UUID from watch {}".format(uuid)) + n_object['notification_urls'] = watch['notification_urls'] + n_object['notification_title'] = watch['notification_title'] + n_object['notification_body'] = watch['notification_body'] + n_object['notification_format'] = watch['notification_format'] + + # No? maybe theres a global setting, queue them all + elif len(self.datastore.data['settings']['application']['notification_urls']): + print(">>> Watch notification URLs were empty, using GLOBAL notifications for UUID: {}".format(uuid)) + n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls'] + n_object['notification_title'] = self.datastore.data['settings']['application']['notification_title'] + n_object['notification_body'] = self.datastore.data['settings']['application']['notification_body'] + n_object['notification_format'] = self.datastore.data['settings']['application']['notification_format'] + else: + print(">>> NO notifications queued, watch and global notification URLs were empty.") + + # Only prepare to notify if the rules above matched + if 'notification_urls' in n_object: + # HTML needs linebreak, but MarkDown and Text can use a linefeed + if n_object['notification_format'] == 'HTML': + line_feed_sep = "" else: - print(">>> NO notifications queued, watch and global notification URLs were empty.") - - # Only prepare to notify if the rules above matched - if 'notification_urls' in n_object: - # HTML needs linebreak, but MarkDown and Text can use a linefeed - if n_object['notification_format'] == 'HTML': - line_feed_sep = "" - else: - line_feed_sep = "\n" - - from changedetectionio import diff - n_object.update({ - 'watch_url': watch['url'], - 'uuid': uuid, - 'current_snapshot': contents.decode('utf-8'), - 'diff_full': diff.render_diff(prev_fname, fname, line_feed_sep=line_feed_sep), - 'diff': diff.render_diff(prev_fname, fname, True, line_feed_sep=line_feed_sep) - }) - - self.notification_q.put(n_object) - - except Exception as e: - print("!!!! Exception in update_worker !!!\n", e) + line_feed_sep = "\n" + + from changedetectionio import diff + n_object.update({ + 'watch_url': watch['url'], + 'uuid': uuid, + 'current_snapshot': contents.decode('utf-8'), + 'diff_full': diff.render_diff(prev_fname, fname, line_feed_sep=line_feed_sep), + 'diff': diff.render_diff(prev_fname, fname, True, line_feed_sep=line_feed_sep) + }) + + self.notification_q.put(n_object) + + except Exception as e: + # Catch everything possible here, so that if a worker crashes, we don't lose it until restart! + print("!!!! Exception in update_worker !!!\n", e) self.current_uuid = None # Done self.q.task_done()