diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py index abc2fc4f..8a88c166 100644 --- a/changedetectionio/forms.py +++ b/changedetectionio/forms.py @@ -481,6 +481,7 @@ class processor_text_json_diff_form(commonSettingsForm): ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False) check_unique_lines = BooleanField('Only trigger when unique lines appear in all history', default=False) sort_text_alphabetically = BooleanField('Sort text alphabetically', default=False) + trim_text_whitespace = BooleanField('Trim whitespace before and after text', default=False) filter_text_added = BooleanField('Added lines', default=True) filter_text_replaced = BooleanField('Replaced/changed lines', default=True) diff --git a/changedetectionio/model/__init__.py b/changedetectionio/model/__init__.py index e439de4f..6ecb56f8 100644 --- a/changedetectionio/model/__init__.py +++ b/changedetectionio/model/__init__.py @@ -59,6 +59,7 @@ class watch_base(dict): 'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None}, 'time_between_check_use_default': True, 'title': None, + 'trim_text_whitespace': False, 'track_ldjson_price_data': None, 'trigger_text': [], # List of text or regex to wait for until a change is detected 'url': '', diff --git a/changedetectionio/processors/text_json_diff/processor.py b/changedetectionio/processors/text_json_diff/processor.py index 115cb6b1..f5a23572 100644 --- a/changedetectionio/processors/text_json_diff/processor.py +++ b/changedetectionio/processors/text_json_diff/processor.py @@ -210,6 +210,9 @@ class perform_site_check(difference_detection_processor): stripped_text_from_html = stripped_text_from_html.replace('\n\n', '\n') stripped_text_from_html = '\n'.join( sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower() )) + if watch.get('trim_text_whitespace') and stripped_text_from_html: + stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.splitlines()) + # Re #340 - return the content before the 'ignore text' was applied text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8') diff --git a/changedetectionio/templates/edit.html b/changedetectionio/templates/edit.html index e5f5e642..8ce3b095 100644 --- a/changedetectionio/templates/edit.html +++ b/changedetectionio/templates/edit.html @@ -338,6 +338,11 @@ nav {{ render_checkbox_field(form.sort_text_alphabetically) }} Helps reduce changes detected caused by sites shuffling lines around, combine with check unique lines below. +
+ {{ render_checkbox_field(form.trim_text_whitespace) }} + Remove any whitespace before and after each line of text + +
{{ render_checkbox_field(form.check_unique_lines) }} Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.