Added form.trim_text_whitespace

extra-filters
dgtlmoon 4 months ago
parent 09aae40c4a
commit 0506c01c07

@ -481,6 +481,7 @@ class processor_text_json_diff_form(commonSettingsForm):
ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False)
check_unique_lines = BooleanField('Only trigger when unique lines appear in all history', default=False)
sort_text_alphabetically = BooleanField('Sort text alphabetically', default=False)
trim_text_whitespace = BooleanField('Trim whitespace before and after text', default=False)
filter_text_added = BooleanField('Added lines', default=True)
filter_text_replaced = BooleanField('Replaced/changed lines', default=True)

@ -59,6 +59,7 @@ class watch_base(dict):
'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
'time_between_check_use_default': True,
'title': None,
'trim_text_whitespace': False,
'track_ldjson_price_data': None,
'trigger_text': [], # List of text or regex to wait for until a change is detected
'url': '',

@ -210,6 +210,9 @@ class perform_site_check(difference_detection_processor):
stripped_text_from_html = stripped_text_from_html.replace('\n\n', '\n')
stripped_text_from_html = '\n'.join( sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower() ))
if watch.get('trim_text_whitespace') and stripped_text_from_html:
stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.splitlines())
# Re #340 - return the content before the 'ignore text' was applied
text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')

@ -338,6 +338,11 @@ nav
{{ render_checkbox_field(form.sort_text_alphabetically) }}
<span class="pure-form-message-inline">Helps reduce changes detected caused by sites shuffling lines around, combine with <i>check unique lines</i> below.</span>
</fieldset>
<fieldset class="pure-control-group">
{{ render_checkbox_field(form.trim_text_whitespace) }}
<span class="pure-form-message-inline">Remove any whitespace before and after each line of text</span>
</fieldset>
<fieldset class="pure-control-group">
{{ render_checkbox_field(form.check_unique_lines) }}
<span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>

Loading…
Cancel
Save