diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py
index 5011afaf..d3ac30fc 100644
--- a/changedetectionio/forms.py
+++ b/changedetectionio/forms.py
@@ -480,8 +480,10 @@ class processor_text_json_diff_form(commonSettingsForm):
body = TextAreaField('Request body', [validators.Optional()])
method = SelectField('Request method', choices=valid_method, default=default_method)
ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False)
- check_unique_lines = BooleanField('Only trigger when unique lines appear', default=False)
+ check_unique_lines = BooleanField('Only trigger when unique lines appear in all history', default=False)
+ remove_duplicate_lines = BooleanField('Remove duplicate lines of text', default=False)
sort_text_alphabetically = BooleanField('Sort text alphabetically', default=False)
+ trim_text_whitespace = BooleanField('Trim whitespace before and after text', default=False)
filter_text_added = BooleanField('Added lines', default=True)
filter_text_replaced = BooleanField('Replaced/changed lines', default=True)
diff --git a/changedetectionio/model/__init__.py b/changedetectionio/model/__init__.py
index e439de4f..3b95c91c 100644
--- a/changedetectionio/model/__init__.py
+++ b/changedetectionio/model/__init__.py
@@ -60,6 +60,8 @@ class watch_base(dict):
'time_between_check_use_default': True,
'title': None,
'track_ldjson_price_data': None,
+ 'trim_text_whitespace': False,
+ 'remove_duplicate_lines': False,
'trigger_text': [], # List of text or regex to wait for until a change is detected
'url': '',
'uuid': str(uuid.uuid4()),
diff --git a/changedetectionio/processors/text_json_diff/processor.py b/changedetectionio/processors/text_json_diff/processor.py
index 77c37131..d8be0967 100644
--- a/changedetectionio/processors/text_json_diff/processor.py
+++ b/changedetectionio/processors/text_json_diff/processor.py
@@ -218,11 +218,19 @@ class perform_site_check(difference_detection_processor):
is_rss=is_rss)) #1874 activate the
# Because a <p>something</p> will add an extra line feed to signify the paragraph gap
# we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
- stripped_text_from_html = stripped_text_from_html.replace('\n\n', '\n')
- stripped_text_from_html = '\n'.join( sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower() ))
+ stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n")
+ stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower()))
+
# Re #340 - return the content before the 'ignore text' was applied
text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
@@ -304,7 +312,7 @@ class perform_site_check(difference_detection_processor):
for match in res:
regex_matched_output += [match] + [b'\n']
- # Now we will only show what the regex matched
+ ##########################################################
stripped_text_from_html = b''
text_content_before_ignored_filter = b''
if regex_matched_output:
@@ -312,6 +320,8 @@ class perform_site_check(difference_detection_processor):
stripped_text_from_html = b''.join(regex_matched_output)
text_content_before_ignored_filter = stripped_text_from_html
+
+
# Re #133 - if we should strip whitespaces from triggering the change detected comparison
if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
diff --git a/changedetectionio/templates/edit.html b/changedetectionio/templates/edit.html
index 8ce58755..118aaec6 100644
--- a/changedetectionio/templates/edit.html
+++ b/changedetectionio/templates/edit.html
@@ -331,11 +331,22 @@ nav
So it's always better to select Added+Replaced when you're interested in new content. When content is merely moved in a list, it will also trigger an addition; consider enabling "Only trigger when unique lines appear".
-
+
+
+