From 616c0b3f65feeb9dbe21e83b06bbf42c0d27613e Mon Sep 17 00:00:00 2001
From: dgtlmoon
Date: Fri, 2 Feb 2024 11:36:58 +0100
Subject: [PATCH] New text filter - Sort text alphabetically filter (#2153)
---
changedetectionio/forms.py | 1 +
changedetectionio/model/Watch.py | 1 +
.../processors/text_json_diff.py | 6 ++
changedetectionio/templates/edit.html | 4 +
changedetectionio/tests/test_unique_lines.py | 83 +++++++++++++++++--
5 files changed, 86 insertions(+), 9 deletions(-)
diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py
index 9f72a748..a4480cc1 100644
--- a/changedetectionio/forms.py
+++ b/changedetectionio/forms.py
@@ -465,6 +465,7 @@ class watchForm(commonSettingsForm):
method = SelectField('Request method', choices=valid_method, default=default_method)
ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False)
check_unique_lines = BooleanField('Only trigger when unique lines appear', default=False)
+ sort_text_alphabetically = BooleanField('Sort text alphabetically', default=False)
filter_text_added = BooleanField('Added lines', default=True)
filter_text_replaced = BooleanField('Replaced/changed lines', default=True)
diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py
index 2bb7a400..602df5cc 100644
--- a/changedetectionio/model/Watch.py
+++ b/changedetectionio/model/Watch.py
@@ -58,6 +58,7 @@ base_config = {
'previous_md5_before_filters': False, # Used for skipping changedetection entirely
'proxy': None, # Preferred proxy connection
'remote_server_reply': None, # From 'server' reply header
+ 'sort_text_alphabetically': False,
'subtractive_selectors': [],
'tag': '', # Old system of text name for a tag, to be removed
'tags': [], # list of UUIDs to App.Tags
diff --git a/changedetectionio/processors/text_json_diff.py b/changedetectionio/processors/text_json_diff.py
index 619a2856..0f185150 100644
--- a/changedetectionio/processors/text_json_diff.py
+++ b/changedetectionio/processors/text_json_diff.py
@@ -204,6 +204,12 @@ class perform_site_check(difference_detection_processor):
is_rss=is_rss # #1874 activate the something
will add an extra line feed to signify the paragraph gap
+ # we end up with 'Some text\n\n'; sorting would move all of those empty lines to the start, so we collapse the doubled line feeds here.
+ stripped_text_from_html = stripped_text_from_html.replace('\n\n', '\n')
+ stripped_text_from_html = '\n'.join( sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower() ))
+
# Re #340 - return the content before the 'ignore text' was applied
text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
diff --git a/changedetectionio/templates/edit.html b/changedetectionio/templates/edit.html
index a5acd213..812ddb2b 100644
--- a/changedetectionio/templates/edit.html
+++ b/changedetectionio/templates/edit.html
@@ -339,6 +339,10 @@ nav
When content is merely moved in a list, it will also trigger an addition, consider enabling Only trigger when unique lines appear
+