From 854520005de9ee190c83f5c3013ebe9a77ef1772 Mon Sep 17 00:00:00 2001
From: dgtlmoon <dgtlmoon@gmail.com>
Date: Mon, 21 Jun 2021 17:17:22 +1000
Subject: [PATCH] #81 - Regex support (#90)

* Re #81 - Regex support
* minor cleanup
---
 backend/__init__.py                     | 10 ++++++++
 backend/fetch_site_status.py            | 24 +++++++++++++++----
 backend/forms.py                        | 25 ++++++++++++++++++--
 backend/templates/edit.html             | 10 ++++++--
 backend/tests/test_ignore_regex_text.py | 31 +++++++++++++++++++++++++
 5 files changed, 92 insertions(+), 8 deletions(-)
 create mode 100644 backend/tests/test_ignore_regex_text.py

diff --git a/backend/__init__.py b/backend/__init__.py
index 2909c305..3522b45f 100644
--- a/backend/__init__.py
+++ b/backend/__init__.py
@@ -369,6 +369,10 @@ def changedetection_app(config=None, datastore_o=None):
             uuid = list(datastore.data['watching'].keys()).pop()
 
         if request.method == 'GET':
+            if not uuid in datastore.data['watching']:
+                flash("No watch with the UUID %s found." % (uuid), "error")
+                return redirect(url_for('index'))
+
             populate_form_from_watch(form, datastore.data['watching'][uuid])
 
         if request.method == 'POST' and form.validate():
@@ -415,6 +419,9 @@ def changedetection_app(config=None, datastore_o=None):
             return redirect(url_for('index'))
 
         else:
+            if request.method == 'POST' and not form.validate():
+                flash("An error occurred, please see below.", "error")
+
             output = render_template("edit.html", uuid=uuid, watch=datastore.data['watching'][uuid], form=form)
 
         return output
@@ -478,6 +485,9 @@ def changedetection_app(config=None, datastore_o=None):
 
             flash("Settings updated.")
 
+        if request.method == 'POST' and not form.validate():
+            flash("An error occurred, please see below.", "error")
+
         output = render_template("settings.html", form=form)
         return output
 
diff --git a/backend/fetch_site_status.py b/backend/fetch_site_status.py
index a1034769..6856a63f 100644
--- a/backend/fetch_site_status.py
+++ b/backend/fetch_site_status.py
@@ -13,18 +13,34 @@ class perform_site_check():
         self.datastore = datastore
 
     def strip_ignore_text(self, content, list_ignore_text):
+        """Return content as UTF-8 bytes, minus blank lines and lines matching an ignore rule."""
+        import re
         ignore = []
+        ignore_regex = []
         for k in list_ignore_text:
-            ignore.append(k.encode('utf8'))
+            # A leading '/' marks a regex; rules that end up empty are dropped
+            # because '' is a substring (and a regex match) of every line.
+            is_regex = k.startswith('/')
+            rule = k.strip(" /") if is_regex else k
+            if rule:
+                (ignore_regex if is_regex else ignore).append(rule)
 
         output = []
         for line in content.splitlines():
-            line = line.encode('utf8')
 
             # Always ignore blank lines in this mode. (when this function gets called)
             if len(line.strip()):
-                if not any(skip_text in line for skip_text in ignore):
-                    output.append(line)
+                regex_matches = False
+                # if any of these match, skip
+                for regex in ignore_regex:
+                    try:
+                        if re.search(regex, line, re.IGNORECASE):
+                            regex_matches = True
+                            break
+                    except (re.error, OverflowError):
+                        continue  # unusable pattern: treat it as "no match"
+                if not regex_matches and not any(skip_text in line for skip_text in ignore):
+                    output.append(line.encode('utf8'))
 
         return "\n".encode('utf8').join(output)
 
diff --git a/backend/forms.py b/backend/forms.py
index fa41d9fb..f0449200 100644
--- a/backend/forms.py
+++ b/backend/forms.py
@@ -1,6 +1,7 @@
 from wtforms import Form, BooleanField, StringField, PasswordField, validators, IntegerField, fields, TextAreaField, \
     Field
 from wtforms import widgets
+from wtforms.validators import ValidationError
 from wtforms.fields import html5
 
 
@@ -47,7 +48,7 @@ class SaltyPasswordField(StringField):
         if valuelist:
             # Remove empty strings
             self.encrypted_password = self.build_password(valuelist[0])
-            self.data=[]
+            self.data = []
         else:
             self.data = []
 
@@ -80,6 +81,26 @@ class StringDictKeyValue(StringField):
         else:
             self.data = {}
 
+class ListRegex(object):
+    """
+    Validates that every ignore line the fetcher will treat as a regex compiles.
+    """
+    def __init__(self, message=None):
+        self.message = message
+
+    def __call__(self, form, field):
+        import re
+
+        for line in field.data or []:
+            # Same test and trimming as strip_ignore_text(); safe on empty lines
+            if line.startswith('/'):
+                line = line.strip(' /')
+                try:
+                    re.compile(line)
+                except (re.error, OverflowError):
+                    message = self.message or field.gettext('RegEx \'%s\' is not a valid regular expression.') % line
+                    raise ValidationError(message)
+
 
 class watchForm(Form):
     # https://wtforms.readthedocs.io/en/2.3.x/fields/#module-wtforms.fields.html5
@@ -91,7 +112,7 @@ class watchForm(Form):
                                                [validators.Optional(), validators.NumberRange(min=1)])
     css_filter = StringField('CSS Filter')
 
-    ignore_text = StringListField('Ignore Text')
+    ignore_text = StringListField('Ignore Text', [ListRegex()])
     notification_urls = StringListField('Notification URL List')
     headers = StringDictKeyValue('Request Headers')
     trigger_check = BooleanField('Send test notification on save')
diff --git a/backend/templates/edit.html b/backend/templates/edit.html
index cbc201c7..9c600f9c 100644
--- a/backend/templates/edit.html
+++ b/backend/templates/edit.html
@@ -22,8 +22,14 @@
             </div>
             <!-- @todo: move to tabs --->
             <fieldset class="pure-group">
-                {{ render_field(form.ignore_text, rows=5) }}
-                <span class="pure-form-message-inline">Each line will be processed separately as an ignore rule.</span>
+                {{ render_field(form.ignore_text, rows=5,  placeholder="Some text to ignore in a line
+/some.regex\d{2}/   for case-INsensitive regex
+") }}
+                <span class="pure-form-message-inline">
+                    Each line is processed separately; any line that matches will be ignored.<br/>
+                    Regular expressions are supported: wrap the line in forward slashes, e.g. <b>/regex/</b>.
+                </span>
+
             </fieldset>
 
             <fieldset class="pure-group">
diff --git a/backend/tests/test_ignore_regex_text.py b/backend/tests/test_ignore_regex_text.py
new file mode 100644
index 00000000..482434a2
--- /dev/null
+++ b/backend/tests/test_ignore_regex_text.py
@@ -0,0 +1,31 @@
+#!/usr/bin/python3
+
+import time
+from flask import url_for
+from . util import live_server_setup
+
+def test_setup(live_server):  # passes the live_server fixture to the shared helper from .util (presumably boots the app under test - confirm)
+    live_server_setup(live_server)
+
+# Unit test of the stripper
+# Always we are dealing in utf-8
+def test_strip_regex_text_func():
+    """Plain-text and /regex/ ignore rules must each drop the lines they match."""
+    from backend import fetch_site_status
+    test_content = """
+    but sometimes we want to remove the lines.
+    
+    but 1 lines
+    but including 123 lines
+    igNORe-cAse text we dont want to keep    
+    but not always."""
+
+    # Raw string: "\s" and "\d" are invalid escapes in a normal string literal
+    ignore_lines = ["sometimes", r"/\s\d{2,3}\s/", "/ignore-case text/"]
+    fetcher = fetch_site_status.perform_site_check(datastore=False)
+    stripped_content = fetcher.strip_ignore_text(test_content, ignore_lines)
+    assert b"but 1 lines" in stripped_content
+    assert b"sometimes" not in stripped_content
+    assert b"igNORe-cAse text" not in stripped_content
+    assert b"but including 123 lines" not in stripped_content
+