#81 - Regex support (#90)

* Re #81 - Regex support * minor cleanup
4 years ago · 854520005d
parent af24079053
commit 854520005d
5 changed files with 92 additions and 8 deletions
--- a/backend/init.py
+++ b/backend/init.py
@ -369,6 +369,10 @@ def changedetection_app(config=None, datastore_o=None):
            uuid = list(datastore.data['watching'].keys()).pop()
        if request.method == 'GET':
            if not uuid in datastore.data['watching']:
                flash("No watch with the UUID %s found." % (uuid), "error")
                return redirect(url_for('index'))
            populate_form_from_watch(form, datastore.data['watching'][uuid])
        if request.method == 'POST' and form.validate():
@ -415,6 +419,9 @@ def changedetection_app(config=None, datastore_o=None):
            return redirect(url_for('index'))
        else:
            if request.method == 'POST' and not form.validate():
                flash("An error occurred, please see below.", "error")
            output = render_template("edit.html", uuid=uuid, watch=datastore.data['watching'][uuid], form=form)
        return output
@ -478,6 +485,9 @@ def changedetection_app(config=None, datastore_o=None):
            flash("Settings updated.")
        if request.method == 'POST' and not form.validate():
            flash("An error occurred, please see below.", "error")
        output = render_template("settings.html", form=form)
        return output
--- a/backend/fetch_site_status.py
+++ b/backend/fetch_site_status.py
@ -13,18 +13,34 @@ class perform_site_check():
        self.datastore = datastore
    def strip_ignore_text(self, content, list_ignore_text):
        """Return *content* without blank lines and without ignored lines.

        Each entry of *list_ignore_text* is one ignore rule:

        * a rule whose first character is ``/`` is a regular expression;
          spaces and slashes are stripped from both ends and the remainder
          is searched for case-insensitively in each line;
        * any other rule is a plain, case-sensitive substring.

        Args:
            content: The text to filter, as ``str``.
            list_ignore_text: Iterable of rule strings as described above.

        Returns:
            ``bytes``: the surviving lines, each UTF-8 encoded and joined
            with ``b"\\n"``. Blank lines are always dropped.
        """
        import re

        plain_rules = []
        regex_rules = []
        for rule in list_ignore_text:
            # An empty rule has no first character to inspect and, as a
            # substring, would match (and so remove) every single line.
            if not rule:
                continue

            # Is it a regex?
            if rule[0] == '/':
                pattern = rule.strip(" /")
                # An empty expression matches everything; treat it as no rule.
                if not pattern:
                    continue
                try:
                    # Compile once here rather than once per content line.
                    regex_rules.append(re.compile(pattern, re.IGNORECASE))
                except re.error:
                    # Best effort: an invalid expression is skipped rather
                    # than aborting the whole check.
                    continue
            else:
                plain_rules.append(rule)

        output = []
        for line in content.splitlines():
            # Always ignore blank lines in this mode. (when this function gets called)
            if not line.strip():
                continue
            # Match against the text form of the line; encoding happens only
            # once, when the line is kept.
            if any(regex.search(line) for regex in regex_rules):
                continue
            if any(skip_text in line for skip_text in plain_rules):
                continue
            output.append(line.encode('utf8'))

        return b"\n".join(output)
--- a/backend/forms.py
+++ b/backend/forms.py
@ -1,6 +1,7 @@
 from wtforms import Form, BooleanField, StringField, PasswordField, validators, IntegerField, fields, TextAreaField, \
    Field
 from wtforms import widgets
 from wtforms.validators import ValidationError
 from wtforms.fields import html5
@ -47,7 +48,7 @@ class SaltyPasswordField(StringField):
        if valuelist:
            # Remove empty strings
            self.encrypted_password = self.build_password(valuelist[0])
-            self.data=[]
+            self.data = []
        else:
            self.data = []
@ -80,6 +81,26 @@ class StringDictKeyValue(StringField):
        else:
            self.data = {}
 class ListRegex(object):
     """
     Validates that anything that looks like a regex passes as a regex

     A line "looks like a regex" when its first character is ``/``. This is
     the same test perform_site_check.strip_ignore_text applies, and the
     pattern is extracted the same way (spaces and slashes stripped from
     both ends), so every expression that would be used at check time is
     validated when the form is saved.

     :param message: Optional error message used verbatim instead of the
         default one when a line fails to compile.
     """

     def __init__(self, message=None):
         self.message = message

     def __call__(self, form, field):
         """Raise ValidationError for the first regex line that fails to compile."""
         import re

         # field.data may be empty/None when nothing was submitted.
         for line in field.data or []:
             # Empty entries have no first character and are never a regex.
             if not line or line[0] != '/':
                 continue

             # Because internally we dont wrap in /
             pattern = line.strip(" /")
             try:
                 re.compile(pattern)
             except re.error:
                 if self.message is not None:
                     raise ValidationError(self.message)
                 message = field.gettext('RegEx \'%s\' is not a valid regular expression.')
                 raise ValidationError(message % (pattern))
 class watchForm(Form):
    # https://wtforms.readthedocs.io/en/2.3.x/fields/#module-wtforms.fields.html5
@ -91,7 +112,7 @@ class watchForm(Form):
                                               [validators.Optional(), validators.NumberRange(min=1)])
    css_filter = StringField('CSS Filter')
-    ignore_text = StringListField('Ignore Text')
+    ignore_text = StringListField('Ignore Text', [ListRegex()])
    notification_urls = StringListField('Notification URL List')
    headers = StringDictKeyValue('Request Headers')
    trigger_check = BooleanField('Send test notification on save')
--- a/backend/templates/edit.html
+++ b/backend/templates/edit.html
@ -22,8 +22,14 @@
            </div>
            <!-- @todo: move to tabs --->
            <fieldset class="pure-group">
-                {{ render_field(form.ignore_text, rows=5) }}
+                {{ render_field(form.ignore_text, rows=5,  placeholder="Some text to ignore in a line
-                <span class="pure-form-message-inline">Each line will be processed separately as an ignore rule.</span>
+/some.regex\d{2}/   for case-INsensitive regex
 ") }}
                <span class="pure-form-message-inline">
                    Each line processed separately, any line matching will be ignored.<br/>
                    Regular Expression support, wrap the line in forward slash <b>/regex/</b>.
                </span>
            </fieldset>
            <fieldset class="pure-group">
--- a/backend/tests/test_ignore_regex_text.py
+++ b/backend/tests/test_ignore_regex_text.py
@ -0,0 +1,31 @@
 #!/usr/bin/python3
 import time
 from flask import url_for
 from . util import live_server_setup
 def test_setup(live_server):
    """Pass the ``live_server`` argument to ``live_server_setup`` from ``.util``.

    NOTE(review): presumably this runs first so the server is configured
    before the remaining tests in this module - confirm against ``util``.
    """
    live_server_setup(live_server)
 # Unit test of the stripper
 # Always we are dealing in utf-8
 def test_strip_regex_text_func():
     """Check plain-text and /regex/ ignore rules in strip_ignore_text.

     Covers a plain substring rule, a regex rule with a bounded digit count,
     and the case-insensitive matching applied to regex rules.
     """
     from backend import fetch_site_status

     test_content = """
    but sometimes we want to remove the lines.
    but 1 lines
    but 123 lines
    but including 1234 lines
    igNORe-cAse text we dont want to keep    
    but not always."""

     # Raw strings so that \s and \d reach the regex engine untouched
     # instead of being parsed as (invalid) string escape sequences.
     ignore_lines = ["sometimes", r"/\s\d{2,3}\s/", r"/ignore-case text/"]

     fetcher = fetch_site_status.perform_site_check(datastore=False)
     stripped_content = fetcher.strip_ignore_text(test_content, ignore_lines)

     # Plain-text rule removes the line that contains it.
     assert b"sometimes" not in stripped_content

     # One digit is too few for \d{2,3}, so the line stays.
     assert b"but 1 lines" in stripped_content

     # Three digits surrounded by whitespace match, so the line is removed.
     assert b"but 123 lines" not in stripped_content

     # Four digits cannot satisfy \s\d{2,3}\s, so this line stays.
     assert b"but including 1234 lines" in stripped_content

     # Regex rules are matched case-insensitively.
     assert b"igNORe-cAse text" not in stripped_content

     # Lines that match no rule are untouched.
     assert b"but not always." in stripped_content