From 854520005de9ee190c83f5c3013ebe9a77ef1772 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Mon, 21 Jun 2021 17:17:22 +1000 Subject: [PATCH] #81 - Regex support (#90) * Re #81 - Regex support * minor cleanup --- backend/__init__.py | 10 ++++++++ backend/fetch_site_status.py | 24 +++++++++++++++---- backend/forms.py | 25 ++++++++++++++++++-- backend/templates/edit.html | 10 ++++++-- backend/tests/test_ignore_regex_text.py | 31 +++++++++++++++++++++++++ 5 files changed, 92 insertions(+), 8 deletions(-) create mode 100644 backend/tests/test_ignore_regex_text.py diff --git a/backend/__init__.py b/backend/__init__.py index 2909c305..3522b45f 100644 --- a/backend/__init__.py +++ b/backend/__init__.py @@ -369,6 +369,10 @@ def changedetection_app(config=None, datastore_o=None): uuid = list(datastore.data['watching'].keys()).pop() if request.method == 'GET': + if not uuid in datastore.data['watching']: + flash("No watch with the UUID %s found." % (uuid), "error") + return redirect(url_for('index')) + populate_form_from_watch(form, datastore.data['watching'][uuid]) if request.method == 'POST' and form.validate(): @@ -415,6 +419,9 @@ def changedetection_app(config=None, datastore_o=None): return redirect(url_for('index')) else: + if request.method == 'POST' and not form.validate(): + flash("An error occurred, please see below.", "error") + output = render_template("edit.html", uuid=uuid, watch=datastore.data['watching'][uuid], form=form) return output @@ -478,6 +485,9 @@ def changedetection_app(config=None, datastore_o=None): flash("Settings updated.") + if request.method == 'POST' and not form.validate(): + flash("An error occurred, please see below.", "error") + output = render_template("settings.html", form=form) return output diff --git a/backend/fetch_site_status.py b/backend/fetch_site_status.py index a1034769..6856a63f 100644 --- a/backend/fetch_site_status.py +++ b/backend/fetch_site_status.py @@ -13,18 +13,34 @@ class perform_site_check(): self.datastore = datastore 
def strip_ignore_text(self, content, list_ignore_text): + import re ignore = [] + ignore_regex = [] for k in list_ignore_text: - ignore.append(k.encode('utf8')) + + # Is it a regex? + if k[0] == '/': + ignore_regex.append(k.strip(" /")) + else: + ignore.append(k) output = [] for line in content.splitlines(): - line = line.encode('utf8') # Always ignore blank lines in this mode. (when this function gets called) if len(line.strip()): - if not any(skip_text in line for skip_text in ignore): - output.append(line) + regex_matches = False + + # if any of these match, skip + for regex in ignore_regex: + try: + if re.search(regex, line, re.IGNORECASE): + regex_matches = True + except Exception as e: + continue + + if not regex_matches and not any(skip_text in line for skip_text in ignore): + output.append(line.encode('utf8')) return "\n".encode('utf8').join(output) diff --git a/backend/forms.py b/backend/forms.py index fa41d9fb..f0449200 100644 --- a/backend/forms.py +++ b/backend/forms.py @@ -1,6 +1,7 @@ from wtforms import Form, BooleanField, StringField, PasswordField, validators, IntegerField, fields, TextAreaField, \ Field from wtforms import widgets +from wtforms.validators import ValidationError from wtforms.fields import html5 @@ -47,7 +48,7 @@ class SaltyPasswordField(StringField): if valuelist: # Remove empty strings self.encrypted_password = self.build_password(valuelist[0]) - self.data=[] + self.data = [] else: self.data = [] @@ -80,6 +81,26 @@ class StringDictKeyValue(StringField): else: self.data = {} +class ListRegex(object): + """ + Validates that anything that looks like a regex passes as a regex + """ + def __init__(self, message=None): + self.message = message + + def __call__(self, form, field): + import re + + for line in field.data: + if line[0] == '/' and line[-1] == '/': + # Because internally we dont wrap in / + line = line.strip('/') + try: + re.compile(line) + except re.error: + message = field.gettext('RegEx \'%s\' is not a valid regular 
expression.') + raise ValidationError(message % (line)) + class watchForm(Form): # https://wtforms.readthedocs.io/en/2.3.x/fields/#module-wtforms.fields.html5 @@ -91,7 +112,7 @@ class watchForm(Form): [validators.Optional(), validators.NumberRange(min=1)]) css_filter = StringField('CSS Filter') - ignore_text = StringListField('Ignore Text') + ignore_text = StringListField('Ignore Text', [ListRegex()]) notification_urls = StringListField('Notification URL List') headers = StringDictKeyValue('Request Headers') trigger_check = BooleanField('Send test notification on save') diff --git a/backend/templates/edit.html b/backend/templates/edit.html index cbc201c7..9c600f9c 100644 --- a/backend/templates/edit.html +++ b/backend/templates/edit.html @@ -22,8 +22,14 @@
- {{ render_field(form.ignore_text, rows=5) }} - Each line will be processed separately as an ignore rule. + {{ render_field(form.ignore_text, rows=5, placeholder="Some text to ignore in a line +/some.regex\d{2}/ for case-INsensitive regex +") }} + + Each line is processed separately; any line that matches will be ignored.
+ Regular expressions are supported: wrap the line in forward slashes, e.g. /regex/. +
+
diff --git a/backend/tests/test_ignore_regex_text.py b/backend/tests/test_ignore_regex_text.py new file mode 100644 index 00000000..482434a2 --- /dev/null +++ b/backend/tests/test_ignore_regex_text.py @@ -0,0 +1,31 @@ +#!/usr/bin/python3 + +import time +from flask import url_for +from . util import live_server_setup + +def test_setup(live_server): + live_server_setup(live_server) + +# Unit test of the stripper +# Always we are dealing in utf-8 +def test_strip_regex_text_func(): + from backend import fetch_site_status + + test_content = """ + but sometimes we want to remove the lines. + + but 1 lines + but including 1234 lines + igNORe-cAse text we dont want to keep + but not always.""" + + ignore_lines = ["sometimes", "/\s\d{2,3}\s/", "/ignore-case text/"] + + fetcher = fetch_site_status.perform_site_check(datastore=False) + stripped_content = fetcher.strip_ignore_text(test_content, ignore_lines) + + assert b"but 1 lines" in stripped_content + assert b"igNORe-cAse text" not in stripped_content + assert b"but 1234 lines" not in stripped_content +