diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py
index 4ad1b1a7..9d29c5c1 100644
--- a/changedetectionio/forms.py
+++ b/changedetectionio/forms.py
@@ -349,6 +349,8 @@ class watchForm(commonSettingsForm):
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
save_and_preview_button = SubmitField('Save & Preview', render_kw={"class": "pure-button pure-button-primary"})
proxy = RadioField('Proxy')
+ # Per-watch checkbox for the "filter not found" alert; rendered on the watch edit page.
+ # NOTE(review): the Watch model in this change defaults the same key to True while the form default here is False - confirm which is intended.
+ filter_failure_notification_send = BooleanField(
+ 'Send a notification when the filter can no longer be found on the page', default=False)
def validate(self, **kwargs):
if not super().validate():
@@ -387,6 +389,11 @@ class globalSettingsApplicationForm(commonSettingsForm):
api_access_token_enabled = BooleanField('API access token security check enabled', default=True, validators=[validators.Optional()])
password = SaltyPasswordField()
+ # Global threshold: number of consecutive filter misses at which the alert is sent.
+ # Zero is accepted by the validator; the settings page documents 0 as "disabled".
+ filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification',
+ render_kw={"style": "width: 5em;"},
+ validators=[validators.NumberRange(min=0,
+ message="Should contain zero or more attempts")])
+
class globalSettingsForm(Form):
# Define these as FormFields/"sub forms", this way it matches the JSON storage
diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py
index cc6a476d..6b73e5d1 100644
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -1,5 +1,4 @@
import json
-import re
from typing import List
from bs4 import BeautifulSoup
@@ -8,16 +7,23 @@ import re
from inscriptis import get_text
from inscriptis.model.config import ParserConfig
+class FilterNotFoundInResponse(ValueError):
+    """Signal that a CSS/xPath filter matched nothing in the fetched content.
+
+    The filter expression is passed as the message so that handlers can
+    report which filter went missing via ``str(exc)``.
+    """
+
+    def __init__(self, msg):
+        super().__init__(msg)
+# ValueError subclass carrying a message; presumably raised when no JSON can be located
+# in a response (its raise sites are not part of this hunk - verify against callers).
class JSONNotFound(ValueError):
def __init__(self, msg):
ValueError.__init__(self, msg)
+
# Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches
+# Raises FilterNotFoundInResponse (carrying the selector text) when the selector matches no element.
def css_filter(css_filter, html_content):
soup = BeautifulSoup(html_content, "html.parser")
html_block = ""
- for item in soup.select(css_filter, separator=""):
+ r = soup.select(css_filter, separator="")
+ # A selector that matches nothing is reported as an error instead of returning an empty block
+ if len(r) == 0:
+ raise FilterNotFoundInResponse(css_filter)
+ for item in r:
html_block += str(item)
return html_block + "\n"
@@ -42,8 +48,12 @@ def xpath_filter(xpath_filter, html_content):
tree = html.fromstring(bytes(html_content, encoding='utf-8'))
html_block = ""
- for item in tree.xpath(xpath_filter.strip(), namespaces={'re':'http://exslt.org/regular-expressions'}):
- html_block+= etree.tostring(item, pretty_print=True).decode('utf-8')+"
"
+ # An xPath expression that selects nothing is reported as an error instead of yielding an empty block
+ r = tree.xpath(xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'})
+ if len(r) == 0:
+ # Report the xPath expression itself: inside this function the bare name css_filter is the
+ # module-level css_filter() function object, not the user's filter text
+ raise FilterNotFoundInResponse(xpath_filter)
+
+ for item in r:
+ html_block += etree.tostring(item, pretty_print=True).decode('utf-8') + "
"
return html_block
diff --git a/changedetectionio/model/App.py b/changedetectionio/model/App.py
index 21ed1431..6e74d483 100644
--- a/changedetectionio/model/App.py
+++ b/changedetectionio/model/App.py
@@ -5,6 +5,8 @@ from changedetectionio.notification import (
default_notification_title,
)
+# Default count of consecutive filter misses at which a "filter not found" notification is sent.
+# Used for 'filter_failure_notification_threshold_attempts' in base_config and imported by the filter-failure test.
+_FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT = 6
+
class model(dict):
base_config = {
'note': "Hello! If you change this file manually, please be sure to restart your changedetection.io instance!",
@@ -30,6 +32,7 @@ class model(dict):
'extract_title_as_title': False,
'empty_pages_are_a_change': False,
'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
+ 'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT,
'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
'global_subtractive_selectors': [],
'ignore_whitespace': True,
diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py
index 6c22b390..0e80d41b 100644
--- a/changedetectionio/model/Watch.py
+++ b/changedetectionio/model/Watch.py
@@ -41,6 +41,8 @@ class model(dict):
'trigger_text': [], # List of text or regex to wait for until a change is detected
'text_should_not_be_present': [], # Text that should not present
'fetch_backend': None,
+ 'filter_failure_notification_send': True, # Per-watch switch for the "filter not found" alert; shown as a checkbox on the watch edit form
+ 'consecutive_filter_failures': 0, # Incremented each time the CSS/xPath filter cannot be located; reset to 0 after an alert is sent or a check completes normally
'extract_title_as_title': False,
'check_unique_lines': False, # On change-detected, compare against all history if its something new
'proxy': None, # Preferred proxy connection
diff --git a/changedetectionio/notification.py b/changedetectionio/notification.py
index 8b396275..b0def158 100644
--- a/changedetectionio/notification.py
+++ b/changedetectionio/notification.py
@@ -34,7 +34,6 @@ def process_notification(n_object, datastore):
valid_notification_formats[default_notification_format],
)
-
# Insert variables into the notification content
notification_parameters = create_notification_parameters(n_object, datastore)
diff --git a/changedetectionio/templates/edit.html b/changedetectionio/templates/edit.html
index 5ba754d5..c706b0b2 100644
--- a/changedetectionio/templates/edit.html
+++ b/changedetectionio/templates/edit.html
@@ -62,6 +62,12 @@
{{ render_checkbox_field(form.extract_title_as_title) }}
+
+ {{ render_checkbox_field(form.filter_failure_notification_send) }}
+
+ Sends a notification when the filter can no longer be found on the page. Useful for knowing when the page layout has changed and your filter no longer works.
+
+
diff --git a/changedetectionio/templates/settings.html b/changedetectionio/templates/settings.html
index 57ba0e57..8b3e2e8d 100644
--- a/changedetectionio/templates/settings.html
+++ b/changedetectionio/templates/settings.html
@@ -36,7 +36,13 @@
{{ render_field(form.requests.form.jitter_seconds, class="jitter_seconds") }}
Example - 3 seconds random jitter could trigger up to 3 seconds earlier or up to 3 seconds later
-
+
+ {{ render_field(form.application.form.filter_failure_notification_threshold_attempts, class="filter_failure_notification_threshold_attempts") }}
+ After this many consecutive times that the CSS/xPath filter is missing, send a notification
+
+ Set to 0 to disable
+
+
{% if not hide_remove_pass %}
{% if current_user.is_authenticated %}
diff --git a/changedetectionio/tests/test_filter_failure_notification.py b/changedetectionio/tests/test_filter_failure_notification.py
new file mode 100644
index 00000000..17dff1f3
--- /dev/null
+++ b/changedetectionio/tests/test_filter_failure_notification.py
@@ -0,0 +1,123 @@
+import os
+import time
+import re
+from flask import url_for
+from .util import set_original_response, live_server_setup
+from changedetectionio.model import App
+
+
+# Overwrites test-datastore/endpoint-content.txt with a replacement page body.
+# The test calls this after the first "filter not found" notification has been verified.
+def set_response_with_filter():
+ test_return_data = """
+
+ Some initial text
+
Which is across multiple lines
+
+ So let's see what happens.
+
Some text thats the same
+
+
+ """
+
+ with open("test-datastore/endpoint-content.txt", "w") as f:
+ f.write(test_return_data)
+ return None
+
+
+# Hard to just add more live server URLs when one test is already running (I think)
+# So we add our test here (was in a different file)
+#
+# End-to-end check of the "filter not found" alert: a watch whose CSS filter never matches
+# must not notify before the threshold, must notify once the default threshold of consecutive
+# failures is reached, and must not send that alert again after the response is replaced.
+def test_check_notification(client, live_server):
+ live_server_setup(live_server)
+ set_original_response()
+
+ # Give the endpoint time to spin up
+ time.sleep(1)
+
+ # Add our URL through the add-watch form
+ test_url = url_for('test_endpoint', _external=True)
+ res = client.post(
+ url_for("form_watch_add"),
+ data={"url": test_url, "tag": ''},
+ follow_redirects=True
+ )
+ assert b"Watch added" in res.data
+
+ # Give the thread time to pick up the first version
+ time.sleep(3)
+
+ # Build the notification URL: the test notification endpoint with its
+ # 'http' scheme text swapped for 'json'
+ url = url_for('test_notification_endpoint', _external=True)
+ notification_url = url.replace('http', 'json')
+
+ print(">>>> Notification URL: " + notification_url)
+
+ # Just a regular notification setting, this will be used by the special 'filter not found' notification
+ notification_form_data = {"notification_urls": notification_url,
+ "notification_title": "New ChangeDetection.io Notification - {watch_url}",
+ "notification_body": "BASE URL: {base_url}\n"
+ "Watch URL: {watch_url}\n"
+ "Watch UUID: {watch_uuid}\n"
+ "Watch title: {watch_title}\n"
+ "Watch tag: {watch_tag}\n"
+ "Preview: {preview_url}\n"
+ "Diff URL: {diff_url}\n"
+ "Snapshot: {current_snapshot}\n"
+ "Diff: {diff}\n"
+ "Diff Full: {diff_full}\n"
+ ":-)",
+ "notification_format": "Text"}
+
+ # Watch settings for the edit form; the CSS filter deliberately targets an id that the original response lacks
+ notification_form_data.update({
+ "url": test_url,
+ "tag": "my tag",
+ "title": "my title",
+ "headers": "",
+ "css_filter": '#nope-doesnt-exist',
+ "fetch_backend": "html_requests"})
+
+ res = client.post(
+ url_for("edit_page", uuid="first"),
+ data=notification_form_data,
+ follow_redirects=True
+ )
+ assert b"Updated watch." in res.data
+ time.sleep(3)
+
+ # Now the notification should not exist, because we didnt reach the threshold
+ assert not os.path.isfile("test-datastore/notification.txt")
+
+ # Trigger as many rechecks as the default threshold so the consecutive-failure count reaches it
+ for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT):
+ res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
+ time.sleep(3)
+
+ # We should see something in the frontend
+ assert b'Did the page change its layout' in res.data
+
+ # Now it should exist and contain our "filter not found" alert
+ assert os.path.isfile("test-datastore/notification.txt")
+ notification = False
+ with open("test-datastore/notification.txt", 'r') as f:
+ notification = f.read()
+ assert 'CSS/xPath filter was not present in the page' in notification
+ assert '#nope-doesnt-exist' in notification
+
+ # Remove it and prove that it doesnt trigger when not expected
+ os.unlink("test-datastore/notification.txt")
+ set_response_with_filter()
+
+ for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT):
+ client.get(url_for("form_watch_checknow"), follow_redirects=True)
+ time.sleep(3)
+
+ # It should have sent a notification, but..
+ assert os.path.isfile("test-datastore/notification.txt")
+ # but it should not contain the info about the failed filter
+ with open("test-datastore/notification.txt", 'r') as f:
+ notification = f.read()
+ assert not 'CSS/xPath filter was not present in the page' in notification
+
+ # cleanup for the next
+ client.get(
+ url_for("form_delete", uuid="all"),
+ follow_redirects=True
+ )
diff --git a/changedetectionio/update_worker.py b/changedetectionio/update_worker.py
index 7202142d..a9acf502 100644
--- a/changedetectionio/update_worker.py
+++ b/changedetectionio/update_worker.py
@@ -3,6 +3,8 @@ import queue
import time
from changedetectionio import content_fetcher
+from changedetectionio.html_tools import FilterNotFoundInResponse
+
# A single update worker
#
# Requests for checking on a single site(watch) from a queue of watches
@@ -19,6 +21,32 @@ class update_worker(threading.Thread):
self.datastore = datastore
super().__init__(*args, **kwargs)
+    def send_filter_failure_notification(self, uuid):
+        """Queue a "filter not found" alert for the watch identified by ``uuid``.
+
+        The alert is addressed to the watch's own notification URLs when it
+        has any, otherwise to the application-wide ones; when neither is
+        configured nothing is queued. A ``uuid`` that is not present in the
+        datastore is ignored.
+
+        NOTE(review): the watch's 'filter_failure_notification_send' setting
+        is not consulted here - confirm whether the caller is meant to check it.
+        """
+        watch = self.datastore.data['watching'].get(uuid, False)
+        if not watch:
+            # The caller's own lookup allows for the uuid being absent; without this
+            # guard the watch['css_filter'] access below raised TypeError on False
+            return
+
+        app_settings = self.datastore.data['settings']['application']
+        threshold = app_settings.get('filter_failure_notification_threshold_attempts')
+
+        # Doubled braces survive str.format() as literal {watch_url}, {base_url} and
+        # {watch_uuid} tokens, presumably substituted later by the notification code
+        n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page',
+                    'notification_body': "Your configured CSS/xPath filter of '{}' for {{watch_url}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{base_url}}/edit/{{watch_uuid}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format(
+                        watch['css_filter'],
+                        threshold),
+                    'notification_format': 'text'}
+
+        # Watch-level URLs take precedence over the application-wide ones
+        if len(watch['notification_urls']):
+            n_object['notification_urls'] = watch['notification_urls']
+        elif len(app_settings['notification_urls']):
+            n_object['notification_urls'] = app_settings['notification_urls']
+
+        # Only prepare to notify if the rules above matched
+        if 'notification_urls' in n_object:
+            n_object.update({
+                'watch_url': watch['url'],
+                'uuid': uuid
+            })
+            self.notification_q.put(n_object)
+            print("Sent filter not found notification for {}".format(uuid))
+
def run(self):
from changedetectionio import fetch_site_status
@@ -55,11 +83,23 @@ class update_worker(threading.Thread):
except content_fetcher.ReplyWithContentButNoText as e:
# Totally fine, it's by choice - just continue on, nothing more to care about
# Page had elements/content but no renderable text
- if self.datastore.data['watching'].get(uuid, False) and self.datastore.data['watching'][uuid].get('css_filter'):
- self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found (CSS / xPath Filter not found in page?)"})
- else:
- self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found."})
- pass
+ self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found."})
+ except FilterNotFoundInResponse as e:
+ # str(e) is the filter expression that the raising filter function passed as the message
+ err_text = "Filter '{}' not found - Did the page change its layout?".format(str(e))
+ c = 0
+ if self.datastore.data['watching'].get(uuid, False):
+ # NOTE(review): the fallback of 5 only applies to a watch that lacks the key; with the default
+ # threshold of 6 such a watch would alert on its first miss - confirm this is intended
+ c = self.datastore.data['watching'][uuid].get('consecutive_filter_failures', 5)
+ c += 1
+
+ # Send a notification once the consecutive failure count reaches the threshold;
+ # a threshold of 0 never sends
+ threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
+ print("Filter for {} not found, consecutive_filter_failures: {}".format(uuid, c))
+ if threshold >0 and c >= threshold:
+ self.send_filter_failure_notification(uuid)
+ # Restart the count after alerting
+ c = 0
+
+ self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
+ 'consecutive_filter_failures': c})
except content_fetcher.EmptyReply as e:
# Some kind of custom to-str handler in the exception handler that does this?
err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
@@ -89,6 +129,7 @@ class update_worker(threading.Thread):
fname = watch.save_history_text(contents=contents, timestamp=str(round(time.time())))
# Generally update anything interesting returned
+ update_obj['consecutive_filter_failures'] = 0
self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
# A change was detected