Merge branch 'diff-filters' of https://github.com/bwees/changedetection.io into diff-filters

2 years ago · 2a649afd22
parent 526f8fac45 e76f5efee3
commit 2a649afd22
28 changed files with 735 additions and 4257 deletions
--- a/README.md
+++ b/README.md
@ -3,14 +3,16 @@

 ![changedetection.io](https://github.com/dgtlmoon/changedetection.io/actions/workflows/test-only.yml/badge.svg?branch=master)

-## Self-Hosted, Open Source, Change Monitoring of Web Pages
+## Web Site Change Detection, Monitoring and Notification - Self-Hosted or SaaS.

-_Know when web pages change! Stay ontop of new information!_ 
+_Know when web pages change! Stay on top of new information! Get notifications when important website content changes_ 

 Live your data-life *pro-actively* instead of *re-actively*.

 Free, Open-source web page monitoring, notification and change detection. Don't have time? [**Try our $6.99/month subscription - unlimited checks and watches!**](https://lemonade.changedetection.io/start)

+[![Discord](https://img.shields.io/badge/DISCORD-%237289DA.svg?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/XJZy7QK3ja) [ ![YouTube](https://img.shields.io/badge/YouTube-%23FF0000.svg?style=for-the-badge&logo=YouTube&logoColor=white)](https://www.youtube.com/channel/UCbS09q1TRf0o4N2t-WA3emQ) [![LinkedIn](https://img.shields.io/badge/linkedin-%230077B5.svg?style=for-the-badge&logo=linkedin&logoColor=white)](https://www.linkedin.com/company/changedetection-io/)
+

 [<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring"  title="Self-hosted web page change monitoring"  />](https://lemonade.changedetection.io/start)

--- a/changedetectionio/.gitignore
+++ b/changedetectionio/.gitignore
@ -1 +1,2 @@
 test-datastore
+package-lock.json
--- a/changedetectionio/init.py
+++ b/changedetectionio/init.py
@ -44,7 +44,7 @@ from flask_wtf import CSRFProtect
 from changedetectionio import html_tools
 from changedetectionio.api import api_v1

-__version__ = '0.39.16'
+__version__ = '0.39.17.1'

 datastore = None

@ -580,6 +580,9 @@ def changedetection_app(config=None, datastore_o=None):
        if request.method == 'POST' and form.validate():
            extra_update_obj = {}

+            if request.args.get('unpause_on_save'):
+                extra_update_obj['paused'] = False
+
            # Re #110, if they submit the same as the default value, set it to None, so we continue to follow the default
            # Assume we use the default value, unless something relevant is different, then use the form value
            # values could be None, 0 etc.
@ -619,6 +622,9 @@ def changedetection_app(config=None, datastore_o=None):
            datastore.data['watching'][uuid].update(form.data)
            datastore.data['watching'][uuid].update(extra_update_obj)

+            if request.args.get('unpause_on_save'):
+                flash("Updated watch - unpaused!.")
+            else:
                flash("Updated watch.")

            # Re #286 - We wait for syncing new data to disk in another thread every 60 seconds
@ -1063,9 +1069,9 @@ def changedetection_app(config=None, datastore_o=None):
        except FileNotFoundError:
            abort(404)

-    @app.route("/api/add", methods=['POST'])
+    @app.route("/form/add/quickwatch", methods=['POST'])
    @login_required
-    def form_watch_add():
+    def form_quick_watch_add():
        from changedetectionio import forms
        form = forms.quickWatchForm(request.form)

@ -1078,13 +1084,19 @@ def changedetection_app(config=None, datastore_o=None):
            flash('The URL {} already exists'.format(url), "error")
            return redirect(url_for('index'))

-        # @todo add_watch should throw a custom Exception for validation etc
-        new_uuid = datastore.add_watch(url=url, tag=request.form.get('tag').strip())
-        if new_uuid:
+        add_paused = request.form.get('edit_and_watch_submit_button') != None
+        new_uuid = datastore.add_watch(url=url, tag=request.form.get('tag').strip(), extras={'paused': add_paused})
+
+
+        if not add_paused and new_uuid:
            # Straight into the queue.
            update_q.put(new_uuid)
            flash("Watch added.")

+        if add_paused:
+            flash('Watch added in Paused state, saving will unpause.')
+            return redirect(url_for('edit_page', uuid=new_uuid, unpause_on_save=1))
+
        return redirect(url_for('index'))


--- a/changedetectionio/content_fetcher.py
+++ b/changedetectionio/content_fetcher.py
@ -63,8 +63,8 @@ class Fetcher():
                      break;
                    }
                    if('' !==r.id) {
-                      chained_css.unshift("#"+r.id);
-                      final_selector= chained_css.join('>');
+                      chained_css.unshift("#"+CSS.escape(r.id));
+                      final_selector= chained_css.join(' > ');
                      // Be sure theres only one, some sites have multiples of the same ID tag :-(
                      if (window.document.querySelectorAll(final_selector).length ==1 ) {
                        return final_selector;
--- a/changedetectionio/fetch_site_status.py
+++ b/changedetectionio/fetch_site_status.py
@ -12,6 +12,7 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


 # Some common stuff here that can be moved to a base class
+# (set_proxy_from_list)
 class perform_site_check():

    def __init__(self, *args, datastore, **kwargs):
@ -46,6 +47,20 @@ class perform_site_check():

        return proxy_args

+    # Python's re module has no native "/pattern/flags" syntax, so a rule written
+    # that way is converted to a pattern with a leading inline flag group, e.g.
+    # "/foobar/i" becomes "(?i)foobar".
+    def forward_slash_enclosed_regex_to_options(self, regex):
+        """Convert a ``/pattern/flags`` rule into a Python regex with inline flags.
+
+        :param regex: A rule as typed by the user: either ``/pattern/flags``
+                      (at least one flag letter after the closing slash) or a
+                      plain keyword/regular expression.
+        :return: ``(?flags)pattern`` for the slash-enclosed form, otherwise the
+                 unchanged rule prefixed with ``(?i)`` (case-insensitive by default).
+        """
+        res = re.search(r'^/(.*?)/(\w+)$', regex, re.IGNORECASE)
+
+        if res:
+            # Slash-enclosed form: unwrap the pattern and keep the requested flags
+            flags = res.group(2)
+            regex = res.group(1)
+        else:
+            # Anything else is used as-is, matched case-insensitively
+            flags = 'i'
+
+        # Global inline flags have to lead the expression: a trailing "(?i)" is
+        # deprecated since Python 3.6 and raises re.error from Python 3.11 on.
+        return '(?{}){}'.format(flags, regex)
+
+
    def run(self, uuid):
        timestamp = int(time.time())  # used for storage etc too

@ -151,7 +166,9 @@ class perform_site_check():
                is_html = False

        if is_html or is_source:
+            
            # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
+            fetcher.content = html_tools.workarounds_for_obfuscations(fetcher.content)
            html_content = fetcher.content

            # If not JSON,  and if it's not text/plain..
@ -214,15 +231,27 @@ class perform_site_check():
        if len(extract_text) > 0:
            regex_matched_output = []
            for s_re in extract_text:
-                result = re.findall(s_re.encode('utf8'), stripped_text_from_html,
-                                    flags=re.MULTILINE | re.DOTALL | re.LOCALE)
-                if result:
-                    regex_matched_output = regex_matched_output + result
+                # incase they specified something in '/.../x'
+                regex = self.forward_slash_enclosed_regex_to_options(s_re)
+                result = re.findall(regex.encode('utf-8'), stripped_text_from_html)
+
+                for l in result:
+                    if type(l) is tuple:
+                        #@todo - some formatter option default (between groups)
+                        regex_matched_output += list(l) + [b'\n']
+                    else:
+                        # @todo - some formatter option default (between each ungrouped result)
+                        regex_matched_output += [l] + [b'\n']

+            # Now we will only show what the regex matched
+            stripped_text_from_html = b''
+            text_content_before_ignored_filter = b''
            if regex_matched_output:
-                stripped_text_from_html = b'\n'.join(regex_matched_output)
+                # @todo some formatter for presentation?
+                stripped_text_from_html = b''.join(regex_matched_output)
                text_content_before_ignored_filter = stripped_text_from_html

+
        # Re #133 - if we should strip whitespaces from triggering the change detected comparison
        if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
            fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
--- a/changedetectionio/forms.py
+++ b/changedetectionio/forms.py
@ -308,6 +308,9 @@ class ValidateCSSJSONXPATHInput(object):
 class quickWatchForm(Form):
    url = fields.URLField('URL', validators=[validateURL()])
    tag = StringField('Group tag', [validators.Optional()])
+    watch_submit_button = SubmitField('Watch', render_kw={"class": "pure-button pure-button-primary"})
+    edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"})
+

 # Common to a single watch and the global settings
 class commonSettingsForm(Form):
@ -351,6 +354,8 @@ class watchForm(commonSettingsForm):
    save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
    save_and_preview_button = SubmitField('Save & Preview', render_kw={"class": "pure-button pure-button-primary"})
    proxy = RadioField('Proxy')
+    filter_failure_notification_send = BooleanField(
+        'Send a notification when the filter can no longer be found on the page', default=False)

    def validate(self, **kwargs):
        if not super().validate():
@ -389,6 +394,11 @@ class globalSettingsApplicationForm(commonSettingsForm):
    api_access_token_enabled = BooleanField('API access token security check enabled', default=True, validators=[validators.Optional()])
    password = SaltyPasswordField()

+    filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification',
+                                                                  render_kw={"style": "width: 5em;"},
+                                                                  validators=[validators.NumberRange(min=0,
+                                                                                                     message="Should contain zero or more attempts")])
+

 class globalSettingsForm(Form):
    # Define these as FormFields/"sub forms", this way it matches the JSON storage
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@ -1,5 +1,4 @@
 import json
-import re
 from typing import List

 from bs4 import BeautifulSoup
@ -8,16 +7,23 @@ import re
 from inscriptis import get_text
 from inscriptis.model.config import ParserConfig

+class FilterNotFoundInResponse(ValueError):
+    """Raised by css_filter()/xpath_filter() when the content is non-empty but the filter matches nothing.
+
+    The exception message is the filter expression that failed to match.
+    """
+    def __init__(self, msg):
+        ValueError.__init__(self, msg)

 class JSONNotFound(ValueError):
    def __init__(self, msg):
        ValueError.__init__(self, msg)

+
 # Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches
 def css_filter(css_filter, html_content):
    soup = BeautifulSoup(html_content, "html.parser")
    html_block = ""
-    for item in soup.select(css_filter, separator=""):
+    r = soup.select(css_filter, separator="")
+    if len(html_content) > 0 and len(r) == 0:
+        raise FilterNotFoundInResponse(css_filter)
+    for item in r:
        html_block += str(item)

    return html_block + "\n"
@ -42,8 +48,19 @@ def xpath_filter(xpath_filter, html_content):
    tree = html.fromstring(bytes(html_content, encoding='utf-8'))
    html_block = ""

-    for item in tree.xpath(xpath_filter.strip(), namespaces={'re':'http://exslt.org/regular-expressions'}):
-        html_block+= etree.tostring(item, pretty_print=True).decode('utf-8')+"<br/>"
+    r = tree.xpath(xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'})
+    if len(html_content) > 0 and len(r) == 0:
+        raise FilterNotFoundInResponse(xpath_filter)
+
+    #@note: //title/text() wont work where <title>CDATA..
+
+    for element in r:
+        if type(element) == etree._ElementStringResult:
+            html_block += str(element) + "<br/>"
+        elif type(element) == etree._ElementUnicodeResult:
+            html_block += str(element) + "<br/>"
+        else:
+            html_block += etree.tostring(element, pretty_print=True).decode('utf-8') + "<br/>"

    return html_block

@ -202,3 +219,17 @@ def html_to_text(html_content: str, render_anchor_tag_content=False) -> str:

    return text_content

+def workarounds_for_obfuscations(content):
+    """
+    Some sites are using sneaky tactics to make prices and other information un-renderable by Inscriptis
+    This could go into its own Pip package in the future, for faster updates
+
+    :param content: HTML text to clean up; falsy values (None, '') are returned untouched.
+    :return: The content with whitespace-only HTML comments removed.
+    """
+
+    # HomeDepot.com style <span>$<!-- -->90<!-- -->.<!-- -->74</span>
+    # https://github.com/weblyzard/inscriptis/issues/45
+    if not content:
+        return content
+
+    # Raw string: "\s" in a normal literal is an invalid escape sequence
+    # (DeprecationWarning, SyntaxWarning from Python 3.12).
+    # Only comments holding nothing but whitespace are stripped, real comments stay.
+    content = re.sub(r'<!--\s+-->', '', content)
+
+    return content
--- a/changedetectionio/model/App.py
+++ b/changedetectionio/model/App.py
@ -1,30 +1,28 @@
-import collections
-import os
-
-import uuid as uuid_builder
-
+from os import getenv
 from changedetectionio.notification import (
    default_notification_body,
    default_notification_format,
    default_notification_title,
 )

+_FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT = 6
+
 class model(dict):
    base_config = {
            'note': "Hello! If you change this file manually, please be sure to restart your changedetection.io instance!",
            'watching': {},
            'settings': {
                'headers': {
-                    'User-Agent': os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'),
+                    'User-Agent': getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'),
                    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
                    'Accept-Encoding': 'gzip, deflate',  # No support for brolti in python requests yet.
                    'Accept-Language': 'en-GB,en-US;q=0.9,en;'
                },
                'requests': {
-                    'timeout': int(os.getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")),  # Default 45 seconds
+                    'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")),  # Default 45 seconds
                    'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None},
                    'jitter_seconds': 0,
-                    'workers': int(os.getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "10")),  # Number of threads, lower is better for slow connections
+                    'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "10")),  # Number of threads, lower is better for slow connections
                    'proxy': None # Preferred proxy connection
                },
                'application': {
@ -33,7 +31,8 @@ class model(dict):
                    'base_url' : None,
                    'extract_title_as_title': False,
                    'empty_pages_are_a_change': False,
-                    'fetch_backend': os.getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
+                    'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
+                    'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT,
                    'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
                    'global_subtractive_selectors': [],
                    'ignore_whitespace': True,
--- a/changedetectionio/model/Watch.py
+++ b/changedetectionio/model/Watch.py
@ -240,3 +240,4 @@ class model(dict):
                diff_types["add"] = True

        return diff_types
+
--- a/changedetectionio/notification.py
+++ b/changedetectionio/notification.py
@ -34,7 +34,6 @@ def process_notification(n_object, datastore):
        valid_notification_formats[default_notification_format],
    )

-
    # Insert variables into the notification content
    notification_parameters = create_notification_parameters(n_object, datastore)

@ -64,7 +63,7 @@ def process_notification(n_object, datastore):

                # So if no avatar_url is specified, add one so it can be correctly calculated into the total payload
                k = '?' if not '?' in url else '&'
-                if not 'avatar_url' in url:
+                if not 'avatar_url' in url and not url.startswith('mail'):
                    url += k + 'avatar_url=https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png'

                if url.startswith('tgram://'):
@ -79,13 +78,21 @@ def process_notification(n_object, datastore):
                    n_title = n_title[0:payload_max_size]
                    n_body = n_body[0:body_limit]

-                elif url.startswith('discord://'):
+                elif url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks') or url.startswith('https://discord.com/api'):
                    # real limit is 2000, but minus some for extra metadata
                    payload_max_size = 1700
                    body_limit = max(0, payload_max_size - len(n_title))
                    n_title = n_title[0:payload_max_size]
                    n_body = n_body[0:body_limit]

+                elif url.startswith('mailto'):
+                    # Apprise will default to HTML, so we need to override it
+                    # So that whats' generated in n_body is in line with what is going to be sent.
+                    # https://github.com/caronc/apprise/issues/633#issuecomment-1191449321
+                    if not 'format=' in url and (n_format == 'text' or n_format == 'markdown'):
+                        prefix = '?' if not '?' in url else '&'
+                        url = "{}{}format={}".format(url, prefix, n_format)
+
                apobj.add(url)

                apobj.notify(
--- a/changedetectionio/static/styles/package-lock.json
+++ b/changedetectionio/static/styles/package-lock.json
--- a/changedetectionio/static/styles/styles.css
+++ b/changedetectionio/static/styles/styles.css
@ -1,9 +1,7 @@
 /*
 * -- BASE STYLES --
 * Most of these are inherited from Base, but I want to change a few.
- * nvm use v14.18.1
- * npm install
- * npm run build
+ * nvm use v14.18.1 && npm install && npm run build
 * or npm run watch
 */
 body {
@ -203,13 +201,18 @@ body:after, body:before {
  border-radius: 10px;
  margin-bottom: 1em; }
  #new-watch-form input {
-    width: auto !important;
-    display: inline-block; }
+    display: inline-block;
+    margin-bottom: 5px; }
  #new-watch-form .label {
    display: none; }
  #new-watch-form legend {
    color: #fff;
    font-weight: bold; }
+  #new-watch-form #watch-add-wrapper-zone > div {
+    display: inline-block; }
+  @media only screen and (max-width: 760px) {
+    #new-watch-form #watch-add-wrapper-zone #url {
+      width: 100%; } }

 #diff-col {
  padding-left: 40px; }
--- a/changedetectionio/static/styles/styles.scss
+++ b/changedetectionio/static/styles/styles.scss
@ -1,9 +1,7 @@
 /*
 * -- BASE STYLES --
 * Most of these are inherited from Base, but I want to change a few.
- * nvm use v14.18.1
- * npm install
- * npm run build
+ * nvm use v14.18.1 && npm install && npm run build
 * or npm run watch
 */
 body {
@ -269,8 +267,8 @@ body:after, body:before {
  border-radius: 10px;
  margin-bottom: 1em;
  input {
-    width: auto !important;
    display: inline-block;
+    margin-bottom: 5px;
  }
  .label {
    display: none;
@ -279,6 +277,17 @@ body:after, body:before {
    color: #fff;
    font-weight: bold;
  }
+
+  #watch-add-wrapper-zone {
+    > div {
+      display: inline-block;
+    }
+    @media only screen and (max-width: 760px) {
+      #url {
+        width: 100%;
+      }
+    }
+  }
 }


--- a/changedetectionio/store.py
+++ b/changedetectionio/store.py
@ -158,8 +158,7 @@ class ChangeDetectionStore:
    @property
    def threshold_seconds(self):
        seconds = 0
-        mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
-        for m, n in mtable.items():
+        for m, n in Watch.mtable.items():
            x = self.__data['settings']['requests']['time_between_check'].get(m)
            if x:
                seconds += x * n
--- a/changedetectionio/templates/edit.html
+++ b/changedetectionio/templates/edit.html
@ -33,7 +33,7 @@

    <div class="box-wrap inner">
        <form class="pure-form pure-form-stacked"
-              action="{{ url_for('edit_page', uuid=uuid, next = request.args.get('next') ) }}" method="POST">
+              action="{{ url_for('edit_page', uuid=uuid, next = request.args.get('next'), unpause_on_save = request.args.get('unpause_on_save')) }}" method="POST">
             <input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>

            <div class="tab-pane-inner" id="general">
@ -62,6 +62,12 @@
                    <div class="pure-control-group">
                        {{ render_checkbox_field(form.extract_title_as_title) }}
                    </div>
+                    <div class="pure-control-group">
+                        {{ render_checkbox_field(form.filter_failure_notification_send) }}
+                        <span class="pure-form-message-inline">
+                         Sends a notification when the filter can no longer be seen on the page, good for knowing when the page changed and your filter will not work anymore.
+                        </span>
+                    </div>
                </fieldset>
            </div>

@ -167,15 +173,26 @@ User-Agent: wonderbra 1.0") }}
                        </div>
                    </fieldset>
                    <div class="pure-control-group">
-                        {{ render_field(form.css_filter, placeholder=".class-name or #some-id, or other CSS selector rule.",
-                        class="m-d") }}
+                        {% set field = render_field(form.css_filter,
+                            placeholder=".class-name or #some-id, or other CSS selector rule.",
+                            class="m-d")
+                        %}
+                        {{ field }}
+                        {% if '/text()' in  field %}
+                          <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br/>
+                        {% endif %}
                        <span class="pure-form-message-inline">
                    <ul>
                        <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
                        <li>JSON - Limit text to this JSON rule, using <a href="https://pypi.org/project/jsonpath-ng/">JSONPath</a>, prefix with <code>"json:"</code>, use <code>json:$</code> to force re-formatting if required,  <a
                                href="https://jsonpath.com/" target="new">test your JSONPath here</a></li>
-                        <li>XPath - Limit text to this XPath rule, simply start with a forward-slash, example  <code>//*[contains(@class, 'sametext')]</code> or <code>xpath://*[contains(@class, 'sametext')]</code>, <a
+                        <li>XPath - Limit text to this XPath rule, simply start with a forward-slash,
+                            <ul>
+                                <li>Example:  <code>//*[contains(@class, 'sametext')]</code> or <code>xpath://*[contains(@class, 'sametext')]</code>, <a
                                href="http://xpather.com/" target="new">test your XPath here</a></li>
+                                <li>Example: Get all titles from an RSS feed <code>//title/text()</code></li>
+                            </ul>
+                            </li>
                    </ul>
                    Please be sure that you thoroughly understand how to write CSS or JSONPath, XPath selector rules before filing an issue on GitHub! <a
                                href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/>
@ -200,7 +217,7 @@ nav
                    <span class="pure-form-message-inline">
                        <ul>
                            <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
-                            <li>Regular Expression support, wrap the line in forward slash <code>/regex/</code></li>
+                            <li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
                            <li>Changing this will affect the comparison checksum which may trigger an alert</li>
                            <li>Use the preview/show current tab to see ignores</li>
                        </ul>
@ -243,8 +260,15 @@ Unavailable") }}
                        {{ render_field(form.extract_text, rows=5, placeholder="\d+ online") }}
                        <span class="pure-form-message-inline">
                    <ul>
-                        <li>Extracts text in the final output after other filters using regular expressions, for example <code>\d+ online</code></li>
-                        <li>One line per regular-expression.</li>
+                        <li>Extracts text in the final output (line by line) after other filters using regular expressions;
+                            <ul>
+                                <li>Regular expression &dash; example <code>/reports.+?2022/i</code></li>
+                                <li>Use <code>//(?aiLmsux)</code> type flags (more <a href="https://docs.python.org/3/library/re.html#index-15">information here</a>)<br/></li>
+                                <li>Keyword example &dash; example <code>Out of stock</code></li>
+                                <li>Use groups to extract just that text &dash; example <code>/reports.+?(\d+)/i</code> returns a list of years only</li>
+                            </ul>
+                        </li>
+                        <li>One line per regular-expression/ string match</li>
                    </ul>
                        </span>
                    </div>
--- a/changedetectionio/templates/settings.html
+++ b/changedetectionio/templates/settings.html
@ -36,7 +36,13 @@
                        {{ render_field(form.requests.form.jitter_seconds, class="jitter_seconds") }}
                        <span class="pure-form-message-inline">Example - 3 seconds random jitter could trigger up to 3 seconds earlier or up to 3 seconds later</span>
                    </div>
-
+                    <div class="pure-control-group">
+                        {{ render_field(form.application.form.filter_failure_notification_threshold_attempts, class="filter_failure_notification_threshold_attempts") }}
+                        <span class="pure-form-message-inline">After this many consecutive times that the CSS/xPath filter is missing, send a notification
+                            <br/>
+                        Set to <strong>0</strong> to disable
+                        </span>
+                    </div>
                    <div class="pure-control-group">
                        {% if not hide_remove_pass %}
                            {% if current_user.is_authenticated %}
@ -148,7 +154,7 @@ nav
                        <ul>
                            <li>Note: This is applied globally in addition to the per-watch rules.</li>
                            <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
-                            <li>Regular Expression support, wrap the line in forward slash <code>/regex/</code></li>
+                            <li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
                            <li>Changing this will affect the comparison checksum which may trigger an alert</li>
                            <li>Use the preview/show current tab to see ignores</li>
                        </ul>
--- a/changedetectionio/templates/watch-overview.html
+++ b/changedetectionio/templates/watch-overview.html
@ -1,18 +1,25 @@
 {% extends 'base.html' %}
 {% block content %}
-{% from '_helpers.jinja' import render_simple_field %}
+{% from '_helpers.jinja' import render_simple_field, render_field %}
 <script type="text/javascript" src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
 <script type="text/javascript" src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script>

 <div class="box">

-    <form class="pure-form" action="{{ url_for('form_watch_add') }}" method="POST" id="new-watch-form">
+    <form class="pure-form" action="{{ url_for('form_quick_watch_add') }}" method="POST" id="new-watch-form">
        <input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
        <fieldset>
            <legend>Add a new change detection watch</legend>
+            <div id="watch-add-wrapper-zone">
+                <div>
                    {{ render_simple_field(form.url, placeholder="https://...", required=true) }}
                    {{ render_simple_field(form.tag, value=active_tag if active_tag else '', placeholder="watch group") }}
-            <button type="submit" class="pure-button pure-button-primary">Watch</button>
+                </div>
+                <div>
+                    {{ render_simple_field(form.watch_submit_button, title="Watch this URL!" ) }}
+                    {{ render_simple_field(form.edit_and_watch_submit_button, title="Edit first then Watch") }}
+                </div>
+            </div>
        </fieldset>
        <span style="color:#eee; font-size: 80%;"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" /> Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></a></span>
    </form>
--- a/changedetectionio/tests/test_errorhandling.py
+++ b/changedetectionio/tests/test_errorhandling.py
@ -28,13 +28,9 @@ def test_error_handler(client, live_server):
    )
    assert b"1 Imported" in res.data

-    # Trigger a check
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)
-
    # Give the thread time to pick it up
    time.sleep(3)

-
    res = client.get(url_for("index"))
    assert b'unviewed' not in res.data
    assert b'Status Code 403' in res.data
@ -53,9 +49,6 @@ def test_error_text_handler(client, live_server):
    )
    assert b"1 Imported" in res.data

-    # Trigger a check
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)
-
    # Give the thread time to pick it up
    time.sleep(3)

--- a/changedetectionio/tests/test_extract_regex.py
+++ b/changedetectionio/tests/test_extract_regex.py
@ -15,7 +15,7 @@ def set_original_response():
     </br>
     So let's see what happens.  </br>
     <div id="sametext">Some text thats the same</div>
-     <div id="changetext">Some text that will change</div>
+     <div class="changetext">Some text that will change</div>     
     </body>
     </html>
    """
@ -33,7 +33,8 @@ def set_modified_response():
     </br>
     So let's see what happens.  </br>
     <div id="sametext">Some text thats the same</div>
-     <div id="changetext">Some text that did change ( 1000 online <br/> 80 guests<br/>  2000 online )</div>
+     <div class="changetext">Some text that did change ( 1000 online <br/> 80 guests<br/>  2000 online )</div>
+     <div class="changetext">SomeCase insensitive 3456</div>
     </body>
     </html>
    """
@ -44,11 +45,78 @@ def set_modified_response():
    return None


-def test_check_filter_and_regex_extract(client, live_server):
-    sleep_time_for_fetch_thread = 3
+def set_multiline_response():
+    test_return_data = """<html>
+       <body>
+     
+     <p>Something <br/>
+        across 6 billion multiple<br/>
+        lines
+     </p>
+     
+     <div>aaand something lines</div>
+     </body>
+     </html>
+    """
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write(test_return_data)
+
+    return None
+
+
+def test_setup(client, live_server):

    live_server_setup(live_server)
-    css_filter = "#changetext"
+
+def test_check_filter_multiline(client, live_server):
+
+    set_multiline_response()
+
+    # Add our URL to the import page
+    test_url = url_for('test_endpoint', _external=True)
+    res = client.post(
+        url_for("import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
+
+    time.sleep(3)
+
+    # Go to the edit page and set a case-insensitive extract_text regex
+    # (with an empty CSS filter) that spans several lines of the response
+    res = client.post(
+        url_for("edit_page", uuid="first"),
+        data={"css_filter": '',
+              'extract_text': '/something.+?6 billion.+?lines/si',
+              "url": test_url,
+              "tag": "",
+              "headers": "",
+              'fetch_backend': "html_requests"
+              },
+        follow_redirects=True
+    )
+
+    assert b"Updated watch." in res.data
+    time.sleep(3)
+
+    res = client.get(
+        url_for("preview_page", uuid="first"),
+        follow_redirects=True
+    )
+
+
+    assert b'<div class="">Something' in res.data
+    assert b'<div class="">across 6 billion multiple' in res.data
+    assert b'<div class="">lines' in res.data
+
+    # but the last one, which also says 'lines' shouldnt be here (non-greedy match checking)
+    assert b'aaand something lines' not in res.data
+
+def test_check_filter_and_regex_extract(client, live_server):
+    sleep_time_for_fetch_thread = 3
+    css_filter = ".changetext"

    set_original_response()

@ -64,6 +132,7 @@ def test_check_filter_and_regex_extract(client, live_server):
    )
    assert b"1 Imported" in res.data

+    time.sleep(1)
    # Trigger a check
    client.get(url_for("form_watch_checknow"), follow_redirects=True)

@ -75,7 +144,7 @@ def test_check_filter_and_regex_extract(client, live_server):
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"css_filter": css_filter,
-              'extract_text': '\d+ online\n\d+ guests',
+              'extract_text': '\d+ online\r\n\d+ guests\r\n/somecase insensitive \d+/i\r\n/somecase insensitive (345\d)/i',
              "url": test_url,
              "tag": "",
              "headers": "",
@ -86,15 +155,6 @@ def test_check_filter_and_regex_extract(client, live_server):

    assert b"Updated watch." in res.data

-    # Check it saved
-    res = client.get(
-        url_for("edit_page", uuid="first"),
-    )
-    assert b'\d+ online' in res.data
-
-    # Trigger a check
-#    client.get(url_for("form_watch_checknow"), follow_redirects=True)
-
    # Give the thread time to pick it up
    time.sleep(sleep_time_for_fetch_thread)

@ -126,5 +186,13 @@ def test_check_filter_and_regex_extract(client, live_server):
    # Both regexs should be here
    assert b'<div class="">80 guests' in res.data

+    # Regex with flag handling should be here
+    assert b'<div class="">SomeCase insensitive 3456' in res.data
+
+    # Singular group from /somecase insensitive (345\d)/i
+    assert b'<div class="">3456' in res.data
+
+    # Regex with multiline flag handling should be here
+
    # Should not be here
    assert b'Some text that did change' not in res.data
--- a/changedetectionio/tests/test_filter_exist_changes.py
+++ b/changedetectionio/tests/test_filter_exist_changes.py
@ -0,0 +1,134 @@
+#!/usr/bin/python3
+
+# https://www.reddit.com/r/selfhosted/comments/wa89kp/comment/ii3a4g7/?context=3
+import os
+import time
+from flask import url_for
+from .util import set_original_response, live_server_setup
+from changedetectionio.model import App
+
+
+def set_response_without_filter():
+    test_return_data = """<html>
+       <body>
+     Some initial text</br>
+     <p>Which is across multiple lines</p>
+     </br>
+     So let's see what happens.  </br>
+     <div id="nope-doesnt-exist">Some text thats the same</div>     
+     </body>
+     </html>
+    """
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write(test_return_data)
+    return None
+
+
+def set_response_with_filter():
+    test_return_data = """<html>
+       <body>
+     Some initial text</br>
+     <p>Which is across multiple lines</p>
+     </br>
+     So let's see what happens.  </br>
+     <div class="ticket-available">Ticket now on sale!</div>     
+     </body>
+     </html>
+    """
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write(test_return_data)
+    return None
+
+def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_server):
+#  Filter knowingly doesn't exist, like someone setting up a known filter to see if some cinema tickets are on sale again
+#  And the page has that filter available
+#  Then I should get a notification
+
+    live_server_setup(live_server)
+
+    # Give the endpoint time to spin up
+    time.sleep(1)
+    set_response_without_filter()
+
+    # Add our URL to the import page
+    test_url = url_for('test_endpoint', _external=True)
+    res = client.post(
+        url_for("form_quick_watch_add"),
+        data={"url": test_url, "tag": 'cinema'},
+        follow_redirects=True
+    )
+    assert b"Watch added" in res.data
+
+    # Give the thread time to pick up the first version
+    time.sleep(3)
+
+    # Build the notification URL from the test notification endpoint,
+    # swapping the 'http' scheme for 'json'
+    url = url_for('test_notification_endpoint', _external=True)
+    notification_url = url.replace('http', 'json')
+
+    print(">>>> Notification URL: " + notification_url)
+
+    # Just a regular notification setting, this will be used by the special 'filter not found' notification
+    notification_form_data = {"notification_urls": notification_url,
+                              "notification_title": "New ChangeDetection.io Notification - {watch_url}",
+                              "notification_body": "BASE URL: {base_url}\n"
+                                                   "Watch URL: {watch_url}\n"
+                                                   "Watch UUID: {watch_uuid}\n"
+                                                   "Watch title: {watch_title}\n"
+                                                   "Watch tag: {watch_tag}\n"
+                                                   "Preview: {preview_url}\n"
+                                                   "Diff URL: {diff_url}\n"
+                                                   "Snapshot: {current_snapshot}\n"
+                                                   "Diff: {diff}\n"
+                                                   "Diff Full: {diff_full}\n"
+                                                   ":-)",
+                              "notification_format": "Text"}
+
+    notification_form_data.update({
+        "url": test_url,
+        "tag": "my tag",
+        "title": "my title",
+        "headers": "",
+        "css_filter": '.ticket-available',
+        "fetch_backend": "html_requests"})
+
+    res = client.post(
+        url_for("edit_page", uuid="first"),
+        data=notification_form_data,
+        follow_redirects=True
+    )
+    assert b"Updated watch." in res.data
+    time.sleep(3)
+
+    # Shouldn't exist, shouldn't have fired
+    assert not os.path.isfile("test-datastore/notification.txt")
+    # Now the filter should exist
+    set_response_with_filter()
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    time.sleep(3)
+
+    assert os.path.isfile("test-datastore/notification.txt")
+
+    with open("test-datastore/notification.txt", 'r') as f:
+        notification = f.read()
+
+    assert 'Ticket now on sale' in notification
+    os.unlink("test-datastore/notification.txt")
+
+
+    # Test that if it gets removed, then re-added, we get a notification
+    # Remove the target and re-add it, we should get a new notification
+    set_response_without_filter()
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    time.sleep(3)
+    assert not os.path.isfile("test-datastore/notification.txt")
+
+    set_response_with_filter()
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    time.sleep(3)
+    assert os.path.isfile("test-datastore/notification.txt")
+
+# Also test that the filter was updated after the first one was requested
--- a/changedetectionio/tests/test_filter_failure_notification.py
+++ b/changedetectionio/tests/test_filter_failure_notification.py
@ -0,0 +1,144 @@
+import os
+import time
+import re
+from flask import url_for
+from .util import set_original_response, live_server_setup
+from changedetectionio.model import App
+
+
+def set_response_with_filter():
+    test_return_data = """<html>
+       <body>
+     Some initial text</br>
+     <p>Which is across multiple lines</p>
+     </br>
+     So let's see what happens.  </br>
+     <div id="nope-doesnt-exist">Some text thats the same</div>     
+     </body>
+     </html>
+    """
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write(test_return_data)
+    return None
+
+def run_filter_test(client, content_filter):
+
+    # Give the endpoint time to spin up
+    time.sleep(1)
+    # cleanup for the next
+    client.get(
+        url_for("form_delete", uuid="all"),
+        follow_redirects=True
+    )
+    if os.path.isfile("test-datastore/notification.txt"):
+        os.unlink("test-datastore/notification.txt")
+
+    # Add our URL to the import page
+    test_url = url_for('test_endpoint', _external=True)
+    res = client.post(
+        url_for("form_quick_watch_add"),
+        data={"url": test_url, "tag": ''},
+        follow_redirects=True
+    )
+
+    assert b"Watch added" in res.data
+
+    # Give the thread time to pick up the first version
+    time.sleep(3)
+
+    # Build the notification URL from the test notification endpoint,
+    # swapping the 'http' scheme for 'json'
+    url = url_for('test_notification_endpoint', _external=True)
+    notification_url = url.replace('http', 'json')
+
+    print(">>>> Notification URL: " + notification_url)
+
+    # Just a regular notification setting, this will be used by the special 'filter not found' notification
+    notification_form_data = {"notification_urls": notification_url,
+                              "notification_title": "New ChangeDetection.io Notification - {watch_url}",
+                              "notification_body": "BASE URL: {base_url}\n"
+                                                   "Watch URL: {watch_url}\n"
+                                                   "Watch UUID: {watch_uuid}\n"
+                                                   "Watch title: {watch_title}\n"
+                                                   "Watch tag: {watch_tag}\n"
+                                                   "Preview: {preview_url}\n"
+                                                   "Diff URL: {diff_url}\n"
+                                                   "Snapshot: {current_snapshot}\n"
+                                                   "Diff: {diff}\n"
+                                                   "Diff Full: {diff_full}\n"
+                                                   ":-)",
+                              "notification_format": "Text"}
+
+    notification_form_data.update({
+        "url": test_url,
+        "tag": "my tag",
+        "title": "my title",
+        "headers": "",
+        "filter_failure_notification_send": 'y',
+        "css_filter": content_filter,
+        "fetch_backend": "html_requests"})
+
+    res = client.post(
+        url_for("edit_page", uuid="first"),
+        data=notification_form_data,
+        follow_redirects=True
+    )
+    assert b"Updated watch." in res.data
+    time.sleep(3)
+
+    # Now the notification should not exist, because we didnt reach the threshold
+    assert not os.path.isfile("test-datastore/notification.txt")
+
+    for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT):
+        res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
+        time.sleep(3)
+
+    # We should see something in the frontend
+    assert b'Warning, filter' in res.data
+
+    # Now it should exist and contain our "filter not found" alert
+    assert os.path.isfile("test-datastore/notification.txt")
+    notification = False
+    with open("test-datastore/notification.txt", 'r') as f:
+        notification = f.read()
+    assert 'CSS/xPath filter was not present in the page' in notification
+    assert content_filter.replace('"', '\\"') in notification
+
+    # Remove it and prove that it doesnt trigger when not expected
+    os.unlink("test-datastore/notification.txt")
+    set_response_with_filter()
+
+    for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT):
+        client.get(url_for("form_watch_checknow"), follow_redirects=True)
+        time.sleep(3)
+
+    # It should have sent a notification, but..
+    assert os.path.isfile("test-datastore/notification.txt")
+    # but it should not contain the info about the failed filter
+    with open("test-datastore/notification.txt", 'r') as f:
+        notification = f.read()
+    assert not 'CSS/xPath filter was not present in the page' in notification
+
+    # cleanup for the next
+    client.get(
+        url_for("form_delete", uuid="all"),
+        follow_redirects=True
+    )
+    os.unlink("test-datastore/notification.txt")
+
+
+def test_setup(live_server):
+    live_server_setup(live_server)
+
+def test_check_css_filter_failure_notification(client, live_server):
+    set_original_response()
+    time.sleep(1)
+    run_filter_test(client, '#nope-doesnt-exist')
+
+def test_check_xpath_filter_failure_notification(client, live_server):
+    set_original_response()
+    time.sleep(1)
+    run_filter_test(client, '//*[@id="nope-doesnt-exist"]')
+
+# Test that notification is never sent
--- a/changedetectionio/tests/test_notification.py
+++ b/changedetectionio/tests/test_notification.py
@ -36,7 +36,7 @@ def test_check_notification(client, live_server):
    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
-        url_for("form_watch_add"),
+        url_for("form_quick_watch_add"),
        data={"url": test_url, "tag": ''},
        follow_redirects=True
    )
@ -172,7 +172,7 @@ def test_notification_validation(client, live_server):
    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
-        url_for("form_watch_add"),
+        url_for("form_quick_watch_add"),
        data={"url": test_url, "tag": 'nice one'},
        follow_redirects=True
    )
--- a/changedetectionio/tests/test_notification_errors.py
+++ b/changedetectionio/tests/test_notification_errors.py
@ -16,7 +16,7 @@ def test_check_notification_error_handling(client, live_server):
    # use a different URL so that it doesnt interfere with the actual check until we are ready
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
-        url_for("form_watch_add"),
+        url_for("form_quick_watch_add"),
        data={"url": "https://changedetection.io/CHANGELOG.txt", "tag": ''},
        follow_redirects=True
    )
--- a/changedetectionio/tests/test_obfuscations.py
+++ b/changedetectionio/tests/test_obfuscations.py
@ -0,0 +1,43 @@
+#!/usr/bin/python3
+
+import time
+from flask import url_for
+from .util import live_server_setup
+
+
+def set_original_ignore_response():
+    test_return_data = """<html>
+       <body>
+     <span>The price is</span><span>$<!-- -->90<!-- -->.<!-- -->74</span>
+     </body>
+     </html>
+
+    """
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write(test_return_data)
+
+
+def test_obfuscations(client, live_server):
+    set_original_ignore_response()
+    live_server_setup(live_server)
+    time.sleep(1)
+    # Add our URL to the import page
+    test_url = url_for('test_endpoint', _external=True)
+    res = client.post(
+        url_for("import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
+
+    # Give the thread time to pick it up
+    time.sleep(3)
+
+    # Check HTML conversion was detected and worked
+    res = client.get(
+        url_for("preview_page", uuid="first"),
+        follow_redirects=True
+    )
+
+    assert b'$90.74' in res.data
--- a/changedetectionio/tests/test_xpath_selector.py
+++ b/changedetectionio/tests/test_xpath_selector.py
@ -86,6 +86,7 @@ def test_check_xpath_filter_utf8(client, live_server):
        follow_redirects=True
    )
    assert b"1 Imported" in res.data
+    time.sleep(1)
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"css_filter": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
@ -99,6 +100,68 @@ def test_check_xpath_filter_utf8(client, live_server):
    assert b'Deleted' in res.data


+# Handle utf-8 charset replies https://github.com/dgtlmoon/changedetection.io/pull/613
+def test_check_xpath_text_function_utf8(client, live_server):
+    filter='//item/title/text()'
+
+    d='''<?xml version="1.0" encoding="UTF-8"?>
+<rss xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
+	<channel>
+		<title>rpilocator.com</title>
+		<link>https://rpilocator.com</link>
+		<description>Find Raspberry Pi Computers in Stock</description>
+		<lastBuildDate>Thu, 19 May 2022 23:27:30 GMT</lastBuildDate>
+		<image>
+			<url>https://rpilocator.com/favicon.png</url>
+			<title>rpilocator.com</title>
+			<link>https://rpilocator.com/</link>
+			<width>32</width>
+			<height>32</height>
+		</image>
+		<item>
+			<title>Stock Alert (UK): RPi CM4</title>
+			<foo>something else unrelated</foo>
+		</item>
+		<item>
+			<title>Stock Alert (UK): Big monitor</title>
+			<foo>something else unrelated</foo>
+		</item>		
+	</channel>
+</rss>'''
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write(d)
+
+    # Add our URL to the import page
+    test_url = url_for('test_endpoint', _external=True, content_type="application/rss+xml;charset=UTF-8")
+    res = client.post(
+        url_for("import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
+    time.sleep(1)
+    res = client.post(
+        url_for("edit_page", uuid="first"),
+        data={"css_filter": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
+        follow_redirects=True
+    )
+    assert b"Updated watch." in res.data
+    time.sleep(3)
+    res = client.get(url_for("index"))
+    assert b'Unicode strings with encoding declaration are not supported.' not in res.data
+
+    # The preview should contain the text() of each matched <title> element
+    res = client.get(
+        url_for("preview_page", uuid="first"),
+        follow_redirects=True
+    )
+
+    assert b'<div class="">Stock Alert (UK): RPi CM4' in res.data
+    assert b'<div class="">Stock Alert (UK): Big monitor' in res.data
+
+    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data

 def test_check_markup_xpath_filter_restriction(client, live_server):
    sleep_time_for_fetch_thread = 3
--- a/changedetectionio/update_worker.py
+++ b/changedetectionio/update_worker.py
@ -3,6 +3,8 @@ import queue
 import time

 from changedetectionio import content_fetcher
+from changedetectionio.html_tools import FilterNotFoundInResponse
+
 # A single update worker
 #
 # Requests for checking on a single site(watch) from a queue of watches
@ -19,6 +21,32 @@ class update_worker(threading.Thread):
        self.datastore = datastore
        super().__init__(*args, **kwargs)

+    def send_filter_failure_notification(self, uuid):
+
+        threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts')
+        watch = self.datastore.data['watching'].get(uuid, False)
+
+        n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page',
+                    'notification_body': "Your configured CSS/xPath filter of '{}' for {{watch_url}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{base_url}}/edit/{{watch_uuid}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format(
+                        watch['css_filter'],
+                        threshold),
+                    'notification_format': 'text'}
+
+        if len(watch['notification_urls']):
+            n_object['notification_urls'] = watch['notification_urls']
+
+        elif len(self.datastore.data['settings']['application']['notification_urls']):
+            n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls']
+
+        # Only prepare to notify if the rules above matched
+        if 'notification_urls' in n_object:
+            n_object.update({
+                'watch_url': watch['url'],
+                'uuid': uuid
+            })
+            self.notification_q.put(n_object)
+            print("Sent filter not found notification for {}".format(uuid))
+
    def run(self):
        from changedetectionio import fetch_site_status

@ -37,10 +65,12 @@ class update_worker(threading.Thread):
                if uuid in list(self.datastore.data['watching'].keys()):

                    changed_detected = False
-                    contents = ""
+                    contents = b''
                    screenshot = False
                    update_obj= {}
                    xpath_data = False
+                    process_changedetection_results = True
+
                    now = time.time()

                    try:
@ -52,14 +82,35 @@ class update_worker(threading.Thread):
                            raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
                    except PermissionError as e:
                        self.app.logger.error("File permission error updating", uuid, str(e))
+                        process_changedetection_results = False
                    except content_fetcher.ReplyWithContentButNoText as e:
                        # Totally fine, it's by choice - just continue on, nothing more to care about
                        # Page had elements/content but no renderable text
-                        if self.datastore.data['watching'].get(uuid, False) and self.datastore.data['watching'][uuid].get('css_filter'):
-                            self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found (CSS / xPath Filter not found in page?)"})
-                        else:
+                        # Backend (not filters) gave zero output
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found."})
-                        pass
+                        process_changedetection_results = False
+
+                    except FilterNotFoundInResponse as e:
+                        err_text = "Warning, filter '{}' not found".format(str(e))
+                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
+                                                                           # So that we get a trigger when the content is added again
+                                                                           'previous_md5': ''})
+
+                        # Only when enabled, send the notification
+                        if self.datastore.data['watching'][uuid].get('filter_failure_notification_send', False):
+                            c = self.datastore.data['watching'][uuid].get('consecutive_filter_failures', 5)
+                            c += 1
+                            # Send notification if we reached the threshold?
+                            threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
+                                                                                           0)
+                            print("Filter for {} not found, consecutive_filter_failures: {}".format(uuid, c))
+                            if threshold > 0 and c >= threshold:
+                                self.send_filter_failure_notification(uuid)
+                                c = 0
+                            self.datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
+
+                        process_changedetection_results = True
+
                    except content_fetcher.EmptyReply as e:
                        # Some kind of custom to-str handler in the exception handler that does this?
                        err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
@ -69,16 +120,22 @@ class update_worker(threading.Thread):
                        err_text = "Screenshot unavailable, page did not render fully in the expected time - try increasing 'Wait seconds before extracting text'"
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                           'last_check_status': e.status_code})
+                        process_changedetection_results = False
                    except content_fetcher.PageUnloadable as e:
                        err_text = "Page request from server didnt respond correctly"
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                           'last_check_status': e.status_code})
-
                    except Exception as e:
                        self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
-
+                        # Other serious error
+                        process_changedetection_results = False
                    else:
+                        # Mark that we never had any failures
+                        update_obj['consecutive_filter_failures'] = 0
+
+                    # Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
+                    if process_changedetection_results:
                        try:
                            watch = self.datastore.data['watching'][uuid]
                            fname = "" # Saved history text filename
@ -88,7 +145,6 @@ class update_worker(threading.Thread):
                                # A change was detected
                                fname = watch.save_history_text(contents=contents, timestamp=str(round(time.time())))

-                            # Generally update anything interesting returned
                            self.datastore.update_watch(uuid=uuid, update_obj=update_obj)

                            # A change was detected
@ -154,7 +210,7 @@ class update_worker(threading.Thread):
                            self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
                            self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})

-                    finally:
+
                        # Always record that we atleast tried
                        self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
                                                                           'last_checked': round(time.time())})
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -24,7 +24,7 @@ services:
  #             https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy
  #
  #       Alternative Playwright URL, do not use "'s or 's!
-  #      - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000/
+  #      - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000/?stealth=1&--disable-web-security=true
  #
  #       Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password
  #
--- a/16
+++ b/16
@ -1,16 +0,0 @@
-diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py
-index 331ef959..ca43edc8 100644
--- a/changedetectionio/content_fetcher.py
-+++ b/changedetectionio/content_fetcher.py
-@@ -309,7 +309,10 @@ class base_html_playwright(Fetcher):
-                 page.set_default_navigation_timeout(90000)
-                 page.set_default_timeout(90000)
- 
-               # Bug - never set viewport size BEFORE page.goto
-+                # Listen for all console events and handle errors
-+                page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
-+
-+                # Bug - never set viewport size BEFORE page.goto
- 
-                 # Waits for the next navigation. Using Python context manager
-                 # prevents a race condition between clicking and waiting for a navigation.