Filters can now accept a list/multiple filters (#1064) #623

2 years ago · 359fc48fb4
parent d0efeb9770
commit 359fc48fb4
23 changed files with 237 additions and 145 deletions
--- a/changedetectionio/init.py
+++ b/changedetectionio/init.py
@ -599,7 +599,7 @@ def changedetection_app(config=None, datastore_o=None):
                    extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)

            # Reset the previous_md5 so we process a new snapshot including stripping ignore text.
-            if form.css_filter.data.strip() != datastore.data['watching'][uuid]['css_filter']:
+            if form.include_filters.data != datastore.data['watching'][uuid].get('include_filters', []):
                if len(datastore.data['watching'][uuid].history):
                    extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)

--- a/changedetectionio/content_fetcher.py
+++ b/changedetectionio/content_fetcher.py
@ -164,16 +164,16 @@ class Fetcher():
                }


-                // inject the current one set in the css_filter, which may be a CSS rule
+                // inject the current one set in the include_filters, which may be a CSS rule
                // used for displaying the current one in VisualSelector, where its not one we generated.
-                if (css_filter.length) {
+                if (include_filters.length) {
                   q=false;                   
                   try {
                       // is it xpath?
-                       if (css_filter.startsWith('/') || css_filter.startsWith('xpath:')) {
-                         q=document.evaluate(css_filter.replace('xpath:',''), document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
+                       if (include_filters.startsWith('/') || include_filters.startsWith('xpath:')) {
+                         q=document.evaluate(include_filters.replace('xpath:',''), document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
                       } else {
-                         q=document.querySelector(css_filter);
+                         q=document.querySelector(include_filters);
                       }                       
                   } catch (e) {
                    // Maybe catch DOMException and alert? 
@ -186,7 +186,7 @@ class Fetcher():
                                   
                   if (bbox && bbox['width'] >0 && bbox['height']>0) {                       
                       size_pos.push({
-                           xpath: css_filter,
+                           xpath: include_filters,
                           width: bbox['width'], 
                           height: bbox['height'],
                           left: bbox['left'],
@ -220,7 +220,7 @@ class Fetcher():
            request_body,
            request_method,
            ignore_status_codes=False,
-            current_css_filter=None):
+            current_include_filters=None):
        # Should set self.error, self.status_code and self.content
        pass

@ -310,7 +310,7 @@ class base_html_playwright(Fetcher):
            request_body,
            request_method,
            ignore_status_codes=False,
-            current_css_filter=None):
+            current_include_filters=None):

        from playwright.sync_api import sync_playwright
        import playwright._impl._api_types
@ -413,10 +413,10 @@ class base_html_playwright(Fetcher):
            self.status_code = response.status
            self.headers = response.all_headers()

-            if current_css_filter is not None:
-                page.evaluate("var css_filter={}".format(json.dumps(current_css_filter)))
+            if current_include_filters is not None:
+                page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
            else:
-                page.evaluate("var css_filter=''")
+                page.evaluate("var include_filters=''")

            self.xpath_data = page.evaluate("async () => {" + self.xpath_element_js + "}")

@ -497,7 +497,7 @@ class base_html_webdriver(Fetcher):
            request_body,
            request_method,
            ignore_status_codes=False,
-            current_css_filter=None):
+            current_include_filters=None):

        from selenium import webdriver
        from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
@ -573,7 +573,7 @@ class html_requests(Fetcher):
            request_body,
            request_method,
            ignore_status_codes=False,
-            current_css_filter=None):
+            current_include_filters=None):

        # Make requests use a more modern looking user-agent
        if not 'User-Agent' in request_headers:
--- a/changedetectionio/fetch_site_status.py
+++ b/changedetectionio/fetch_site_status.py
@ -10,6 +10,12 @@ from changedetectionio import content_fetcher, html_tools
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


+class FilterNotFoundInResponse(ValueError):
+    def __init__(self, msg):
+        ValueError.__init__(self, msg)
+
+
+
 # Some common stuff here that can be moved to a base class
 # (set_proxy_from_list)
 class perform_site_check():
@ -104,7 +110,7 @@ class perform_site_check():
        if watch['webdriver_js_execute_code'] is not None and watch['webdriver_js_execute_code'].strip():
            fetcher.webdriver_js_execute_code = watch['webdriver_js_execute_code']

-        fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch['css_filter'])
+        fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch['include_filters'])
        fetcher.quit()

        self.screenshot = fetcher.screenshot
@ -128,25 +134,26 @@ class perform_site_check():
            is_html = False
            is_json = False

-        css_filter_rule = watch['css_filter']
+        include_filters_rule = watch['include_filters']
        subtractive_selectors = watch.get(
            "subtractive_selectors", []
        ) + self.datastore.data["settings"]["application"].get(
            "global_subtractive_selectors", []
        )

-        has_filter_rule = css_filter_rule and len(css_filter_rule.strip())
+        has_filter_rule = include_filters_rule and len("".join(include_filters_rule).strip())
        has_subtractive_selectors = subtractive_selectors and len(subtractive_selectors[0].strip())

        if is_json and not has_filter_rule:
-            css_filter_rule = "json:$"
+            include_filters_rule.append("json:$")
            has_filter_rule = True

        if has_filter_rule:
            json_filter_prefixes = ['json:', 'jq:']
-            if any(prefix in css_filter_rule for prefix in json_filter_prefixes):
-                stripped_text_from_html = html_tools.extract_json_as_string(content=fetcher.content, json_filter=css_filter_rule)
-                is_html = False
+            for filter in include_filters_rule:
+                if any(prefix in filter for prefix in json_filter_prefixes):
+                    stripped_text_from_html += html_tools.extract_json_as_string(content=fetcher.content, json_filter=filter)
+                    is_html = False

        if is_html or is_source:
            
@ -161,18 +168,28 @@ class perform_site_check():
            else:
                # Then we assume HTML
                if has_filter_rule:
-                    # For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
-                    if css_filter_rule[0] == '/' or css_filter_rule.startswith('xpath:'):
-                        html_content = html_tools.xpath_filter(xpath_filter=css_filter_rule.replace('xpath:', ''),
-                                                               html_content=fetcher.content)
-                    else:
-                        # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
-                        html_content = html_tools.css_filter(css_filter=css_filter_rule, html_content=fetcher.content)
+                    html_content = ""
+                    for filter_rule in include_filters_rule:
+                        # For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
+                        if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
+                            html_content += html_tools.xpath_filter(xpath_filter=filter_rule.replace('xpath:', ''),
+                                                                    html_content=fetcher.content,
+                                                                    append_pretty_line_formatting=not is_source)
+                        else:
+                            # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
+                            html_content += html_tools.include_filters(include_filters=filter_rule,
+                                                                  html_content=fetcher.content,
+                                                                  append_pretty_line_formatting=not is_source)
+
+                    if not html_content.strip():
+                        raise FilterNotFoundInResponse(include_filters_rule)

                if has_subtractive_selectors:
                    html_content = html_tools.element_removal(subtractive_selectors, html_content)

-                if not is_source:
+                if is_source:
+                    stripped_text_from_html = html_content
+                else:
                    # extract text
                    stripped_text_from_html = \
                        html_tools.html_to_text(
@ -182,9 +199,6 @@ class perform_site_check():
                                "render_anchor_tag_content", False)
                        )

-                elif is_source:
-                    stripped_text_from_html = html_content
-
        # Re #340 - return the content before the 'ignore text' was applied
        text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')

--- a/changedetectionio/forms.py
+++ b/changedetectionio/forms.py
@ -349,7 +349,7 @@ class watchForm(commonSettingsForm):

    time_between_check = FormField(TimeBetweenCheckForm)

-    css_filter = StringField('CSS/JSON/XPATH Filter', [ValidateCSSJSONXPATHInput()], default='')
+    include_filters = StringListField('CSS/JSONPath/JQ/XPath Filters', [ValidateCSSJSONXPATHInput()], default='')

    subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])

--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@ -7,26 +7,30 @@ from typing import List
 import json
 import re

-class FilterNotFoundInResponse(ValueError):
-    def __init__(self, msg):
-        ValueError.__init__(self, msg)
+# HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
+TEXT_FILTER_LIST_LINE_SUFFIX = "<br/>"

 class JSONNotFound(ValueError):
    def __init__(self, msg):
        ValueError.__init__(self, msg)
        
-
 # Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches
-def css_filter(css_filter, html_content):
+def include_filters(include_filters, html_content, append_pretty_line_formatting=False):
    soup = BeautifulSoup(html_content, "html.parser")
    html_block = ""
-    r = soup.select(css_filter, separator="")
-    if len(html_content) > 0 and len(r) == 0:
-        raise FilterNotFoundInResponse(css_filter)
-    for item in r:
-        html_block += str(item)
+    r = soup.select(include_filters, separator="")
+
+    for element in r:
+        # When there's more than 1 match, then add the suffix to separate each line
+        # And where the matched result doesn't include something that will cause Inscriptis to add a newline
+        # (This way each 'match' reliably has a new-line in the diff)
+        # Divs are converted to 4 whitespaces by inscriptis
+        if append_pretty_line_formatting and len(html_block) and not element.name in (['br', 'hr', 'div', 'p']):
+            html_block += TEXT_FILTER_LIST_LINE_SUFFIX

-    return html_block + "\n"
+        html_block += str(element)
+
+    return html_block

 def subtractive_css_selector(css_selector, html_content):
    soup = BeautifulSoup(html_content, "html.parser")
@ -42,25 +46,29 @@ def element_removal(selectors: List[str], html_content):


 # Return str Utf-8 of matched rules
-def xpath_filter(xpath_filter, html_content):
+def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False):
    from lxml import etree, html

    tree = html.fromstring(bytes(html_content, encoding='utf-8'))
    html_block = ""

    r = tree.xpath(xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'})
-    if len(html_content) > 0 and len(r) == 0:
-        raise FilterNotFoundInResponse(xpath_filter)
-
    #@note: //title/text() wont work where <title>CDATA..

    for element in r:
+        # When there's more than 1 match, then add the suffix to separate each line
+        # And where the matched result doesn't include something that will cause Inscriptis to add a newline
+        # (This way each 'match' reliably has a new-line in the diff)
+        # Divs are converted to 4 whitespaces by inscriptis
+        if append_pretty_line_formatting and len(html_block) and (not hasattr( element, 'tag' ) or not element.tag in (['br', 'hr', 'div', 'p'])):
+            html_block += TEXT_FILTER_LIST_LINE_SUFFIX
+
        if type(element) == etree._ElementStringResult:
-            html_block += str(element) + "<br/>"
+            html_block += str(element)
        elif type(element) == etree._ElementUnicodeResult:
-            html_block += str(element) + "<br/>"
+            html_block += str(element)
        else:
-            html_block += etree.tostring(element, pretty_print=True).decode('utf-8') + "<br/>"
+            html_block += etree.tostring(element, pretty_print=True).decode('utf-8')

    return html_block

--- a/changedetectionio/importer.py
+++ b/changedetectionio/importer.py
@ -103,12 +103,12 @@ class import_distill_io_json(Importer):
                    pass
                except IndexError:
                    pass
-
+                extras['include_filters'] = []
                try:
-                    extras['css_filter'] = d_config['selections'][0]['frames'][0]['includes'][0]['expr']
                    if d_config['selections'][0]['frames'][0]['includes'][0]['type'] == 'xpath':
-                        extras['css_filter'] = 'xpath:' + extras['css_filter']
-
+                        extras['include_filters'].append('xpath:' + d_config['selections'][0]['frames'][0]['includes'][0]['expr'])
+                    else:
+                        extras['include_filters'].append(d_config['selections'][0]['frames'][0]['includes'][0]['expr'])
                except KeyError:
                    pass
                except IndexError:
--- a/changedetectionio/model/Watch.py
+++ b/changedetectionio/model/Watch.py
@ -36,7 +36,7 @@ class model(dict):
            'notification_body': None,
            'notification_format': default_notification_format_for_watch,
            'notification_muted': False,
-            'css_filter': '',
+            'include_filters': [],
            'last_error': False,
            'extract_text': [],  # Extract text by regex after filters
            'subtractive_selectors': [],
--- a/changedetectionio/static/js/visual-selector.js
+++ b/changedetectionio/static/js/visual-selector.js
@ -50,7 +50,7 @@ $(document).ready(function() {
        state_clicked=false;
        ctx.clearRect(0, 0, c.width, c.height);
        xctx.clearRect(0, 0, c.width, c.height);
-        $("#css_filter").val('');
+        $("#include_filters").val('');
    });


@ -68,7 +68,7 @@ $(document).ready(function() {
               xctx = c.getContext("2d");
                // redline highlight context
               ctx = c.getContext("2d");
-               current_default_xpath =$("#css_filter").val();
+               current_default_xpath =$("#include_filters").val();
               fetch_data();
               $('#selector-canvas').off("mousemove mousedown");
               // screenshot_url defined in the edit.html template
@ -205,9 +205,9 @@ $(document).ready(function() {
        var sel = selector_data['size_pos'][current_selected_i];
        if (sel[0] == '/') {
        // @todo - not sure just checking / is right
-            $("#css_filter").val('xpath:'+sel.xpath);
+            $("#include_filters").val('xpath:'+sel.xpath);
        } else {
-            $("#css_filter").val(sel.xpath);
+            $("#include_filters").val(sel.xpath);
        }
        xctx.fillStyle = 'rgba(205,205,205,0.95)';
        xctx.strokeStyle = 'rgba(225,0,0,0.9)';
--- a/changedetectionio/store.py
+++ b/changedetectionio/store.py
@ -82,8 +82,13 @@ class ChangeDetectionStore:
        except (FileNotFoundError, json.decoder.JSONDecodeError):
            if include_default_watches:
                print("Creating JSON store at", self.datastore_path)
-                self.add_watch(url='https://news.ycombinator.com/', tag='Tech news')
-                self.add_watch(url='https://changedetection.io/CHANGELOG.txt', tag='changedetection.io')
+                self.add_watch(url='https://news.ycombinator.com/',
+                               tag='Tech news',
+                               extras={'fetch_backend': 'html_requests'})
+
+                self.add_watch(url='https://changedetection.io/CHANGELOG.txt',
+                               tag='changedetection.io',
+                               extras={'fetch_backend': 'html_requests'})

        self.__data['version_tag'] = version_tag

@ -267,7 +272,7 @@ class ChangeDetectionStore:
            extras = {}
        # should always be str
        if tag is None or not tag:
-            tag=''
+            tag = ''

        # Incase these are copied across, assume it's a reference and deepcopy()
        apply_extras = deepcopy(extras)
@ -282,17 +287,31 @@ class ChangeDetectionStore:
                res = r.json()

                # List of permissible attributes we accept from the wild internet
-                for k in ['url', 'tag',
-                          'paused', 'title',
-                          'previous_md5', 'headers',
-                          'body', 'method',
-                          'ignore_text', 'css_filter',
-                          'subtractive_selectors', 'trigger_text',
-                          'extract_title_as_title', 'extract_text',
-                          'text_should_not_be_present',
-                          'webdriver_js_execute_code']:
+                for k in [
+                    'body',
+                    'css_filter',
+                    'extract_text',
+                    'extract_title_as_title',
+                    'headers',
+                    'ignore_text',
+                    'include_filters',
+                    'method',
+                    'paused',
+                    'previous_md5',
+                    'subtractive_selectors',
+                    'tag',
+                    'text_should_not_be_present',
+                    'title',
+                    'trigger_text',
+                    'webdriver_js_execute_code',
+                    'url',
+                ]:
                    if res.get(k):
-                        apply_extras[k] = res[k]
+                        if k != 'css_filter':
+                            apply_extras[k] = res[k]
+                        else:
+                            # We renamed the field and made it a list
+                            apply_extras['include_filters'] = [res['css_filter']]

            except Exception as e:
                logging.error("Error fetching metadata for shared watch link", url, str(e))
@ -315,12 +334,13 @@ class ChangeDetectionStore:
                    del apply_extras[k]

            new_watch.update(apply_extras)
-            self.__data['watching'][new_uuid]=new_watch
+            self.__data['watching'][new_uuid] = new_watch

        self.__data['watching'][new_uuid].ensure_data_dir_exists()

        if write_to_disk_now:
            self.sync_to_json()
+
        return new_uuid

    def visualselector_data_is_ready(self, watch_uuid):
@ -584,3 +604,14 @@ class ChangeDetectionStore:
        for v in ['User-Agent', 'Accept', 'Accept-Encoding', 'Accept-Language']:
            if self.data['settings']['headers'].get(v):
                del self.data['settings']['headers'][v]
+
+    # Migrate the old single 'css_filter' string to the new 'include_filters' list (css_filter -> [css_filter])
+    def update_8(self):
+        for uuid, watch in self.data['watching'].items():
+            try:
+                existing_filter = watch.get('css_filter', '')
+                if existing_filter:
+                    watch['include_filters'] = [existing_filter]
+            except:
+                continue
+        return
--- a/changedetectionio/templates/edit.html
+++ b/changedetectionio/templates/edit.html
@ -174,15 +174,17 @@ User-Agent: wonderbra 1.0") }}
                        </div>
                    </fieldset>
                    <div class="pure-control-group">
-                        {% set field = render_field(form.css_filter,
-                            placeholder=".class-name or #some-id, or other CSS selector rule.",
+                        {% set field = render_field(form.include_filters,
+                            rows=5,
+                            placeholder="#example
+xpath://body/div/span[contains(@class, 'example-class')]",
                            class="m-d")
                        %}
                        {{ field }}
                        {% if '/text()' in  field %}
                          <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br/>
                        {% endif %}
-                        <span class="pure-form-message-inline">
+                        <span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br/>
                    <ul>
                        <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
                        <li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
--- a/changedetectionio/tests/proxy_list/test_multiple_proxy.py
+++ b/changedetectionio/tests/proxy_list/test_multiple_proxy.py
@ -24,7 +24,7 @@ def test_preferred_proxy(client, live_server):
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={
-                "css_filter": "",
+                "include_filters": "",
                "fetch_backend": "html_requests",
                "headers": "",
                "proxy": "proxy-two",
--- a/changedetectionio/tests/test_auth.py
+++ b/changedetectionio/tests/test_auth.py
@ -23,7 +23,7 @@ def test_basic_auth(client, live_server):
    # Check form validation
    res = client.post(
        url_for("edit_page", uuid="first"),
-        data={"css_filter": "", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
+        data={"include_filters": "", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
--- a/changedetectionio/tests/test_css_selector.py
+++ b/changedetectionio/tests/test_css_selector.py
@ -46,22 +46,23 @@ def set_modified_response():


 # Test that the CSS extraction works how we expect, important here is the right placing of new lines \n's
-def test_css_filter_output():
-    from changedetectionio import fetch_site_status
+def test_include_filters_output():
    from inscriptis import get_text

    # Check text with sub-parts renders correctly
    content = """<html> <body><div id="thingthing" >  Some really <b>bold</b> text  </div> </body> </html>"""
-    html_blob = css_filter(css_filter="#thingthing", html_content=content)
+    html_blob = include_filters(include_filters="#thingthing", html_content=content)
    text = get_text(html_blob)
    assert text == "  Some really bold text"

    content = """<html> <body>
    <p>foo bar blah</p>
-    <div class="parts">Block A</div> <div class="parts">Block B</div></body> 
+    <DIV class="parts">Block A</DiV> <div class="parts">Block B</DIV></body> 
    </html>
 """
-    html_blob = css_filter(css_filter=".parts", html_content=content)
+
+    # in xPath this would be //*[@class='parts']
+    html_blob = include_filters(include_filters=".parts", html_content=content)
    text = get_text(html_blob)

    # Divs are converted to 4 whitespaces by inscriptis
@ -69,10 +70,10 @@ def test_css_filter_output():


 # Tests the whole stack works with the CSS Filter
-def test_check_markup_css_filter_restriction(client, live_server):
+def test_check_markup_include_filters_restriction(client, live_server):
    sleep_time_for_fetch_thread = 3

-    css_filter = "#sametext"
+    include_filters = "#sametext"

    set_original_response()

@ -98,7 +99,7 @@ def test_check_markup_css_filter_restriction(client, live_server):
    # Add our URL to the import page
    res = client.post(
        url_for("edit_page", uuid="first"),
-        data={"css_filter": css_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
+        data={"include_filters": include_filters, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
@ -107,7 +108,7 @@ def test_check_markup_css_filter_restriction(client, live_server):
    res = client.get(
        url_for("edit_page", uuid="first"),
    )
-    assert bytes(css_filter.encode('utf-8')) in res.data
+    assert bytes(include_filters.encode('utf-8')) in res.data

    # Trigger a check
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
@ -126,3 +127,58 @@ def test_check_markup_css_filter_restriction(client, live_server):
    # Because it should be looking at only that 'sametext' id
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data
+
+
+# Tests the whole stack works with multiple filters (one CSS rule and one xPath rule, one per line)
+def test_check_multiple_filters(client, live_server):
+    sleep_time_for_fetch_thread = 3
+
+    include_filters = "#blob-a\r\nxpath://*[contains(@id,'blob-b')]"
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write("""<html><body>
+     <div id="blob-a">Blob A</div>
+     <div id="blob-b">Blob B</div>
+     <div id="blob-c">Blob C</div>
+     </body>
+     </html>
+    """)
+
+    # Give the endpoint time to spin up
+    time.sleep(1)
+
+    # Add our URL to the import page
+    test_url = url_for('test_endpoint', _external=True)
+    res = client.post(
+        url_for("import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
+    time.sleep(1)
+
+    # Go to the edit page and set our include filters
+    # (one CSS rule and one xPath rule, separated by a newline)
+    res = client.post(
+        url_for("edit_page", uuid="first"),
+        data={"include_filters": include_filters,
+              "url": test_url,
+              "tag": "",
+              "headers": "",
+              'fetch_backend': "html_requests"},
+        follow_redirects=True
+    )
+    assert b"Updated watch." in res.data
+
+    # Give the thread time to pick it up
+    time.sleep(sleep_time_for_fetch_thread)
+
+    res = client.get(
+        url_for("preview_page", uuid="first"),
+        follow_redirects=True
+    )
+
+    # Only the two blobs should be here
+    assert b"Blob A" in res.data # CSS was ok
+    assert b"Blob B" in res.data # xPath was ok
+    assert b"Blob C" not in res.data # Should not be included
--- a/changedetectionio/tests/test_extract_regex.py
+++ b/changedetectionio/tests/test_extract_regex.py
@ -88,7 +88,7 @@ def test_check_filter_multiline(client, live_server):
    # Add our URL to the import page
    res = client.post(
        url_for("edit_page", uuid="first"),
-        data={"css_filter": '',
+        data={"include_filters": '',
              'extract_text': '/something.+?6 billion.+?lines/si',
              "url": test_url,
              "tag": "",
@ -116,7 +116,7 @@ def test_check_filter_multiline(client, live_server):

 def test_check_filter_and_regex_extract(client, live_server):
    sleep_time_for_fetch_thread = 3
-    css_filter = ".changetext"
+    include_filters = ".changetext"

    set_original_response()

@ -143,7 +143,7 @@ def test_check_filter_and_regex_extract(client, live_server):
    # Add our URL to the import page
    res = client.post(
        url_for("edit_page", uuid="first"),
-        data={"css_filter": css_filter,
+        data={"include_filters": include_filters,
              'extract_text': '\d+ online\r\n\d+ guests\r\n/somecase insensitive \d+/i\r\n/somecase insensitive (345\d)/i',
              "url": test_url,
              "tag": "",
--- a/changedetectionio/tests/test_filter_exist_changes.py
+++ b/changedetectionio/tests/test_filter_exist_changes.py
@ -92,7 +92,7 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se
        "tag": "my tag",
        "title": "my title",
        "headers": "",
-        "css_filter": '.ticket-available',
+        "include_filters": '.ticket-available',
        "fetch_backend": "html_requests"})

    res = client.post(
--- a/changedetectionio/tests/test_filter_failure_notification.py
+++ b/changedetectionio/tests/test_filter_failure_notification.py
@ -76,7 +76,7 @@ def run_filter_test(client, content_filter):
        "title": "my title",
        "headers": "",
        "filter_failure_notification_send": 'y',
-        "css_filter": content_filter,
+        "include_filters": content_filter,
        "fetch_backend": "html_requests"})

    res = client.post(
@ -95,7 +95,7 @@ def run_filter_test(client, content_filter):
        time.sleep(3)

    # We should see something in the frontend
-    assert b'Warning, filter' in res.data
+    assert b'Warning, no filters were found' in res.data

    # Now it should exist and contain our "filter not found" alert
    assert os.path.isfile("test-datastore/notification.txt")
@ -131,7 +131,7 @@ def run_filter_test(client, content_filter):
 def test_setup(live_server):
    live_server_setup(live_server)

-def test_check_css_filter_failure_notification(client, live_server):
+def test_check_include_filters_failure_notification(client, live_server):
    set_original_response()
    time.sleep(1)
    run_filter_test(client, '#nope-doesnt-exist')
--- a/changedetectionio/tests/test_jsonpath_jq_selector.py
+++ b/changedetectionio/tests/test_jsonpath_jq_selector.py
@ -132,7 +132,7 @@ def set_original_response():
    return None


-def set_response_with_html():
+def set_json_response_with_html():
    test_return_data = """
    {
      "test": [
@ -176,7 +176,7 @@ def set_modified_response():
 def test_check_json_without_filter(client, live_server):
    # Request a JSON document from a application/json source containing HTML
    # and be sure it doesn't get chewed up by instriptis
-    set_response_with_html()
+    set_json_response_with_html()

    # Give the endpoint time to spin up
    time.sleep(1)
@ -189,9 +189,6 @@ def test_check_json_without_filter(client, live_server):
        follow_redirects=True
    )

-    # Trigger a check
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)
-
    # Give the thread time to pick it up
    time.sleep(3)

@ -200,6 +197,7 @@ def test_check_json_without_filter(client, live_server):
        follow_redirects=True
    )

+    # Should still see '"html": "<b>"'
    assert b'&#34;&lt;b&gt;' in res.data
    assert res.data.count(b'{\n') >= 2

@ -221,9 +219,6 @@ def check_json_filter(json_filter, client, live_server):
    )
    assert b"1 Imported" in res.data

-    # Trigger a check
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)
-
    # Give the thread time to pick it up
    time.sleep(3)

@ -231,7 +226,7 @@ def check_json_filter(json_filter, client, live_server):
    # Set the include filter on the watch via the edit page
    res = client.post(
        url_for("edit_page", uuid="first"),
-        data={"css_filter": json_filter,
+        data={"include_filters": json_filter,
              "url": test_url,
              "tag": "",
              "headers": "",
@ -247,9 +242,6 @@ def check_json_filter(json_filter, client, live_server):
    )
    assert bytes(escape(json_filter).encode('utf-8')) in res.data

-    # Trigger a check
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)
-
    # Give the thread time to pick it up
    time.sleep(3)
    #  Make a change
@ -301,7 +293,7 @@ def check_json_filter_bool_val(json_filter, client, live_server):
    # Set the include filter on the watch via the edit page
    res = client.post(
        url_for("edit_page", uuid="first"),
-        data={"css_filter": json_filter,
+        data={"include_filters": json_filter,
              "url": test_url,
              "tag": "",
              "headers": "",
@ -311,11 +303,6 @@ def check_json_filter_bool_val(json_filter, client, live_server):
    )
    assert b"Updated watch." in res.data

-    time.sleep(3)
-
-    # Trigger a check
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)
-
    # Give the thread time to pick it up
    time.sleep(3)
    #  Make a change
@ -360,9 +347,6 @@ def check_json_ext_filter(json_filter, client, live_server):
    )
    assert b"1 Imported" in res.data

-    # Trigger a check
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)
-
    # Give the thread time to pick it up
    time.sleep(3)

@ -370,7 +354,7 @@ def check_json_ext_filter(json_filter, client, live_server):
    # Set the include filter on the watch via the edit page
    res = client.post(
        url_for("edit_page", uuid="first"),
-        data={"css_filter": json_filter,
+        data={"include_filters": json_filter,
              "url": test_url,
              "tag": "",
              "headers": "",
@ -386,9 +370,6 @@ def check_json_ext_filter(json_filter, client, live_server):
    )
    assert bytes(escape(json_filter).encode('utf-8')) in res.data

-    # Trigger a check
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)
-
    # Give the thread time to pick it up
    time.sleep(3)
    #  Make a change
--- a/changedetectionio/tests/test_share_watch.py
+++ b/changedetectionio/tests/test_share_watch.py
@ -14,7 +14,7 @@ def test_share_watch(client, live_server):
    live_server_setup(live_server)

    test_url = url_for('test_endpoint', _external=True)
-    css_filter = ".nice-filter"
+    include_filters = ".nice-filter"

    # Add our URL to the import page
    res = client.post(
@ -29,7 +29,7 @@ def test_share_watch(client, live_server):
    # Set the include filter on the watch via the edit page
    res = client.post(
        url_for("edit_page", uuid="first"),
-        data={"css_filter": css_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
+        data={"include_filters": include_filters, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
@ -37,7 +37,7 @@ def test_share_watch(client, live_server):
    res = client.get(
        url_for("edit_page", uuid="first"),
    )
-    assert bytes(css_filter.encode('utf-8')) in res.data
+    assert bytes(include_filters.encode('utf-8')) in res.data

    # click share the link
    res = client.get(
@ -73,4 +73,8 @@ def test_share_watch(client, live_server):
    res = client.get(
        url_for("edit_page", uuid="first"),
    )
-    assert bytes(css_filter.encode('utf-8')) in res.data
+    assert bytes(include_filters.encode('utf-8')) in res.data
+
+    # Check it saved the URL
+    res = client.get(url_for("index"))
+    assert bytes(test_url.encode('utf-8')) in res.data
--- a/changedetectionio/tests/test_source.py
+++ b/changedetectionio/tests/test_source.py
@ -57,10 +57,9 @@ def test_check_basic_change_detection_functionality_source(client, live_server):



-
+# `subtractive_selectors` should still work in `source:` type requests
 def test_check_ignore_elements(client, live_server):
    set_original_response()
-
    time.sleep(2)
    test_url = 'source:'+url_for('test_endpoint', _external=True)
    # Add our URL to the import page
@ -77,9 +76,9 @@ def test_check_ignore_elements(client, live_server):
    #####################
    # We want <span> and <p> ONLY, but ignore span with .foobar-detection

-    res = client.post(
+    client.post(
        url_for("edit_page", uuid="first"),
-        data={"css_filter": 'span,p', "url": test_url, "tag": "", "subtractive_selectors": ".foobar-detection", 'fetch_backend': "html_requests"},
+        data={"include_filters": 'span,p', "url": test_url, "tag": "", "subtractive_selectors": ".foobar-detection", 'fetch_backend': "html_requests"},
        follow_redirects=True
    )

@ -89,7 +88,6 @@ def test_check_ignore_elements(client, live_server):
        url_for("preview_page", uuid="first"),
        follow_redirects=True
    )
-
    assert b'foobar-detection' not in res.data
    assert b'&lt;br' not in res.data
    assert b'&lt;p' in res.data
--- a/changedetectionio/tests/test_trigger_regex_with_filter.py
+++ b/changedetectionio/tests/test_trigger_regex_with_filter.py
@ -49,7 +49,7 @@ def test_trigger_regex_functionality_with_filter(client, live_server):
        url_for("edit_page", uuid="first"),
        data={"trigger_text": "/cool.stuff/",
              "url": test_url,
-              "css_filter": '#in-here',
+              "include_filters": '#in-here',
              "fetch_backend": "html_requests"},
        follow_redirects=True
    )
--- a/changedetectionio/tests/test_watch_fields_storage.py
+++ b/changedetectionio/tests/test_watch_fields_storage.py
@ -22,7 +22,7 @@ def test_check_watch_field_storage(client, live_server):
        url_for("edit_page", uuid="first"),
        data={ "notification_urls": "json://127.0.0.1:30000\r\njson://128.0.0.1\r\n",
               "time_between_check-minutes": 126,
-               "css_filter" : ".fooclass",
+               "include_filters" : ".fooclass",
               "title" : "My title",
               "ignore_text" : "ignore this",
               "url": test_url,
--- a/changedetectionio/tests/test_xpath_selector.py
+++ b/changedetectionio/tests/test_xpath_selector.py
@ -89,7 +89,7 @@ def test_check_xpath_filter_utf8(client, live_server):
    time.sleep(1)
    res = client.post(
        url_for("edit_page", uuid="first"),
-        data={"css_filter": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
+        data={"include_filters": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
@ -143,7 +143,7 @@ def test_check_xpath_text_function_utf8(client, live_server):
    time.sleep(1)
    res = client.post(
        url_for("edit_page", uuid="first"),
-        data={"css_filter": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
+        data={"include_filters": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
@ -182,9 +182,6 @@ def test_check_markup_xpath_filter_restriction(client, live_server):
    )
    assert b"1 Imported" in res.data

-    # Trigger a check
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)
-
    # Give the thread time to pick it up
    time.sleep(sleep_time_for_fetch_thread)

@ -192,7 +189,7 @@ def test_check_markup_xpath_filter_restriction(client, live_server):
    # Set the include filter on the watch via the edit page
    res = client.post(
        url_for("edit_page", uuid="first"),
-        data={"css_filter": xpath_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
+        data={"include_filters": xpath_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
@ -230,10 +227,11 @@ def test_xpath_validation(client, live_server):
        follow_redirects=True
    )
    assert b"1 Imported" in res.data
+    time.sleep(2)

    res = client.post(
        url_for("edit_page", uuid="first"),
-        data={"css_filter": "/something horrible", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
+        data={"include_filters": "/something horrible", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        follow_redirects=True
    )
    assert b"is not a valid XPath expression" in res.data
@ -242,7 +240,7 @@ def test_xpath_validation(client, live_server):


 # Actually only really used by the distill.io importer, but could be handy too
-def test_check_with_prefix_css_filter(client, live_server):
+def test_check_with_prefix_include_filters(client, live_server):
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data

@ -263,7 +261,7 @@ def test_check_with_prefix_css_filter(client, live_server):

    res = client.post(
        url_for("edit_page", uuid="first"),
-        data={"css_filter":  "xpath://*[contains(@class, 'sametext')]", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
+        data={"include_filters":  "xpath://*[contains(@class, 'sametext')]", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        follow_redirects=True
    )

--- a/changedetectionio/update_worker.py
+++ b/changedetectionio/update_worker.py
@ -4,7 +4,7 @@ import queue
 import time

 from changedetectionio import content_fetcher
-from changedetectionio.html_tools import FilterNotFoundInResponse
+from changedetectionio.fetch_site_status import FilterNotFoundInResponse

 # A single update worker
 #
@ -91,8 +91,8 @@ class update_worker(threading.Thread):
            return

        n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page',
-                    'notification_body': "Your configured CSS/xPath filter of '{}' for {{watch_url}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{base_url}}/edit/{{watch_uuid}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format(
-                        watch['css_filter'],
+                    'notification_body': "Your configured CSS/xPath filters of '{}' for {{watch_url}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{base_url}}/edit/{{watch_uuid}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format(
+                        ", ".join(watch['include_filters']),
                        threshold),
                    'notification_format': 'text'}

@ -189,7 +189,7 @@ class update_worker(threading.Thread):
                        if not self.datastore.data['watching'].get(uuid):
                            continue

-                        err_text = "Warning, filter '{}' not found".format(str(e))
+                        err_text = "Warning, no filters were found, no change detection ran."
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                           # So that we get a trigger when the content is added again
                                                                           'previous_md5': ''})