Adding validation

4 months ago · 591dd5b570
parent 919812bf8b
commit 591dd5b570
3 changed files with 68 additions and 29 deletions
--- a/changedetectionio/flask_app.py
+++ b/changedetectionio/flask_app.py
@ -729,6 +729,12 @@ def changedetection_app(config=None, datastore_o=None):
            for p in datastore.proxy_list:
                form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label'])))
        # Add some HTML to be used for form validation
        if datastore.data['watching'][uuid].history.keys():
            timestamp = list(datastore.data['watching'][uuid].history.keys())[-1]
            form.last_html_for_form_validation = datastore.data['watching'][uuid].get_fetched_html(timestamp)
        else:
            form.last_html_for_form_validation = "<html><body></body></html>"
        if request.method == 'POST' and form.validate():
--- a/changedetectionio/forms.py
+++ b/changedetectionio/forms.py
@ -1,6 +1,9 @@
 import os
 import re
 import elementpath
 from changedetectionio.html_tools import xpath_filter, xpath1_filter
 from changedetectionio.strtobool import strtobool
 from wtforms import (
@ -329,45 +332,32 @@ class ValidateCSSJSONXPATHInput(object):
            data = field.data
        for line in data:
-        # Nothing to see here
+            line = line.strip()
            if not len(line.strip()):
                return
-            # Does it look like XPath?
+            if not line:
-            if line.strip()[0] == '/' or line.strip().startswith('xpath:'):
+                continue
            if line.startswith('xpath') or line.startswith('/'):
                if not self.allow_xpath:
                    raise ValidationError("XPath not permitted in this field!")
-                from lxml import etree, html
+
-                import elementpath
+                if line.startswith('xpath1:'):
-                # xpath 2.0-3.1
+                    filter_function = xpath1_filter
-                from elementpath.xpath3 import XPath3Parser
+                else:
                tree = html.fromstring("<html></html>")
                    line = line.replace('xpath:', '')
                    filter_function = xpath_filter
                try:
-                    elementpath.select(tree, line.strip(), parser=XPath3Parser)
+                    # Call the determined function
                    res = filter_function(xpath_filter=line.strip(), html_content=form.last_html_for_form_validation)
                    # It's OK if this is an empty result, we just want to check that it doesn't crash the parser
                except elementpath.ElementPathError as e:
                    message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
                    raise ValidationError(message % (line, str(e)))
                except:
                    raise ValidationError("A system-error occurred when validating your XPath expression")
-            if line.strip().startswith('xpath1:'):
+            elif 'json:' in line:
                if not self.allow_xpath:
                    raise ValidationError("XPath not permitted in this field!")
                from lxml import etree, html
                tree = html.fromstring("<html></html>")
                line = re.sub(r'^xpath1:', '', line)
                try:
                    tree.xpath(line.strip())
                except etree.XPathEvalError as e:
                    message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
                    raise ValidationError(message % (line, str(e)))
                except:
                    raise ValidationError("A system-error occurred when validating your XPath expression")
            if 'json:' in line:
                if not self.allow_json:
                    raise ValidationError("JSONPath not permitted in this field!")
@ -392,7 +382,7 @@ class ValidateCSSJSONXPATHInput(object):
                if not self.allow_json:
                    raise ValidationError("jq not permitted in this field!")
-            if 'jq:' in line:
+            elif line.startswith('jq:'):
                try:
                    import jq
                except ModuleNotFoundError:
--- a/changedetectionio/tests/test_rss.py
+++ b/changedetectionio/tests/test_rss.py
@ -164,3 +164,46 @@ def test_rss_xpath_filtering(client, live_server, measure_memory_usage):
    assert b'Some other description' not in res.data  # Should NOT be selected by the xpath
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
 def test_namespace_selectors(live_server, client):
    """Exercise a namespaced XPath include filter against the XML test endpoint.

    Imports the XML test URL, waits for checks to finish, saves
    ``//media:thumbnail/@url`` as the include filter through the edit form,
    and then asserts that the preview page contains the thumbnail URL and no
    CDATA markup.
    """
    set_original_cdata_xml()
    # NOTE(review): `live_server_setup(live_server)` was left disabled by the
    # original author -- confirm it is not required for this test.
    feed_url = url_for('test_endpoint', content_type="application/xml", _external=True)

    import_response = client.post(
        url_for("import_page"),
        data={"urls": feed_url},
        follow_redirects=True,
    )
    assert b"1 Imported" in import_response.data

    # Wait for checks before editing: per the original author's note, form
    # validation looks for the namespaced content, which would not exist yet
    # at the time of the first check.
    wait_for_all_checks(client)
    watch_uuid = extract_UUID_from_client(client)

    edit_url = url_for("edit_page", uuid=watch_uuid)
    edit_form = {
        "include_filters": "//media:thumbnail/@url",
        "fetch_backend": "html_requests",
        "headers": "",
        "proxy": "no-proxy",
        "tags": "",
        "url": feed_url,
    }
    client.post(edit_url, data=edit_form, follow_redirects=True)
    wait_for_all_checks(client)

    preview_response = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True,
    )
    rendered = preview_response.data
    # No CDATA wrapper should remain in the rendered preview
    assert b'CDATA' not in rendered
    assert b'<![' not in rendered
    # The attribute value selected by the namespaced XPath must be present
    assert b'https://testsite.com/thumbnail-c224e10d81488e818701c981da04869e.jpg' in rendered

    # Clean up every watch created by this test
    client.get(url_for("form_delete", uuid="all"), follow_redirects=True)