Adding validation

4 months ago · 591dd5b570
parent 919812bf8b
commit 591dd5b570
3 changed files with 68 additions and 29 deletions
--- a/changedetectionio/flask_app.py
+++ b/changedetectionio/flask_app.py
@ -729,6 +729,12 @@ def changedetection_app(config=None, datastore_o=None):
            for p in datastore.proxy_list:
                form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label'])))

+        # Add some HTML to be used for form validation
+        if datastore.data['watching'][uuid].history.keys():
+            timestamp = list(datastore.data['watching'][uuid].history.keys())[-1]
+            form.last_html_for_form_validation = datastore.data['watching'][uuid].get_fetched_html(timestamp)
+        else:
+            form.last_html_for_form_validation = "<html><body></body></html>"

        if request.method == 'POST' and form.validate():

--- a/changedetectionio/forms.py
+++ b/changedetectionio/forms.py
@ -1,6 +1,9 @@
 import os
 import re

+import elementpath
+
+from changedetectionio.html_tools import xpath_filter, xpath1_filter
 from changedetectionio.strtobool import strtobool

 from wtforms import (
@ -329,45 +332,32 @@ class ValidateCSSJSONXPATHInput(object):
            data = field.data

        for line in data:
-        # Nothing to see here
-            if not len(line.strip()):
-                return
+            line = line.strip()

-            # Does it look like XPath?
-            if line.strip()[0] == '/' or line.strip().startswith('xpath:'):
+            if not line:
+                continue
+
+            if line.startswith('xpath') or line.startswith('/'):
                if not self.allow_xpath:
                    raise ValidationError("XPath not permitted in this field!")
-                from lxml import etree, html
-                import elementpath
-                # xpath 2.0-3.1
-                from elementpath.xpath3 import XPath3Parser
-                tree = html.fromstring("<html></html>")
+
+                if line.startswith('xpath1:'):
+                    filter_function = xpath1_filter
+                else:
                    line = line.replace('xpath:', '')
+                    filter_function = xpath_filter

                try:
-                    elementpath.select(tree, line.strip(), parser=XPath3Parser)
+                    # Call the determined function
+                    res = filter_function(xpath_filter=line.strip(), html_content=form.last_html_for_form_validation)
+                    # It's OK if this is an empty result; we only want to check that the expression doesn't crash the parser
                except elementpath.ElementPathError as e:
                    message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
                    raise ValidationError(message % (line, str(e)))
                except:
                    raise ValidationError("A system-error occurred when validating your XPath expression")

-            if line.strip().startswith('xpath1:'):
-                if not self.allow_xpath:
-                    raise ValidationError("XPath not permitted in this field!")
-                from lxml import etree, html
-                tree = html.fromstring("<html></html>")
-                line = re.sub(r'^xpath1:', '', line)
-
-                try:
-                    tree.xpath(line.strip())
-                except etree.XPathEvalError as e:
-                    message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
-                    raise ValidationError(message % (line, str(e)))
-                except:
-                    raise ValidationError("A system-error occurred when validating your XPath expression")
-
-            if 'json:' in line:
+            elif 'json:' in line:
                if not self.allow_json:
                    raise ValidationError("JSONPath not permitted in this field!")

@ -392,7 +382,7 @@ class ValidateCSSJSONXPATHInput(object):
                if not self.allow_json:
                    raise ValidationError("jq not permitted in this field!")

-            if 'jq:' in line:
+            elif line.startswith('jq:'):
                try:
                    import jq
                except ModuleNotFoundError:
--- a/changedetectionio/tests/test_rss.py
+++ b/changedetectionio/tests/test_rss.py
@ -164,3 +164,46 @@ def test_rss_xpath_filtering(client, live_server, measure_memory_usage):
    assert b'Some other description' not in res.data  # Should NOT be selected by the xpath

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+
+def test_namespace_selectors(live_server, client):
+    set_original_cdata_xml()
+    #live_server_setup(live_server)
+
+    test_url = url_for('test_endpoint', content_type="application/xml", _external=True)
+
+    res = client.post(
+        url_for("import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+
+    assert b"1 Imported" in res.data
+
+    wait_for_all_checks(client)
+
+    uuid = extract_UUID_from_client(client)
+    # Form validation will look for the namespaced elements, but they won't exist until the first check has completed.
+    res = client.post(
+        url_for("edit_page", uuid=uuid),
+        data={
+            "include_filters": "//media:thumbnail/@url",
+            "fetch_backend": "html_requests",
+            "headers": "",
+            "proxy": "no-proxy",
+            "tags": "",
+            "url": test_url,
+        },
+        follow_redirects=True
+    )
+
+    wait_for_all_checks(client)
+
+    res = client.get(
+        url_for("preview_page", uuid="first"),
+        follow_redirects=True
+    )
+    assert b'CDATA' not in res.data
+    assert b'<![' not in res.data
+    assert b'https://testsite.com/thumbnail-c224e10d81488e818701c981da04869e.jpg' in res.data
+
+    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)