diff --git a/changedetectionio/flask_app.py b/changedetectionio/flask_app.py index fd12393a..d58521b3 100644 --- a/changedetectionio/flask_app.py +++ b/changedetectionio/flask_app.py @@ -729,6 +729,12 @@ def changedetection_app(config=None, datastore_o=None): for p in datastore.proxy_list: form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label']))) + # Add some HTML to be used for form validation + if datastore.data['watching'][uuid].history.keys(): + timestamp = list(datastore.data['watching'][uuid].history.keys())[-1] + form.last_html_for_form_validation = datastore.data['watching'][uuid].get_fetched_html(timestamp) + else: + form.last_html_for_form_validation = "" if request.method == 'POST' and form.validate(): diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py index b0b19f99..7f292976 100644 --- a/changedetectionio/forms.py +++ b/changedetectionio/forms.py @@ -1,6 +1,9 @@ import os import re +import elementpath + +from changedetectionio.html_tools import xpath_filter, xpath1_filter from changedetectionio.strtobool import strtobool from wtforms import ( @@ -329,45 +332,32 @@ class ValidateCSSJSONXPATHInput(object): data = field.data for line in data: - # Nothing to see here - if not len(line.strip()): - return - - # Does it look like XPath? - if line.strip()[0] == '/' or line.strip().startswith('xpath:'): - if not self.allow_xpath: - raise ValidationError("XPath not permitted in this field!") - from lxml import etree, html - import elementpath - # xpath 2.0-3.1 - from elementpath.xpath3 import XPath3Parser - tree = html.fromstring("") - line = line.replace('xpath:', '') + line = line.strip() - try: - elementpath.select(tree, line.strip(), parser=XPath3Parser) - except elementpath.ElementPathError as e: - message = field.gettext('\'%s\' is not a valid XPath expression. (%s)') - raise ValidationError(message % (line, str(e))) - except: - raise ValidationError("A system-error occurred when validating your XPath expression") + if not line: + continue - if line.strip().startswith('xpath1:'): + if line.startswith('xpath') or line.startswith('/'): if not self.allow_xpath: raise ValidationError("XPath not permitted in this field!") - from lxml import etree, html - tree = html.fromstring("") - line = re.sub(r'^xpath1:', '', line) + + if line.startswith('xpath1:'): + filter_function = xpath1_filter + else: + line = line.replace('xpath:', '') + filter_function = xpath_filter try: - tree.xpath(line.strip()) - except etree.XPathEvalError as e: + # Call the determined function + res = filter_function(xpath_filter=line.strip(), html_content=form.last_html_for_form_validation) + # It's OK if this is an empty result, we just want to check that it doesn't crash the parser + except elementpath.ElementPathError as e: message = field.gettext('\'%s\' is not a valid XPath expression. (%s)') raise ValidationError(message % (line, str(e))) except: raise ValidationError("A system-error occurred when validating your XPath expression") - if 'json:' in line: + elif 'json:' in line: if not self.allow_json: raise ValidationError("JSONPath not permitted in this field!") @@ -392,7 +382,7 @@ class ValidateCSSJSONXPATHInput(object): if not self.allow_json: raise ValidationError("jq not permitted in this field!") - if 'jq:' in line: + elif line.startswith('jq:'): try: import jq except ModuleNotFoundError: diff --git a/changedetectionio/tests/test_rss.py b/changedetectionio/tests/test_rss.py index e249e0ad..e1117b55 100644 --- a/changedetectionio/tests/test_rss.py +++ b/changedetectionio/tests/test_rss.py @@ -164,3 +164,46 @@ def test_rss_xpath_filtering(client, live_server, measure_memory_usage): assert b'Some other description' not in res.data # Should NOT be selected by the xpath res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) + +def test_namespace_selectors(live_server, client): + set_original_cdata_xml() + #live_server_setup(live_server) + + test_url = url_for('test_endpoint', content_type="application/xml", _external=True) + + res = client.post( + url_for("import_page"), + data={"urls": test_url}, + follow_redirects=True + ) + + assert b"1 Imported" in res.data + + wait_for_all_checks(client) + + uuid = extract_UUID_from_client(client) + # because it will look for the namespaced stuff during form validation, but on the first check it wont exist.. + res = client.post( + url_for("edit_page", uuid=uuid), + data={ + "include_filters": "//media:thumbnail/@url", + "fetch_backend": "html_requests", + "headers": "", + "proxy": "no-proxy", + "tags": "", + "url": test_url, + }, + follow_redirects=True + ) + + wait_for_all_checks(client) + + res = client.get( + url_for("preview_page", uuid="first"), + follow_redirects=True + ) + assert b'CDATA' not in res.data + assert b'