Adding validation

enhanced-RSS-and-register-all-namespaces
dgtlmoon 4 months ago
parent 919812bf8b
commit 591dd5b570

@ -729,6 +729,12 @@ def changedetection_app(config=None, datastore_o=None):
for p in datastore.proxy_list:
form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label'])))
# Add some HTML to be used for form validation
if datastore.data['watching'][uuid].history.keys():
timestamp = list(datastore.data['watching'][uuid].history.keys())[-1]
form.last_html_for_form_validation = datastore.data['watching'][uuid].get_fetched_html(timestamp)
else:
form.last_html_for_form_validation = "<html><body></body></html>"
if request.method == 'POST' and form.validate():

@ -1,6 +1,9 @@
import os
import re
import elementpath
from changedetectionio.html_tools import xpath_filter, xpath1_filter
from changedetectionio.strtobool import strtobool
from wtforms import (
@ -329,45 +332,32 @@ class ValidateCSSJSONXPATHInput(object):
data = field.data
for line in data:
# Nothing to see here
if not len(line.strip()):
return
# Does it look like XPath?
if line.strip()[0] == '/' or line.strip().startswith('xpath:'):
if not self.allow_xpath:
raise ValidationError("XPath not permitted in this field!")
from lxml import etree, html
import elementpath
# xpath 2.0-3.1
from elementpath.xpath3 import XPath3Parser
tree = html.fromstring("<html></html>")
line = line.replace('xpath:', '')
line = line.strip()
try:
elementpath.select(tree, line.strip(), parser=XPath3Parser)
except elementpath.ElementPathError as e:
message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
raise ValidationError(message % (line, str(e)))
except:
raise ValidationError("A system-error occurred when validating your XPath expression")
if not line:
continue
if line.strip().startswith('xpath1:'):
if line.startswith('xpath') or line.startswith('/'):
if not self.allow_xpath:
raise ValidationError("XPath not permitted in this field!")
from lxml import etree, html
tree = html.fromstring("<html></html>")
line = re.sub(r'^xpath1:', '', line)
if line.startswith('xpath1:'):
filter_function = xpath1_filter
else:
line = line.replace('xpath:', '')
filter_function = xpath_filter
try:
tree.xpath(line.strip())
except etree.XPathEvalError as e:
# Call the determined function
res = filter_function(xpath_filter=line.strip(), html_content=form.last_html_for_form_validation)
# It's OK if this is an empty result, we just want to check that it doesn't crash the parser
except elementpath.ElementPathError as e:
message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
raise ValidationError(message % (line, str(e)))
except:
raise ValidationError("A system-error occurred when validating your XPath expression")
if 'json:' in line:
elif 'json:' in line:
if not self.allow_json:
raise ValidationError("JSONPath not permitted in this field!")
@ -392,7 +382,7 @@ class ValidateCSSJSONXPATHInput(object):
if not self.allow_json:
raise ValidationError("jq not permitted in this field!")
if 'jq:' in line:
elif line.startswith('jq:'):
try:
import jq
except ModuleNotFoundError:

@ -164,3 +164,46 @@ def test_rss_xpath_filtering(client, live_server, measure_memory_usage):
assert b'Some other description' not in res.data # Should NOT be selected by the xpath
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
def test_namespace_selectors(live_server, client):
    """Check that a namespace-prefixed XPath filter can be saved and is applied.

    Imports the XML test endpoint as a watch, sets the include filter to
    ``//media:thumbnail/@url`` through the edit form, then verifies that the
    preview shows the thumbnail URL without any raw CDATA markup.
    """
    set_original_cdata_xml()
    # NOTE(review): the original keeps `live_server_setup(live_server)` disabled
    # here - presumably the server is set up elsewhere in this module; confirm.
    endpoint = url_for('test_endpoint', content_type="application/xml", _external=True)

    import_response = client.post(
        url_for("import_page"),
        data={"urls": endpoint},
        follow_redirects=True,
    )
    assert b"1 Imported" in import_response.data

    # A first check has to complete before the filter is submitted: form
    # validation looks for the namespaced content, which won't exist until
    # something has been fetched.
    wait_for_all_checks(client)
    watch_uuid = extract_UUID_from_client(client)

    edit_form = {
        "include_filters": "//media:thumbnail/@url",
        "fetch_backend": "html_requests",
        "headers": "",
        "proxy": "no-proxy",
        "tags": "",
        "url": endpoint,
    }
    client.post(
        url_for("edit_page", uuid=watch_uuid),
        data=edit_form,
        follow_redirects=True,
    )

    wait_for_all_checks(client)

    preview = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True,
    )
    preview_body = preview.data
    # The CDATA wrapper must be gone and only the selected attribute value shown.
    assert b'CDATA' not in preview_body
    assert b'<![' not in preview_body
    assert b'https://testsite.com/thumbnail-c224e10d81488e818701c981da04869e.jpg' in preview_body

    # Clean up so later tests start without any watches.
    client.get(url_for("form_delete", uuid="all"), follow_redirects=True)

Loading…
Cancel
Save