diff --git a/backend/fetch_site_status.py b/backend/fetch_site_status.py
index 12216e19..dcefab15 100644
--- a/backend/fetch_site_status.py
+++ b/backend/fetch_site_status.py
@@ -88,12 +88,27 @@ class perform_site_check():
html = r.text
- # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
+ is_html = True
css_filter_rule = self.datastore.data['watching'][uuid]['css_filter']
if css_filter_rule and len(css_filter_rule.strip()):
- html = html_tools.css_filter(css_filter=css_filter_rule, html_content=r.content)
+ if 'json:' in css_filter_rule:
+ # POC hack, @todo rename vars, see how it fits in with the javascript version
+ import json
+ from jsonpath_ng import jsonpath, parse
- stripped_text_from_html = get_text(html)
+ json_data = json.loads(html)
+ jsonpath_expression = parse(css_filter_rule.replace('json:',''))
+ match = jsonpath_expression.find(json_data)
+ stripped_text_from_html = json.dumps(match[0].value, indent=4)
+
+ is_html = False
+
+ else:
+ # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
+ html = html_tools.css_filter(css_filter=css_filter_rule, html_content=r.content)
+
+ if is_html:
+ stripped_text_from_html = get_text(html)
# Usually from networkIO/requests level
except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout) as e:
diff --git a/backend/forms.py b/backend/forms.py
index ea8fd995..72bda422 100644
--- a/backend/forms.py
+++ b/backend/forms.py
@@ -82,7 +82,7 @@ class StringDictKeyValue(StringField):
else:
self.data = {}
-class ListRegex(object):
+class ValidateListRegex(object):
"""
Validates that anything that looks like a regex passes as a regex
"""
@@ -102,6 +102,28 @@ class ListRegex(object):
message = field.gettext('RegEx \'%s\' is not a valid regular expression.')
raise ValidationError(message % (line))
+class ValidateCSSJSONInput(object):
+    """
+    Validate the combined CSS/JSON filter field.
+
+    A value containing the "json:" marker is treated as a JSONPath rule: the
+    marker is stripped and the remainder must parse with jsonpath_ng.
+    Any other value (including an empty or unset field) is accepted as-is.
+    @todo CSS validator ;)
+    """
+
+    def __init__(self, message=None):
+        # Optional replacement for the default error text
+        self.message = message
+
+    def __call__(self, form, field):
+        """
+        Raise ValidationError when the field holds an unparsable JSONPath rule.
+        """
+        rule = field.data
+        # Unset/empty fields and plain CSS rules have nothing to check here
+        if not rule or 'json:' not in rule:
+            return
+
+        from jsonpath_ng import parse
+        from jsonpath_ng.exceptions import JsonPathLexerError, JsonPathParserError
+
+        # Strip the marker the same way perform_site_check does before parsing
+        expression = rule.replace('json:', '')
+
+        try:
+            parse(expression)
+        except (JsonPathLexerError, JsonPathParserError) as e:
+            # The lexer rejects illegal characters before the parser ever runs,
+            # so both error types mean "not a valid expression"
+            if self.message is not None:
+                raise ValidationError(self.message) from e
+            message = field.gettext('\'%s\' is not a valid JSONPath expression. (%s)')
+            raise ValidationError(message % (expression, str(e))) from e
+
class watchForm(Form):
# https://wtforms.readthedocs.io/en/2.3.x/fields/#module-wtforms.fields.html5
@@ -111,10 +133,10 @@ class watchForm(Form):
tag = StringField('Tag', [validators.Optional(), validators.Length(max=35)])
minutes_between_check = html5.IntegerField('Maximum time in minutes until recheck',
[validators.Optional(), validators.NumberRange(min=1)])
- css_filter = StringField('CSS Filter')
+ css_filter = StringField('CSS/JSON Filter', [ValidateCSSJSONInput()])
title = StringField('Title')
- ignore_text = StringListField('Ignore Text', [ListRegex()])
+ ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
notification_urls = StringListField('Notification URL List')
headers = StringDictKeyValue('Request Headers')
trigger_check = BooleanField('Send test notification on save')
diff --git a/backend/templates/edit.html b/backend/templates/edit.html
index 3e828e49..e42dc022 100644
--- a/backend/templates/edit.html
+++ b/backend/templates/edit.html
@@ -23,9 +23,12 @@
{{ render_field(form.css_filter, size=25, placeholder=".class-name or #some-id, or other CSS selector rule.") }}
-
Limit text to this CSS rule, only text matching this CSS rule is included.
- Please be sure that you thoroughly understand how to write CSS selector rules before filing an issue on GitHub!
- Go here for more CSS selector help
+
+
+ - CSS - Limit text to this CSS rule, only text matching this CSS rule is included.
+ - JSON - Limit text to this JSON rule, using JSONPath, prefix with "json:", test your JSONPath here
+
Please be sure that you thoroughly understand how to write CSS or JSONPath selector rules before filing an issue on GitHub! Go here for more CSS selector help.
diff --git a/backend/tests/test_jsonpath_selector.py b/backend/tests/test_jsonpath_selector.py
new file mode 100644
index 00000000..e955dc28
--- /dev/null
+++ b/backend/tests/test_jsonpath_selector.py
@@ -0,0 +1,121 @@
+#!/usr/bin/python3
+
+import time
+from flask import url_for
+from . util import live_server_setup
+
+def test_setup(live_server):
+ """Pass the live_server fixture to the shared live_server_setup helper imported from util."""
+ live_server_setup(live_server)
+
+def set_original_response():
+    """Write the baseline JSON document (boss name "Fat guy") to test-datastore/output.txt."""
+    baseline_json = """
+ {
+ "employees": [
+ {
+ "id": 1,
+ "name": "Pankaj",
+ "salary": "10000"
+ },
+ {
+ "name": "David",
+ "salary": "5000",
+ "id": 2
+ }
+ ],
+ "boss": {
+ "name": "Fat guy"
+ }
+ }
+ """
+    with open("test-datastore/output.txt", "w") as out:
+        out.write(baseline_json)
+
+def set_modified_response():
+    """Write the changed JSON document (boss name "Foobar") to test-datastore/output.txt."""
+    changed_json = """
+ {
+ "employees": [
+ {
+ "id": 1,
+ "name": "Pankaj",
+ "salary": "10000"
+ },
+ {
+ "name": "David",
+ "salary": "5000",
+ "id": 2
+ }
+ ],
+ "boss": {
+ "name": "Foobar"
+ }
+ }
+ """
+
+    with open("test-datastore/output.txt", "w") as out:
+        out.write(changed_json)
+
+
+
+def test_check_json_filter(client, live_server):
+    """
+    End-to-end check that a "json:" filter is saved on a watch and that a
+    change in the JSON value it selects is flagged and shown on the diff page.
+    """
+
+    def recheck_and_wait():
+        # Queue a recheck, then pause so the worker thread can pick it up
+        client.get(url_for("api_watch_checknow"), follow_redirects=True)
+        time.sleep(3)
+
+    json_filter = 'json:boss.name'
+    set_original_response()
+
+    # Give the endpoint time to spin up
+    time.sleep(1)
+
+    # Import the test endpoint as a new watch
+    test_url = url_for('test_endpoint', _external=True)
+    import_result = client.post(
+        url_for("import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in import_result.data
+
+    recheck_and_wait()
+
+    # Save the JSONPath rule through the edit form
+    edit_form = {"css_filter": json_filter, "url": test_url, "tag": "", "headers": ""}
+    save_result = client.post(
+        url_for("edit_page", uuid="first"),
+        data=edit_form,
+        follow_redirects=True
+    )
+    assert b"Updated watch." in save_result.data
+
+    # The edit page should echo the saved rule back
+    edit_page = client.get(url_for("edit_page", uuid="first"))
+    assert json_filter.encode('utf-8') in edit_page.data
+
+    recheck_and_wait()
+
+    # Change only the value the rule points at (boss.name), then check again
+    set_modified_response()
+    recheck_and_wait()
+
+    # The watch should be flagged as 'unviewed' on the index page
+    index_page = client.get(url_for("index"))
+    assert b'unviewed' in index_page.data
+
+    # The new value must appear on the diff page
+    diff_page = client.get(url_for("diff_history_page", uuid="first"))
+    assert b'Foobar' in diff_page.data
diff --git a/requirements.txt b/requirements.txt
index 5f8e4049..1d4c4b11 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,7 +12,7 @@ flask-login ~= 0.5
pytz
urllib3
wtforms ~= 2.3.3
-
+jsonpath-ng ~= 1.5.3
# Notification library