diff --git a/backend/fetch_site_status.py b/backend/fetch_site_status.py index 12216e19..dcefab15 100644 --- a/backend/fetch_site_status.py +++ b/backend/fetch_site_status.py @@ -88,12 +88,27 @@ class perform_site_check(): html = r.text - # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text + is_html = True css_filter_rule = self.datastore.data['watching'][uuid]['css_filter'] if css_filter_rule and len(css_filter_rule.strip()): - html = html_tools.css_filter(css_filter=css_filter_rule, html_content=r.content) + if 'json:' in css_filter_rule: + # POC hack, @todo rename vars, see how it fits in with the javascript version + import json + from jsonpath_ng import jsonpath, parse - stripped_text_from_html = get_text(html) + json_data = json.loads(html) + jsonpath_expression = parse(css_filter_rule.replace('json:','')) + match = jsonpath_expression.find(json_data) + stripped_text_from_html = json.dumps(match[0].value, indent=4) + + is_html = False + + else: + # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text + html = html_tools.css_filter(css_filter=css_filter_rule, html_content=r.content) + + if is_html: + stripped_text_from_html = get_text(html) # Usually from networkIO/requests level except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout) as e: diff --git a/backend/forms.py b/backend/forms.py index ea8fd995..72bda422 100644 --- a/backend/forms.py +++ b/backend/forms.py @@ -82,7 +82,7 @@ class StringDictKeyValue(StringField): else: self.data = {} -class ListRegex(object): +class ValidateListRegex(object): """ Validates that anything that looks like a regex passes as a regex """ @@ -102,6 +102,28 @@ class ListRegex(object): message = field.gettext('RegEx \'%s\' is not a valid regular expression.') raise ValidationError(message % (line)) +class ValidateCSSJSONInput(object): + """ + Filter validation + @todo CSS validator ;) + """ + + def __init__(self, 
message=None): + self.message = message + + def __call__(self, form, field): + if 'json:' in field.data: + from jsonpath_ng.exceptions import JsonPathParserError + from jsonpath_ng import jsonpath, parse + + input = field.data.replace('json:', '') + + try: + parse(input) + except JsonPathParserError as e: + message = field.gettext('\'%s\' is not a valid JSONPath expression. (%s)') + raise ValidationError(message % (input, str(e))) + class watchForm(Form): # https://wtforms.readthedocs.io/en/2.3.x/fields/#module-wtforms.fields.html5 @@ -111,10 +133,10 @@ class watchForm(Form): tag = StringField('Tag', [validators.Optional(), validators.Length(max=35)]) minutes_between_check = html5.IntegerField('Maximum time in minutes until recheck', [validators.Optional(), validators.NumberRange(min=1)]) - css_filter = StringField('CSS Filter') + css_filter = StringField('CSS/JSON Filter', [ValidateCSSJSONInput()]) title = StringField('Title') - ignore_text = StringListField('Ignore Text', [ListRegex()]) + ignore_text = StringListField('Ignore Text', [ValidateListRegex()]) notification_urls = StringListField('Notification URL List') headers = StringDictKeyValue('Request Headers') trigger_check = BooleanField('Send test notification on save') diff --git a/backend/templates/edit.html b/backend/templates/edit.html index 3e828e49..e42dc022 100644 --- a/backend/templates/edit.html +++ b/backend/templates/edit.html @@ -23,9 +23,12 @@
{{ render_field(form.css_filter, size=25, placeholder=".class-name or #some-id, or other CSS selector rule.") }} - Limit text to this CSS rule, only text matching this CSS rule is included.
- Please be sure that you thoroughly understand how to write CSS selector rules before filing an issue on GitHub!
- Go here for more CSS selector help + +
    +
  • CSS - Limit text to this CSS rule; only text matching this CSS rule is included.
  • +
  • JSON - Limit text to this JSON rule using JSONPath; prefix the rule with "json:". Test your JSONPath here
  • +
+ Please be sure that you thoroughly understand how to write CSS or JSONPath selector rules before filing an issue on GitHub! Go here for more CSS selector help.
diff --git a/backend/tests/test_jsonpath_selector.py b/backend/tests/test_jsonpath_selector.py new file mode 100644 index 00000000..e955dc28 --- /dev/null +++ b/backend/tests/test_jsonpath_selector.py @@ -0,0 +1,121 @@ +#!/usr/bin/python3 + +import time +from flask import url_for +from . util import live_server_setup + +def test_setup(live_server): + live_server_setup(live_server) + +def set_original_response(): + test_return_data = """ + { + "employees": [ + { + "id": 1, + "name": "Pankaj", + "salary": "10000" + }, + { + "name": "David", + "salary": "5000", + "id": 2 + } + ], + "boss": { + "name": "Fat guy" + } + } + """ + with open("test-datastore/output.txt", "w") as f: + f.write(test_return_data) + return None + +def set_modified_response(): + test_return_data = """ + { + "employees": [ + { + "id": 1, + "name": "Pankaj", + "salary": "10000" + }, + { + "name": "David", + "salary": "5000", + "id": 2 + } + ], + "boss": { + "name": "Foobar" + } + } + """ + + with open("test-datastore/output.txt", "w") as f: + f.write(test_return_data) + + return None + + + +def test_check_json_filter(client, live_server): + + json_filter = 'json:boss.name' + + set_original_response() + + # Give the endpoint time to spin up + time.sleep(1) + + # Add our URL to the import page + test_url = url_for('test_endpoint', _external=True) + res = client.post( + url_for("import_page"), + data={"urls": test_url}, + follow_redirects=True + ) + assert b"1 Imported" in res.data + + # Trigger a check + client.get(url_for("api_watch_checknow"), follow_redirects=True) + + # Give the thread time to pick it up + time.sleep(3) + + # Goto the edit page, add our ignore text + # Add our URL to the import page + res = client.post( + url_for("edit_page", uuid="first"), + data={"css_filter": json_filter, "url": test_url, "tag": "", "headers": ""}, + follow_redirects=True + ) + assert b"Updated watch." 
in res.data + + # Check it saved + res = client.get( + url_for("edit_page", uuid="first"), + ) + assert bytes(json_filter.encode('utf-8')) in res.data + + # Trigger a check + client.get(url_for("api_watch_checknow"), follow_redirects=True) + + # Give the thread time to pick it up + time.sleep(3) + # Make a change + set_modified_response() + + # Trigger a check + client.get(url_for("api_watch_checknow"), follow_redirects=True) + # Give the thread time to pick it up + time.sleep(3) + + # It should have 'unviewed' still + res = client.get(url_for("index")) + assert b'unviewed' in res.data + + # Should not see this, because its not in the JSONPath we entered + res = client.get(url_for("diff_history_page", uuid="first")) + # But the change should be there, tho its hard to test the change was detected because it will show old and new versions + assert b'Foobar' in res.data diff --git a/requirements.txt b/requirements.txt index 5f8e4049..1d4c4b11 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,7 +12,7 @@ flask-login ~= 0.5 pytz urllib3 wtforms ~= 2.3.3 - +jsonpath-ng ~= 1.5.3 # Notification library