From a8e92e2226df77cb8959b0266fec9d978248a57e Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Wed, 27 Oct 2021 09:24:08 +0200 Subject: [PATCH] Re #265 - extended jsonpath support (#266) * Re #265 - Use extended JSONpath support, Allow a JSONPath selector to not match anything (yet) Adding test Correctly capture invalid JSONPath query error --- changedetectionio/forms.py | 4 +- changedetectionio/html_tools.py | 9 +- .../tests/test_jsonpath_selector.py | 112 ++++++++++++++++++ 3 files changed, 120 insertions(+), 5 deletions(-) diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py index 7f0be2e8..99c2c53b 100644 --- a/changedetectionio/forms.py +++ b/changedetectionio/forms.py @@ -179,7 +179,7 @@ class ValidateCSSJSONInput(object): def __call__(self, form, field): if 'json:' in field.data: from jsonpath_ng.exceptions import JsonPathParserError, JsonPathLexerError - from jsonpath_ng import jsonpath, parse + from jsonpath_ng.ext import parse input = field.data.replace('json:', '') @@ -189,6 +189,8 @@ class ValidateCSSJSONInput(object): message = field.gettext('\'%s\' is not a valid JSONPath expression. (%s)') raise ValidationError(message % (input, str(e))) + # Re #265 - maybe in the future fetch the page and offer a + # warning/notice that its possible the rule doesnt yet match anything? class quickWatchForm(Form): # https://wtforms.readthedocs.io/en/2.3.x/fields/#module-wtforms.fields.html5 diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py index c905f677..5d34c4cd 100644 --- a/changedetectionio/html_tools.py +++ b/changedetectionio/html_tools.py @@ -1,6 +1,6 @@ import json from bs4 import BeautifulSoup -from jsonpath_ng import parse +from jsonpath_ng.ext import parse class JSONNotFound(ValueError): @@ -47,8 +47,8 @@ def _parse_json(json_data, jsonpath_filter): # Re #257 - Better handling where it does not exist, in the case the original 's' value was False.. if not match: - # Maybe better to just allow it through, maybe they are waiting on a value to become available? - raise JSONNotFound("No Matching JSON could be found for the rule {}".format(jsonpath_filter.replace('json:', ''))) + # Re 265 - Just return an empty string when filter not found + return '' stripped_text_from_html = json.dumps(s, indent=4) @@ -87,6 +87,7 @@ def extract_json_as_string(content, jsonpath_filter): break if not stripped_text_from_html: - raise JSONNotFound("No JSON matching the rule '%s' found" % jsonpath_filter.replace('json:','')) + # Re 265 - Just return an empty string when filter not found + return '' return stripped_text_from_html diff --git a/changedetectionio/tests/test_jsonpath_selector.py b/changedetectionio/tests/test_jsonpath_selector.py index 30dd59e8..39529642 100644 --- a/changedetectionio/tests/test_jsonpath_selector.py +++ b/changedetectionio/tests/test_jsonpath_selector.py @@ -46,6 +46,45 @@ and it can also be repeated with pytest.raises(html_tools.JSONNotFound) as e_info: html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "$.id") +def set_original_ext_response(): + data = """ + [ + { + "isPriceLowered": false, + "status": "ForSale", + "statusOrig": "for sale" + }, + { + "_id": "5e7b3e1fb3262d306323ff1e", + "listingsType": "consumer", + "status": "ForSale", + "statusOrig": "for sale" + } + ] + """ + + with open("test-datastore/endpoint-content.txt", "w") as f: + f.write(data) + +def set_modified_ext_response(): + data = """ + [ + { + "isPriceLowered": false, + "status": "Sold", + "statusOrig": "sold" + }, + { + "_id": "5e7b3e1fb3262d306323ff1e", + "listingsType": "consumer", + "isPriceLowered": false, + "status": "Sold" + } +] + """ + + with open("test-datastore/endpoint-content.txt", "w") as f: + f.write(data) def set_original_response(): test_return_data = """ @@ -213,3 +252,76 @@ def test_check_json_filter_bool_val(client, live_server): res = client.get(url_for("diff_history_page", uuid="first")) # But the change should be there, tho its hard to test the change was detected because it will show old and new versions assert b'false' in res.data + +# Re #265 - Extended JSON selector test +# Stuff to consider here +# - Selector should be allowed to return empty when it doesnt match (people might wait for some condition) +# - The 'diff' tab could show the old and new content +# - Form should let us enter a selector that doesnt (yet) match anything +def test_check_json_ext_filter(client, live_server): + json_filter = 'json:$[?(@.status==Sold)]' + + set_original_ext_response() + + # Give the endpoint time to spin up + time.sleep(1) + + # Add our URL to the import page + test_url = url_for('test_endpoint', _external=True) + res = client.post( + url_for("import_page"), + data={"urls": test_url}, + follow_redirects=True + ) + assert b"1 Imported" in res.data + + # Trigger a check + client.get(url_for("api_watch_checknow"), follow_redirects=True) + + # Give the thread time to pick it up + time.sleep(3) + + # Goto the edit page, add our ignore text + # Add our URL to the import page + res = client.post( + url_for("edit_page", uuid="first"), + data={"css_filter": json_filter, + "url": test_url, + "tag": "", + "headers": "", + "fetch_backend": "html_requests" + }, + follow_redirects=True + ) + assert b"Updated watch." in res.data + + # Check it saved + res = client.get( + url_for("edit_page", uuid="first"), + ) + assert bytes(json_filter.encode('utf-8')) in res.data + + # Trigger a check + client.get(url_for("api_watch_checknow"), follow_redirects=True) + + # Give the thread time to pick it up + time.sleep(3) + # Make a change + set_modified_ext_response() + + # Trigger a check + client.get(url_for("api_watch_checknow"), follow_redirects=True) + # Give the thread time to pick it up + time.sleep(4) + + # It should have 'unviewed' + res = client.get(url_for("index")) + assert b'unviewed' in res.data + + res = client.get(url_for("diff_history_page", uuid="first")) + + # We should never see 'ForSale' because we are selecting on 'Sold' in the rule, + # But we should know it triggered ('unviewed' assert above) + assert b'ForSale' not in res.data + assert b'Sold' in res.data +