From 0e291de045b1ef444cdea1b46677f326e8d79be0 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Sun, 11 Jul 2021 19:36:44 +1000 Subject: [PATCH 1/5] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 03e76a5e..0252a9d4 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ Know when ... - Festivals with changes - Realestate listing changes - COVID related news from government websites +- Detect and monitor changes in JSON API responses _Need an actual Chrome runner with Javascript support? see the experimental Javascript/Chrome support changedetection.io branch!_ From f2643c1b65c7d124bdfbe4b05085807ee83646e9 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Sun, 11 Jul 2021 19:38:54 +1000 Subject: [PATCH 2/5] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0252a9d4..f4e92180 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,8 @@ Know when ... - Festivals with changes - Realestate listing changes - COVID related news from government websites -- Detect and monitor changes in JSON API responses +- Detect and monitor changes in JSON API responses +- API monitoring and alerting _Need an actual Chrome runner with Javascript support? 
see the experimental Javascript/Chrome support changedetection.io branch!_ From e073521f4d1cd0f4b9ec447786b34eb059b05a01 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Sun, 11 Jul 2021 22:07:39 +1000 Subject: [PATCH 3/5] Re #117 Jsonpath based JSON change detection filter (#125) * Re #117 - Experimental JSON selector support by using 'json:' prefix and any JSONpath rule --- backend/fetch_site_status.py | 21 +++- backend/forms.py | 28 +++++- backend/templates/edit.html | 9 +- backend/tests/test_jsonpath_selector.py | 121 ++++++++++++++++++++++++ requirements.txt | 2 +- 5 files changed, 171 insertions(+), 10 deletions(-) create mode 100644 backend/tests/test_jsonpath_selector.py diff --git a/backend/fetch_site_status.py b/backend/fetch_site_status.py index 12216e19..dcefab15 100644 --- a/backend/fetch_site_status.py +++ b/backend/fetch_site_status.py @@ -88,12 +88,27 @@ class perform_site_check(): html = r.text - # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text + is_html = True css_filter_rule = self.datastore.data['watching'][uuid]['css_filter'] if css_filter_rule and len(css_filter_rule.strip()): - html = html_tools.css_filter(css_filter=css_filter_rule, html_content=r.content) + if 'json:' in css_filter_rule: + # POC hack, @todo rename vars, see how it fits in with the javascript version + import json + from jsonpath_ng import jsonpath, parse - stripped_text_from_html = get_text(html) + json_data = json.loads(html) + jsonpath_expression = parse(css_filter_rule.replace('json:','')) + match = jsonpath_expression.find(json_data) + stripped_text_from_html = json.dumps(match[0].value, indent=4) + + is_html = False + + else: + # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text + html = html_tools.css_filter(css_filter=css_filter_rule, html_content=r.content) + + if is_html: + stripped_text_from_html = get_text(html) # Usually from networkIO/requests level except 
(requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout) as e: diff --git a/backend/forms.py b/backend/forms.py index ea8fd995..72bda422 100644 --- a/backend/forms.py +++ b/backend/forms.py @@ -82,7 +82,7 @@ class StringDictKeyValue(StringField): else: self.data = {} -class ListRegex(object): +class ValidateListRegex(object): """ Validates that anything that looks like a regex passes as a regex """ @@ -102,6 +102,28 @@ class ListRegex(object): message = field.gettext('RegEx \'%s\' is not a valid regular expression.') raise ValidationError(message % (line)) +class ValidateCSSJSONInput(object): + """ + Filter validation + @todo CSS validator ;) + """ + + def __init__(self, message=None): + self.message = message + + def __call__(self, form, field): + if 'json:' in field.data: + from jsonpath_ng.exceptions import JsonPathParserError + from jsonpath_ng import jsonpath, parse + + input = field.data.replace('json:', '') + + try: + parse(input) + except JsonPathParserError as e: + message = field.gettext('\'%s\' is not a valid JSONPath expression. 
(%s)') + raise ValidationError(message % (input, str(e))) + class watchForm(Form): # https://wtforms.readthedocs.io/en/2.3.x/fields/#module-wtforms.fields.html5 @@ -111,10 +133,10 @@ class watchForm(Form): tag = StringField('Tag', [validators.Optional(), validators.Length(max=35)]) minutes_between_check = html5.IntegerField('Maximum time in minutes until recheck', [validators.Optional(), validators.NumberRange(min=1)]) - css_filter = StringField('CSS Filter') + css_filter = StringField('CSS/JSON Filter', [ValidateCSSJSONInput()]) title = StringField('Title') - ignore_text = StringListField('Ignore Text', [ListRegex()]) + ignore_text = StringListField('Ignore Text', [ValidateListRegex()]) notification_urls = StringListField('Notification URL List') headers = StringDictKeyValue('Request Headers') trigger_check = BooleanField('Send test notification on save') diff --git a/backend/templates/edit.html b/backend/templates/edit.html index 3e828e49..e42dc022 100644 --- a/backend/templates/edit.html +++ b/backend/templates/edit.html @@ -23,9 +23,12 @@
{{ render_field(form.css_filter, size=25, placeholder=".class-name or #some-id, or other CSS selector rule.") }} - Limit text to this CSS rule, only text matching this CSS rule is included.
- Please be sure that you thoroughly understand how to write CSS selector rules before filing an issue on GitHub!
- Go here for more CSS selector help + +
    +
  • CSS - Limit text to this CSS rule; only text matching this CSS rule is included.
  • +
  • JSON - Limit text to this JSON rule using JSONPath; prefix the rule with "json:". Test your JSONPath here
  • +
+ Please be sure that you thoroughly understand how to write CSS or JSONPath selector rules before filing an issue on GitHub! Go here for more CSS selector help.
diff --git a/backend/tests/test_jsonpath_selector.py b/backend/tests/test_jsonpath_selector.py new file mode 100644 index 00000000..e955dc28 --- /dev/null +++ b/backend/tests/test_jsonpath_selector.py @@ -0,0 +1,121 @@ +#!/usr/bin/python3 + +import time +from flask import url_for +from . util import live_server_setup + +def test_setup(live_server): + live_server_setup(live_server) + +def set_original_response(): + test_return_data = """ + { + "employees": [ + { + "id": 1, + "name": "Pankaj", + "salary": "10000" + }, + { + "name": "David", + "salary": "5000", + "id": 2 + } + ], + "boss": { + "name": "Fat guy" + } + } + """ + with open("test-datastore/output.txt", "w") as f: + f.write(test_return_data) + return None + +def set_modified_response(): + test_return_data = """ + { + "employees": [ + { + "id": 1, + "name": "Pankaj", + "salary": "10000" + }, + { + "name": "David", + "salary": "5000", + "id": 2 + } + ], + "boss": { + "name": "Foobar" + } + } + """ + + with open("test-datastore/output.txt", "w") as f: + f.write(test_return_data) + + return None + + + +def test_check_json_filter(client, live_server): + + json_filter = 'json:boss.name' + + set_original_response() + + # Give the endpoint time to spin up + time.sleep(1) + + # Add our URL to the import page + test_url = url_for('test_endpoint', _external=True) + res = client.post( + url_for("import_page"), + data={"urls": test_url}, + follow_redirects=True + ) + assert b"1 Imported" in res.data + + # Trigger a check + client.get(url_for("api_watch_checknow"), follow_redirects=True) + + # Give the thread time to pick it up + time.sleep(3) + + # Goto the edit page, add our ignore text + # Add our URL to the import page + res = client.post( + url_for("edit_page", uuid="first"), + data={"css_filter": json_filter, "url": test_url, "tag": "", "headers": ""}, + follow_redirects=True + ) + assert b"Updated watch." 
in res.data + + # Check it saved + res = client.get( + url_for("edit_page", uuid="first"), + ) + assert bytes(json_filter.encode('utf-8')) in res.data + + # Trigger a check + client.get(url_for("api_watch_checknow"), follow_redirects=True) + + # Give the thread time to pick it up + time.sleep(3) + # Make a change + set_modified_response() + + # Trigger a check + client.get(url_for("api_watch_checknow"), follow_redirects=True) + # Give the thread time to pick it up + time.sleep(3) + + # It should have 'unviewed' still + res = client.get(url_for("index")) + assert b'unviewed' in res.data + + # Should not see this, because its not in the JSONPath we entered + res = client.get(url_for("diff_history_page", uuid="first")) + # But the change should be there, tho its hard to test the change was detected because it will show old and new versions + assert b'Foobar' in res.data diff --git a/requirements.txt b/requirements.txt index 5f8e4049..1d4c4b11 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,7 +12,7 @@ flask-login ~= 0.5 pytz urllib3 wtforms ~= 2.3.3 - +jsonpath-ng ~= 1.5.3 # Notification library From 306814f47f978e188f7465ae16ee2356bbddb6cd Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Sun, 11 Jul 2021 22:10:49 +1000 Subject: [PATCH 4/5] Adding text about JSON API Monitoring --- README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/README.md b/README.md index f4e92180..08013401 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,18 @@ Just some examples Self-hosted web page change monitoring notifications +### JSON API Monitoring + +Detect changes and monitor data in JSON APIs by using the built-in JSONPath selectors as a filter. 
+ + +![image](https://user-images.githubusercontent.com/275001/125165842-0ce01980-e1dc-11eb-9e73-d8137dd162dc.png) + +This will re-parse the JSON and apply indent to the text, making it super easy to monitor and detect changes in JSON API results + +![image](https://user-images.githubusercontent.com/275001/125165995-d9ea5580-e1dc-11eb-8030-f0deced2661a.png) + + ### Proxy A proxy for ChangeDectection.io can be configured by setting environment the From dc2896c452f3b7c7f93eadec280778465de7327a Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Sun, 11 Jul 2021 22:11:53 +1000 Subject: [PATCH 5/5] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 08013401..395e3ac8 100644 --- a/README.md +++ b/README.md @@ -104,7 +104,7 @@ This will re-parse the JSON and apply indent to the text, making it super easy t ### Proxy -A proxy for ChangeDectection.io can be configured by setting environment the +A proxy for ChangeDetection.io can be configured by setting environment the `HTTP_PROXY`, `HTTPS_PROXY` variables, examples are also in the `docker-compose.yml` `NO_PROXY` exclude list can be specified by following `"localhost,192.168.0.0/24"`