Re #117 Jsonpath based JSON change detection filter (#125)

* Re #117 - Experimental JSON selector support by using 'json:' prefix and any JSONpath rule
pull/128/head
dgtlmoon 4 years ago committed by GitHub
parent f2643c1b65
commit e073521f4d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -88,12 +88,27 @@ class perform_site_check():
html = r.text
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
is_html = True
css_filter_rule = self.datastore.data['watching'][uuid]['css_filter']
if css_filter_rule and len(css_filter_rule.strip()):
html = html_tools.css_filter(css_filter=css_filter_rule, html_content=r.content)
if 'json:' in css_filter_rule:
# POC hack, @todo rename vars, see how it fits in with the javascript version
import json
from jsonpath_ng import jsonpath, parse
stripped_text_from_html = get_text(html)
json_data = json.loads(html)
jsonpath_expression = parse(css_filter_rule.replace('json:',''))
match = jsonpath_expression.find(json_data)
stripped_text_from_html = json.dumps(match[0].value, indent=4)
is_html = False
else:
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
html = html_tools.css_filter(css_filter=css_filter_rule, html_content=r.content)
if is_html:
stripped_text_from_html = get_text(html)
# Usually from networkIO/requests level
except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout) as e:

@ -82,7 +82,7 @@ class StringDictKeyValue(StringField):
else:
self.data = {}
class ListRegex(object):
class ValidateListRegex(object):
"""
Validates that anything that looks like a regex passes as a regex
"""
@ -102,6 +102,28 @@ class ListRegex(object):
message = field.gettext('RegEx \'%s\' is not a valid regular expression.')
raise ValidationError(message % (line))
class ValidateCSSJSONInput(object):
    """
    Filter validation for the combined CSS/JSON filter field.

    Only values containing the 'json:' marker are checked: every occurrence of
    the marker is removed and the remainder must parse as a JSONPath
    expression. Any other value is accepted without inspection.
    @todo CSS validator ;)
    """

    def __init__(self, message=None):
        # Stored on the instance; the check below builds its own message.
        self.message = message

    def __call__(self, form, field):
        """
        Validate the field's value.

        :param form: the form being validated (not used by this check)
        :param field: the field whose ``data`` holds the filter rule
        :raises ValidationError: when a 'json:' rule is not valid JSONPath
        """
        # Guard against a missing value (field.data being None) so the
        # membership test below cannot raise TypeError.
        filter_rule = field.data or ''
        if 'json:' not in filter_rule:
            return

        # Imported lazily so the dependency is only loaded for JSON rules.
        from jsonpath_ng.exceptions import JsonPathParserError
        from jsonpath_ng import parse

        jsonpath_rule = filter_rule.replace('json:', '')
        try:
            parse(jsonpath_rule)
        except JsonPathParserError as e:
            message = field.gettext('\'%s\' is not a valid JSONPath expression. (%s)')
            raise ValidationError(message % (jsonpath_rule, str(e)))
class watchForm(Form):
# https://wtforms.readthedocs.io/en/2.3.x/fields/#module-wtforms.fields.html5
@ -111,10 +133,10 @@ class watchForm(Form):
tag = StringField('Tag', [validators.Optional(), validators.Length(max=35)])
minutes_between_check = html5.IntegerField('Maximum time in minutes until recheck',
[validators.Optional(), validators.NumberRange(min=1)])
css_filter = StringField('CSS Filter')
css_filter = StringField('CSS/JSON Filter', [ValidateCSSJSONInput()])
title = StringField('Title')
ignore_text = StringListField('Ignore Text', [ListRegex()])
ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
notification_urls = StringListField('Notification URL List')
headers = StringDictKeyValue('Request Headers')
trigger_check = BooleanField('Send test notification on save')

@ -23,9 +23,12 @@
</div>
<div class="pure-control-group">
{{ render_field(form.css_filter, size=25, placeholder=".class-name or #some-id, or other CSS selector rule.") }}
<span class="pure-form-message-inline">Limit text to this CSS rule, only text matching this CSS rule is included.<br/>
Please be sure that you thoroughly understand how to write CSS selector rules before filing an issue on GitHub!<br/>
Go <a href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>
<span class="pure-form-message-inline">
<ul>
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
<li>JSON - Limit text to this JSON rule, using <a href="https://pypi.org/project/jsonpath-ng/">JSONPath</a>, prefix with <b>"json:"</b>, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a></li>
</ul>
Please be sure that you thoroughly understand how to write CSS or JSONPath selector rules before filing an issue on GitHub! Go <a href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/>
</span>
</div>
<!-- @todo: move to tabs --->

@ -0,0 +1,121 @@
#!/usr/bin/python3
import time
from flask import url_for
from . util import live_server_setup
def test_setup(live_server):
    """Run the shared live-server setup helper before the tests in this module."""
    live_server_setup(live_server)
def set_original_response():
    """Write the baseline JSON document (boss name "Fat guy") to test-datastore/output.txt."""
    original_json = """
{
"employees": [
{
"id": 1,
"name": "Pankaj",
"salary": "10000"
},
{
"name": "David",
"salary": "5000",
"id": 2
}
],
"boss": {
"name": "Fat guy"
}
}
"""

    # Overwrite any previous content of the output file.
    with open("test-datastore/output.txt", "w") as output_file:
        output_file.write(original_json)
def set_modified_response():
    """Write the changed JSON document (boss name "Foobar") to test-datastore/output.txt."""
    modified_json = """
{
"employees": [
{
"id": 1,
"name": "Pankaj",
"salary": "10000"
},
{
"name": "David",
"salary": "5000",
"id": 2
}
],
"boss": {
"name": "Foobar"
}
}
"""

    # Overwrite any previous content of the output file.
    with open("test-datastore/output.txt", "w") as output_file:
        output_file.write(modified_json)
def test_check_json_filter(client, live_server):
    """
    Check that a 'json:' JSONPath filter can be saved on a watch and that a
    change to the fetched JSON is then flagged and visible on the diff page.
    """
    jsonpath_filter = 'json:boss.name'

    def recheck_and_wait():
        # Trigger a check, then give the thread time to pick it up
        client.get(url_for("api_watch_checknow"), follow_redirects=True)
        time.sleep(3)

    set_original_response()

    # Give the endpoint time to spin up
    time.sleep(1)

    # Add our URL via the import page
    endpoint_url = url_for('test_endpoint', _external=True)
    import_result = client.post(
        url_for("import_page"),
        data={"urls": endpoint_url},
        follow_redirects=True
    )
    assert b"1 Imported" in import_result.data

    recheck_and_wait()

    # Go to the edit page and set the JSONPath filter on the watch
    save_result = client.post(
        url_for("edit_page", uuid="first"),
        data={"css_filter": jsonpath_filter, "url": endpoint_url, "tag": "", "headers": ""},
        follow_redirects=True
    )
    assert b"Updated watch." in save_result.data

    # Check it saved: the filter text should be shown on the edit page
    edit_page = client.get(url_for("edit_page", uuid="first"))
    assert jsonpath_filter.encode('utf-8') in edit_page.data

    recheck_and_wait()

    # Make a change to the served JSON, then check again
    set_modified_response()
    recheck_and_wait()

    # The index page should now show 'unviewed'
    index_page = client.get(url_for("index"))
    assert b'unviewed' in index_page.data

    # The new value should appear on the diff page. The page shows old and
    # new versions, so only the presence of the new value is asserted.
    diff_page = client.get(url_for("diff_history_page", uuid="first"))
    assert b'Foobar' in diff_page.data

@ -12,7 +12,7 @@ flask-login ~= 0.5
pytz
urllib3
wtforms ~= 2.3.3
jsonpath-ng ~= 1.5.3
# Notification library

Loading…
Cancel
Save