From b7a2501d64611c1530259599b7611d6b5531e8a5 Mon Sep 17 00:00:00 2001
From: dgtlmoon
Date: Thu, 15 Dec 2022 09:13:09 +0100
Subject: [PATCH] Fetching - Always sort the key order of JSON content for less false alerts (May cause an alert on upgrade, but will be better going forwards) #1219

---
Review notes (text between the "---" line and the diffstat is not part of
the commit message):
 * This copy of the patch had lost its line breaks; the line structure below
   was reconstructed. Indentation of context lines and the position of
   whitespace-only context lines are inferred, not verified -- TODO confirm
   against the target tree (or apply with `git apply --ignore-whitespace`).
 * The JSON normalisation fallback now logs the failure at debug level
   instead of silently passing; the content as fetched is still used, so the
   best-effort behaviour is unchanged.
 * The "index" blob ids below predate that one-line change.

 changedetectionio/fetch_site_status.py        | 11 +++++
 .../tests/test_jsonpath_jq_selector.py        | 42 +++++++++++++++++++
 2 files changed, 53 insertions(+)

diff --git a/changedetectionio/fetch_site_status.py b/changedetectionio/fetch_site_status.py
index 7dfd38b6..fec54337 100644
--- a/changedetectionio/fetch_site_status.py
+++ b/changedetectionio/fetch_site_status.py
@@ -1,4 +1,5 @@
 import hashlib
+import json
 import logging
 import os
 import re
@@ -167,6 +168,14 @@ class perform_site_check():
             include_filters_rule.append("json:$")
             has_filter_rule = True
 
+        if is_json:
+            # Sort the JSON so we don't get false alerts when the content is just re-ordered
+            try:
+                fetcher.content = json.dumps(json.loads(fetcher.content), sort_keys=True)
+            except Exception as e:
+                # Might have just been a snippet, or otherwise bad JSON; continue with the content as fetched
+                logging.debug("Could not normalise JSON key order, using content as fetched: %s", e)
+
         if has_filter_rule:
             json_filter_prefixes = ['json:', 'jq:']
             for filter in include_filters_rule:
@@ -174,6 +183,8 @@ class perform_site_check():
                     stripped_text_from_html += html_tools.extract_json_as_string(content=fetcher.content, json_filter=filter)
                     is_html = False
 
+
+
         if is_html or is_source:
 
             # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
diff --git a/changedetectionio/tests/test_jsonpath_jq_selector.py b/changedetectionio/tests/test_jsonpath_jq_selector.py
index 7dc75208..4ace6015 100644
--- a/changedetectionio/tests/test_jsonpath_jq_selector.py
+++ b/changedetectionio/tests/test_jsonpath_jq_selector.py
@@ -394,6 +394,48 @@ def check_json_ext_filter(json_filter, client, live_server):
     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
     assert b'Deleted' in res.data
 
+def test_ignore_json_order(client, live_server):
+    # A change in order shouldn't trigger a notification
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write('{"hello" : 123, "world": 123}')
+
+
+    # Add our URL to the import page
+    test_url = url_for('test_endpoint', content_type="application/json", _external=True)
+    res = client.post(
+        url_for("import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
+
+    time.sleep(2)
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write('{"world" : 123, "hello": 123}')
+
+    # Trigger a check
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    time.sleep(2)
+
+    res = client.get(url_for("index"))
+    assert b'unviewed' not in res.data
+
+    # Just to be sure it still works
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write('{"world" : 123, "hello": 124}')
+
+    # Trigger a check
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    time.sleep(2)
+
+    res = client.get(url_for("index"))
+    assert b'unviewed' in res.data
+
+    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
+
 def test_check_jsonpath_ext_filter(client, live_server):
     check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server)
 