From 4ae3519d66ae78f5f44c8a20de94588b71ba020a Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Wed, 4 Oct 2023 15:04:31 +0200 Subject: [PATCH] Handle case-insensitive matching of LDJSON offers data test improvements tweaks --- changedetectionio/html_tools.py | 11 +- .../test_automatic_follow_ldjson_price.py | 104 ++++++++++++++++-- 2 files changed, 104 insertions(+), 11 deletions(-) diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py index dad56823..0bef32f4 100644 --- a/changedetectionio/html_tools.py +++ b/changedetectionio/html_tools.py @@ -186,6 +186,7 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None for json_data in bs_jsons: stripped_text_from_html = _parse_json(json_data, json_filter) + if ensure_is_ldjson_info_type: # Could sometimes be list, string or something else random if isinstance(json_data, dict): @@ -293,8 +294,14 @@ def html_to_text(html_content: str, render_anchor_tag_content=False) -> str: # Does LD+JSON exist with a @type=='product' and a .price set anywhere? def has_ldjson_product_info(content): try: - pricing_data = extract_json_as_string(content=content, json_filter=LD_JSON_PRODUCT_OFFER_SELECTOR, ensure_is_ldjson_info_type="product") - except JSONNotFound as e: + if not 'application/ld+json' in content: + return False + + # Always lowercase the content so the json_filter for finding $..offers matches + pricing_data = extract_json_as_string(content=content.lower(), + json_filter=LD_JSON_PRODUCT_OFFER_SELECTOR, + ensure_is_ldjson_info_type="product") + except Exception as e: # Totally fine return False x=bool(pricing_data) diff --git a/changedetectionio/tests/test_automatic_follow_ldjson_price.py b/changedetectionio/tests/test_automatic_follow_ldjson_price.py index c95e8fcf..b6ce0c4e 100644 --- a/changedetectionio/tests/test_automatic_follow_ldjson_price.py +++ b/changedetectionio/tests/test_automatic_follow_ldjson_price.py @@ -2,7 +2,8 @@ import time from flask import url_for -from .util import live_server_setup, extract_UUID_from_client, extract_api_key_from_UI +from .util import live_server_setup, extract_UUID_from_client, extract_api_key_from_UI, wait_for_all_checks + def set_response_with_ldjson(): test_return_data = """ @@ -27,7 +28,7 @@ def set_response_with_ldjson(): "description":"You dont need it", "mpn":"111111", "sku":"22222", - "offers":{ + "oFFerS":{ "@type":"AggregateOffer", "lowPrice":8097000, "highPrice":8099900, @@ -79,9 +80,6 @@ def set_response_without_ldjson(): def test_check_ldjson_price_autodetect(client, live_server): live_server_setup(live_server) - # Give the endpoint time to spin up - time.sleep(1) - set_response_with_ldjson() # Add our URL to the import page @@ -92,7 +90,7 @@ def test_check_ldjson_price_autodetect(client, live_server): follow_redirects=True ) assert b"1 Imported" in res.data - time.sleep(3) + wait_for_all_checks(client) # Should get a notice that it's available res = client.get(url_for("index")) @@ -102,11 +100,11 @@ def test_check_ldjson_price_autodetect(client, live_server): uuid = extract_UUID_from_client(client) client.get(url_for('price_data_follower.accept', uuid=uuid, follow_redirects=True)) - time.sleep(2) + wait_for_all_checks(client) # Trigger a check client.get(url_for("form_watch_checknow"), follow_redirects=True) - time.sleep(2) + wait_for_all_checks(client) # Offer should be gone res = client.get(url_for("index")) assert b'Embedded price data' not in res.data @@ -138,9 +136,97 @@ def test_check_ldjson_price_autodetect(client, live_server): follow_redirects=True ) assert b"1 Imported" in res.data - time.sleep(3) + wait_for_all_checks(client) res = client.get(url_for("index")) assert b'ldjson-price-track-offer' not in res.data ########################################################################################## client.get(url_for("form_delete", uuid="all"), follow_redirects=True) + + +def _test_runner_check_bad_format_ignored(live_server, client, has_ldjson_price_data): + + test_url = url_for('test_endpoint', _external=True) + res = client.post( + url_for("import_page"), + data={"urls": test_url}, + follow_redirects=True + ) + assert b"1 Imported" in res.data + wait_for_all_checks(client) + + for k,v in client.application.config.get('DATASTORE').data['watching'].items(): + assert v.get('last_error') == False + assert v.get('has_ldjson_price_data') == has_ldjson_price_data + + + ########################################################################################## + client.get(url_for("form_delete", uuid="all"), follow_redirects=True) + + +def test_bad_ldjson_is_correctly_ignored(client, live_server): + live_server_setup(live_server) + test_return_data = """ + + + + + +
Some extra stuff
+ + """ + with open("test-datastore/endpoint-content.txt", "w") as f: + f.write(test_return_data) + + _test_runner_check_bad_format_ignored(live_server=live_server, client=client, has_ldjson_price_data=True) + test_return_data = """ + + + + + +
Some extra stuff
+ + """ + with open("test-datastore/endpoint-content.txt", "w") as f: + f.write(test_return_data) + + _test_runner_check_bad_format_ignored(live_server=live_server, client=client, has_ldjson_price_data=False) +