diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py index 0bef32f4..8df14f32 100644 --- a/changedetectionio/html_tools.py +++ b/changedetectionio/html_tools.py @@ -7,13 +7,14 @@ from typing import List import json import re + # HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis TEXT_FILTER_LIST_LINE_SUFFIX = "
" PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$' # 'price' , 'lowPrice', 'highPrice' are usually under here -# all of those may or may not appear on different websites -LD_JSON_PRODUCT_OFFER_SELECTOR = "json:$..offers" +# All of those may or may not appear on different websites - I didn't find a way to do case-insensitive searching here +LD_JSON_PRODUCT_OFFER_SELECTORS = ["json:$..offers", "json:$..Offers"] class JSONNotFound(ValueError): def __init__(self, msg): @@ -293,14 +294,17 @@ def html_to_text(html_content: str, render_anchor_tag_content=False) -> str: # Does LD+JSON exist with a @type=='product' and a .price set anywhere? def has_ldjson_product_info(content): + pricing_data = '' + try: if not 'application/ld+json' in content: return False - # Always lowercase the content so the json_filter for finding $..offers matches - pricing_data = extract_json_as_string(content=content.lower(), - json_filter=LD_JSON_PRODUCT_OFFER_SELECTOR, - ensure_is_ldjson_info_type="product") + for filter in LD_JSON_PRODUCT_OFFER_SELECTORS: + pricing_data += extract_json_as_string(content=content, + json_filter=filter, + ensure_is_ldjson_info_type="product") + except Exception as e: # Totally fine return False diff --git a/changedetectionio/processors/text_json_diff.py b/changedetectionio/processors/text_json_diff.py index 19ef78da..bada0a1d 100644 --- a/changedetectionio/processors/text_json_diff.py +++ b/changedetectionio/processors/text_json_diff.py @@ -17,7 +17,7 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) name = 'Webpage Text/HTML, JSON and PDF changes' description = 'Detects all text changes where possible' - +json_filter_prefixes = ['json:', 'jq:'] class FilterNotFoundInResponse(ValueError): def __init__(self, msg): @@ -196,7 +196,7 @@ class perform_site_check(difference_detection_processor): # Inject a virtual LD+JSON price tracker rule if watch.get('track_ldjson_price_data', '') == PRICE_DATA_TRACK_ACCEPT: - 
include_filters_rule.append(html_tools.LD_JSON_PRODUCT_OFFER_SELECTOR) + include_filters_rule += html_tools.LD_JSON_PRODUCT_OFFER_SELECTORS has_filter_rule = len(include_filters_rule) and len(include_filters_rule[0].strip()) has_subtractive_selectors = len(subtractive_selectors) and len(subtractive_selectors[0].strip()) @@ -214,7 +214,6 @@ class perform_site_check(difference_detection_processor): pass if has_filter_rule: - json_filter_prefixes = ['json:', 'jq:'] for filter in include_filters_rule: if any(prefix in filter for prefix in json_filter_prefixes): stripped_text_from_html += html_tools.extract_json_as_string(content=fetcher.content, json_filter=filter) diff --git a/changedetectionio/tests/test_automatic_follow_ldjson_price.py b/changedetectionio/tests/test_automatic_follow_ldjson_price.py index d2e2de34..ff1e6330 100644 --- a/changedetectionio/tests/test_automatic_follow_ldjson_price.py +++ b/changedetectionio/tests/test_automatic_follow_ldjson_price.py @@ -28,7 +28,7 @@ def set_response_with_ldjson(): "description":"You dont need it", "mpn":"111111", "sku":"22222", - "oFFerS":{ + "Offers":{ "@type":"AggregateOffer", "lowPrice":8097000, "highPrice":8099900, @@ -177,7 +177,7 @@ def test_bad_ldjson_is_correctly_ignored(client, live_server): "@type": ["Product", "SubType"], "name": "My test product", "description": "", - "OffeRS": { + "offers": { "note" : "You can see the case-insensitive OffERS key, it should work", "@type": "Offer", "offeredBy": {