Handle case-insensitive matching of LDJSON offers data

test improvements
tweaks
1833-ldjson-fix
dgtlmoon 1 year ago
parent da92a12d92
commit 4ae3519d66

@ -186,6 +186,7 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
for json_data in bs_jsons:
stripped_text_from_html = _parse_json(json_data, json_filter)
if ensure_is_ldjson_info_type:
# Could sometimes be list, string or something else random
if isinstance(json_data, dict):
@ -293,8 +294,14 @@ def html_to_text(html_content: str, render_anchor_tag_content=False) -> str:
# Does LD+JSON exist with a @type=='product' and a .price set anywhere?
def has_ldjson_product_info(content):
try:
pricing_data = extract_json_as_string(content=content, json_filter=LD_JSON_PRODUCT_OFFER_SELECTOR, ensure_is_ldjson_info_type="product")
except JSONNotFound as e:
if not 'application/ld+json' in content:
return False
# Always lowercase the content so the json_filter for finding $..offers matches
pricing_data = extract_json_as_string(content=content.lower(),
json_filter=LD_JSON_PRODUCT_OFFER_SELECTOR,
ensure_is_ldjson_info_type="product")
except Exception as e:
# Totally fine
return False
x=bool(pricing_data)

@ -2,7 +2,8 @@
import time
from flask import url_for
from .util import live_server_setup, extract_UUID_from_client, extract_api_key_from_UI
from .util import live_server_setup, extract_UUID_from_client, extract_api_key_from_UI, wait_for_all_checks
def set_response_with_ldjson():
test_return_data = """<html>
@ -27,7 +28,7 @@ def set_response_with_ldjson():
"description":"You dont need it",
"mpn":"111111",
"sku":"22222",
"offers":{
"oFFerS":{
"@type":"AggregateOffer",
"lowPrice":8097000,
"highPrice":8099900,
@ -79,9 +80,6 @@ def set_response_without_ldjson():
def test_check_ldjson_price_autodetect(client, live_server):
live_server_setup(live_server)
# Give the endpoint time to spin up
time.sleep(1)
set_response_with_ldjson()
# Add our URL to the import page
@ -92,7 +90,7 @@ def test_check_ldjson_price_autodetect(client, live_server):
follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(3)
wait_for_all_checks(client)
# Should get a notice that it's available
res = client.get(url_for("index"))
@ -102,11 +100,11 @@ def test_check_ldjson_price_autodetect(client, live_server):
uuid = extract_UUID_from_client(client)
client.get(url_for('price_data_follower.accept', uuid=uuid, follow_redirects=True))
time.sleep(2)
wait_for_all_checks(client)
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
time.sleep(2)
wait_for_all_checks(client)
# Offer should be gone
res = client.get(url_for("index"))
assert b'Embedded price data' not in res.data
@ -138,9 +136,97 @@ def test_check_ldjson_price_autodetect(client, live_server):
follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(3)
wait_for_all_checks(client)
res = client.get(url_for("index"))
assert b'ldjson-price-track-offer' not in res.data
##########################################################################################
client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
def _test_runner_check_bad_format_ignored(live_server, client, has_ldjson_price_data):
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
for k,v in client.application.config.get('DATASTORE').data['watching'].items():
assert v.get('last_error') == False
assert v.get('has_ldjson_price_data') == has_ldjson_price_data
##########################################################################################
client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
def test_bad_ldjson_is_correctly_ignored(client, live_server):
live_server_setup(live_server)
test_return_data = """
<html>
<head>
<script type="application/ld+json">
{
"@context": "http://schema.org",
"@type": ["Product", "SubType"],
"name": "My test product",
"description": "",
"OffeRS": {
"note" : "You can see the case-insensitive OffERS key, it should work",
"@type": "Offer",
"offeredBy": {
"@type": "Organization",
"name":"Person",
"telephone":"+1 999 999 999"
},
"price": "1",
"priceCurrency": "EUR",
"url": "/some/url"
}
}
</script>
</head>
<body>
<div class="yes">Some extra stuff</div>
</body></html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
_test_runner_check_bad_format_ignored(live_server=live_server, client=client, has_ldjson_price_data=True)
test_return_data = """
<html>
<head>
<script type="application/ld+json">
{
"@context": "http://schema.org",
"@type": ["Product", "SubType"],
"name": "My test product",
"description": "",
"BrokenOffers": {
"@type": "Offer",
"offeredBy": {
"@type": "Organization",
"name":"Person",
"telephone":"+1 999 999 999"
},
"price": "1",
"priceCurrency": "EUR",
"url": "/some/url"
}
}
</script>
</head>
<body>
<div class="yes">Some extra stuff</div>
</body></html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
_test_runner_check_bad_format_ignored(live_server=live_server, client=client, has_ldjson_price_data=False)

Loading…
Cancel
Save