Restock multiprice improvements (#2698)

remove-same-checksum-skip
dgtlmoon 3 months ago committed by GitHub
parent d39530b261
commit a278fa22f2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -27,22 +27,27 @@ def _search_prop_by_value(matches, value):
return prop[1] # Yield the desired value and exit the function return prop[1] # Yield the desired value and exit the function
def _deduplicate_prices(data): def _deduplicate_prices(data):
seen = set() import re
unique_data = []
'''
Some price data has multiple entries, OR it has a single entry with ['$159', '159', 159, "$ 159"] or just "159"
Get all the values, clean it and add it to a set then return the unique values
'''
unique_data = set()
# Return the complete 'datum' where its price was not seen before
for datum in data: for datum in data:
# Convert 'value' to float if it can be a numeric string, otherwise leave it as is
try:
normalized_value = float(datum.value) if isinstance(datum.value, str) and datum.value.replace('.', '', 1).isdigit() else datum.value
except ValueError:
normalized_value = datum.value
# If the normalized value hasn't been seen yet, add it to unique data if isinstance(datum.value, list):
if normalized_value not in seen: # Process each item in the list
unique_data.append(datum) normalized_value = set([float(re.sub(r'[^\d.]', '', str(item))) for item in datum.value])
seen.add(normalized_value) unique_data.update(normalized_value)
else:
# Process single value
v = float(re.sub(r'[^\d.]', '', str(datum.value)))
unique_data.add(v)
return unique_data return list(unique_data)
# should return Restock() # should return Restock()
@ -83,14 +88,13 @@ def get_itemprop_availability(html_content) -> Restock:
if price_result: if price_result:
# Right now, we just support single product items, maybe we will store the whole actual metadata separately in the future and # Right now, we just support single product items, maybe we will store the whole actual metadata separately in the future and
# parse that for the UI? # parse that for the UI?
prices_found = set(str(item.value).replace('$', '') for item in price_result) if len(price_result) > 1 and len(price_result) > 1:
if len(price_result) > 1 and len(prices_found) > 1:
# See if all prices are different, in the case that one product has many embedded data types with the same price # See if all prices are different, in the case that one product has many embedded data types with the same price
# One might have $121.95 and another 121.95 etc # One might have $121.95 and another 121.95 etc
logger.warning(f"More than one price found {prices_found}, throwing exception, cant use this plugin.") logger.warning(f"More than one price found {price_result}, throwing exception, cant use this plugin.")
raise MoreThanOnePriceFound() raise MoreThanOnePriceFound()
value['price'] = price_result[0].value value['price'] = price_result[0]
pricecurrency_result = pricecurrency_parse.find(data) pricecurrency_result = pricecurrency_parse.find(data)
if pricecurrency_result: if pricecurrency_result:
@ -220,7 +224,7 @@ class perform_site_check(difference_detection_processor):
itemprop_availability['original_price'] = itemprop_availability.get('price') itemprop_availability['original_price'] = itemprop_availability.get('price')
update_obj['restock']["original_price"] = itemprop_availability.get('price') update_obj['restock']["original_price"] = itemprop_availability.get('price')
if not self.fetcher.instock_data and not itemprop_availability.get('availability'): if not self.fetcher.instock_data and not itemprop_availability.get('availability') and not itemprop_availability.get('price'):
raise ProcessorException( raise ProcessorException(
message=f"Unable to extract restock data for this page unfortunately. (Got code {self.fetcher.get_last_status_code()} from server), no embedded stock information was found and nothing interesting in the text, try using this watch with Chrome.", message=f"Unable to extract restock data for this page unfortunately. (Got code {self.fetcher.get_last_status_code()} from server), no embedded stock information was found and nothing interesting in the text, try using this watch with Chrome.",
url=watch.get('url'), url=watch.get('url'),

@ -0,0 +1,6 @@
# A list of real world examples!
The price should always be 155.55 for our tests
see test_restock_itemprop.py::test_special_prop_examples

@ -0,0 +1,25 @@
<div class="PriceSection PriceSection_PriceSection__Vx1_Q PriceSection_variantHuge__P9qxg PdpPriceSection"
data-testid="price-section"
data-optly-product-tile-price-section="true"><span
class="PriceRange ProductPrice variant-huge" itemprop="offers"
itemscope="" itemtype="http://schema.org/Offer"><div
class="VisuallyHidden_VisuallyHidden__VBD83">$155.55</div><span
aria-hidden="true" class="Price variant-huge" data-testid="price"
itemprop="price"><sup class="sup" data-testid="price-symbol"
itemprop="priceCurrency" content="AUD">$</sup><span
class="dollars" data-testid="price-value" itemprop="price"
content="155.55">155.55</span><span class="extras"><span class="sup"
data-testid="price-sup"></span></span></span></span>
</div>
<script type="application/ld+json">{
"@type": "Product",
"@context": "https://schema.org",
"name": "test",
"description": "test",
"offers": {
"@type": "Offer",
"priceCurrency": "AUD",
"price": 155.55
},
}</script>

@ -3,7 +3,7 @@ import os
import time import time
from flask import url_for from flask import url_for
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output
from ..notification import default_notification_format from ..notification import default_notification_format
instock_props = [ instock_props = [
@ -413,3 +413,31 @@ def test_data_sanity(client, live_server):
res = client.get( res = client.get(
url_for("edit_page", uuid="first")) url_for("edit_page", uuid="first"))
assert test_url2.encode('utf-8') in res.data assert test_url2.encode('utf-8') in res.data
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
# All examples should give a price of 155.55
def test_special_prop_examples(client, live_server):
    """Check that every real-world itemprop example yields the expected price.

    Each ``*.txt`` file in the ``itemprop_test_examples`` directory next to this
    test is wrapped in a minimal HTML document and written to the endpoint
    content file. A ``restock_diff`` watch is then added for the test endpoint
    URL, and the index page must show the price ``155.55`` without any
    exception text.
    """
    import glob

    #live_server_setup(live_server)

    test_url = url_for('test_endpoint', _external=True)
    check_path = os.path.join(os.path.dirname(__file__), "itemprop_test_examples", "*.txt")
    files = glob.glob(check_path)
    # Guard against the examples directory being missing or empty, which would
    # otherwise make this test pass without checking anything
    assert files

    # Sorted so the examples are always exercised in the same order
    for test_example_filename in sorted(files):
        with open(test_example_filename, 'r', encoding='utf-8') as example_f:
            example_html = example_f.read()

        with open("test-datastore/endpoint-content.txt", "w", encoding='utf-8') as test_f:
            test_f.write(f"<html><body>{example_html}</body></html>")

        # Now fetch it and check the price worked
        client.post(
            url_for("form_quick_watch_add"),
            data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
            follow_redirects=True
        )
        wait_for_all_checks(client)
        res = client.get(url_for("index"))
        # 'ception' is a substring check, presumably meant to catch any "Exception" text on the page
        assert b'ception' not in res.data, f"Exception text shown for {test_example_filename}"
        assert b'155.55' in res.data, f"Expected price not found for {test_example_filename}"

        # Remove the watch again so a price rendered for this example
        # cannot satisfy the assertion for the next one
        res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
        assert b'Deleted' in res.data

Loading…
Cancel
Save