From a278fa22f2cf7f41f44dc1de1af69ab47eab4661 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Fri, 11 Oct 2024 11:43:35 +0200 Subject: [PATCH] Restock multiprice improvements (#2698) --- .../processors/restock_diff/processor.py | 40 ++++++++++--------- .../tests/itemprop_test_examples/README.md | 6 +++ .../tests/itemprop_test_examples/a.txt | 25 ++++++++++++ .../tests/test_restock_itemprop.py | 30 +++++++++++++- 4 files changed, 82 insertions(+), 19 deletions(-) create mode 100644 changedetectionio/tests/itemprop_test_examples/README.md create mode 100644 changedetectionio/tests/itemprop_test_examples/a.txt diff --git a/changedetectionio/processors/restock_diff/processor.py b/changedetectionio/processors/restock_diff/processor.py index 911e1838..ce55f5d2 100644 --- a/changedetectionio/processors/restock_diff/processor.py +++ b/changedetectionio/processors/restock_diff/processor.py @@ -27,22 +27,27 @@ def _search_prop_by_value(matches, value): return prop[1] # Yield the desired value and exit the function def _deduplicate_prices(data): - seen = set() - unique_data = [] + import re + + ''' + Some price data has multiple entries, OR it has a single entry with ['$159', '159', 159, "$ 159"] or just "159" + Get all the values, clean it and add it to a set then return the unique values + ''' + unique_data = set() + # Return the complete 'datum' where its price was not seen before for datum in data: - # Convert 'value' to float if it can be a numeric string, otherwise leave it as is - try: - normalized_value = float(datum.value) if isinstance(datum.value, str) and datum.value.replace('.', '', 1).isdigit() else datum.value - except ValueError: - normalized_value = datum.value - # If the normalized value hasn't been seen yet, add it to unique data - if normalized_value not in seen: - unique_data.append(datum) - seen.add(normalized_value) - - return unique_data + if isinstance(datum.value, list): + # Process each item in the list + normalized_value = 
set([float(re.sub(r'[^\d.]', '', str(item))) for item in datum.value]) + unique_data.update(normalized_value) + else: + # Process single value + v = float(re.sub(r'[^\d.]', '', str(datum.value))) + unique_data.add(v) + + return list(unique_data) # should return Restock() @@ -83,14 +88,13 @@ def get_itemprop_availability(html_content) -> Restock: if price_result: # Right now, we just support single product items, maybe we will store the whole actual metadata seperately in teh future and # parse that for the UI? - prices_found = set(str(item.value).replace('$', '') for item in price_result) - if len(price_result) > 1 and len(prices_found) > 1: + if len(price_result) > 1 and len(price_result) > 1: # See of all prices are different, in the case that one product has many embedded data types with the same price # One might have $121.95 and another 121.95 etc - logger.warning(f"More than one price found {prices_found}, throwing exception, cant use this plugin.") + logger.warning(f"More than one price found {price_result}, throwing exception, cant use this plugin.") raise MoreThanOnePriceFound() - value['price'] = price_result[0].value + value['price'] = price_result[0] pricecurrency_result = pricecurrency_parse.find(data) if pricecurrency_result: @@ -220,7 +224,7 @@ class perform_site_check(difference_detection_processor): itemprop_availability['original_price'] = itemprop_availability.get('price') update_obj['restock']["original_price"] = itemprop_availability.get('price') - if not self.fetcher.instock_data and not itemprop_availability.get('availability'): + if not self.fetcher.instock_data and not itemprop_availability.get('availability') and not itemprop_availability.get('price'): raise ProcessorException( message=f"Unable to extract restock data for this page unfortunately. 
(Got code {self.fetcher.get_last_status_code()} from server), no embedded stock information was found and nothing interesting in the text, try using this watch with Chrome.", url=watch.get('url'), diff --git a/changedetectionio/tests/itemprop_test_examples/README.md b/changedetectionio/tests/itemprop_test_examples/README.md new file mode 100644 index 00000000..83db8c01 --- /dev/null +++ b/changedetectionio/tests/itemprop_test_examples/README.md @@ -0,0 +1,6 @@ +# A list of real-world examples! + +The price should always be 155.55 in our tests + +see test_restock_itemprop.py::test_special_prop_examples + diff --git a/changedetectionio/tests/itemprop_test_examples/a.txt b/changedetectionio/tests/itemprop_test_examples/a.txt new file mode 100644 index 00000000..69eaa9ae --- /dev/null +++ b/changedetectionio/tests/itemprop_test_examples/a.txt @@ -0,0 +1,25 @@ +
$155.55
+
+ + \ No newline at end of file diff --git a/changedetectionio/tests/test_restock_itemprop.py b/changedetectionio/tests/test_restock_itemprop.py index eb2e731b..7d0ad7cb 100644 --- a/changedetectionio/tests/test_restock_itemprop.py +++ b/changedetectionio/tests/test_restock_itemprop.py @@ -3,7 +3,7 @@ import os import time from flask import url_for -from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output +from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output from ..notification import default_notification_format instock_props = [ @@ -413,3 +413,31 @@ def test_data_sanity(client, live_server): res = client.get( url_for("edit_page", uuid="first")) assert test_url2.encode('utf-8') in res.data + + res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) + assert b'Deleted' in res.data + +# All examples should give a price of 155.55 +def test_special_prop_examples(client, live_server): + import glob + #live_server_setup(live_server) + + test_url = url_for('test_endpoint', _external=True) + check_path = os.path.join(os.path.dirname(__file__), "itemprop_test_examples", "*.txt") + files = glob.glob(check_path) + assert files + for test_example_filename in files: + with open(test_example_filename, 'r') as example_f: + with open("test-datastore/endpoint-content.txt", "w") as test_f: + test_f.write(f"{example_f.read()}") + + # Now fetch it and check the price worked + client.post( + url_for("form_quick_watch_add"), + data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'}, + follow_redirects=True + ) + wait_for_all_checks(client) + res = client.get(url_for("index")) + assert b'ception' not in res.data + assert b'155.55' in res.data