Handle multiple products error

pull/2041/head
dgtlmoon 4 months ago
parent a855b3dd51
commit 3da6e74a60

@ -0,0 +1,10 @@
class ProcessorException(Exception):
    """Generic error raised by a processor, carrying context about the failed check.

    Every constructor argument is stored unchanged on the instance under the
    same name, so an exception handler can read ``message``, ``status_code``,
    ``url``, ``screenshot``, ``has_filters``, ``html_content`` and
    ``xpath_data`` directly from the caught exception.

    Args:
        message: Human-readable description of the failure.
        status_code: Status code associated with the failed fetch, if any.
        url: URL that was being processed.
        screenshot: Screenshot data captured for the page, if any.
        has_filters: Flag supplied by the raiser; stored as-is.
        html_content: HTML content supplied by the raiser; stored as-is.
        xpath_data: XPath data captured for the page, if any.
    """

    def __init__(self, message=None, status_code=None, url=None, screenshot=None, has_filters=False, html_content='', xpath_data=None):
        # Hand the message to Exception so that str(e), e.args and tracebacks
        # carry it even when it is passed by keyword; without this call a
        # keyword-only construction produces an exception whose str() is ''.
        if message is None:
            super().__init__()
        else:
            super().__init__(message)

        self.message = message
        self.status_code = status_code
        self.url = url
        self.screenshot = screenshot
        self.has_filters = has_filters
        self.html_content = html_content
        self.xpath_data = xpath_data

@ -6,7 +6,7 @@ from babel.numbers import parse_decimal
class Restock(dict): class Restock(dict):
def parse_currency(self, raw_value: str) -> float: def parse_currency(self, raw_value: str) -> float:
# Clean and standardize the value # Clean and standardize the value (i.e. 1,400.00 should become 1400.00); even better would be to store the whole thing as an integer.
standardized_value = raw_value standardized_value = raw_value
if ',' in standardized_value and '.' in standardized_value: if ',' in standardized_value and '.' in standardized_value:

@ -1,4 +1,5 @@
from .. import difference_detection_processor from .. import difference_detection_processor
from ..exceptions import ProcessorException
from . import Restock from . import Restock
from loguru import logger from loguru import logger
import hashlib import hashlib
@ -16,6 +17,10 @@ class UnableToExtractRestockData(Exception):
self.status_code = status_code self.status_code = status_code
return return
class MoreThanOnePriceFound(Exception):
    """Signals that price extraction matched more than one price.

    The exception carries no payload and its constructor accepts no
    arguments; the type itself is the whole message.
    """

    def __init__(self):
        # Deliberately argument-free: nothing to record on the instance.
        super().__init__()
def _search_prop_by_value(matches, value): def _search_prop_by_value(matches, value):
for properties in matches: for properties in matches:
for prop in properties: for prop in properties:
@ -54,6 +59,9 @@ def get_itemprop_availability(html_content) -> Restock:
price_result = price_parse.find(data) price_result = price_parse.find(data)
if price_result: if price_result:
if len(price_result) > 1:
raise MoreThanOnePriceFound()
value['price'] = price_result[0].value value['price'] = price_result[0].value
pricecurrency_result = pricecurrency_parse.find(data) pricecurrency_result = pricecurrency_parse.find(data)
@ -119,7 +127,18 @@ class perform_site_check(difference_detection_processor):
update_obj['content_type'] = self.fetcher.headers.get('Content-Type', '') update_obj['content_type'] = self.fetcher.headers.get('Content-Type', '')
update_obj["last_check_status"] = self.fetcher.get_last_status_code() update_obj["last_check_status"] = self.fetcher.get_last_status_code()
itemprop_availability = {}
try:
itemprop_availability = get_itemprop_availability(html_content=self.fetcher.content) itemprop_availability = get_itemprop_availability(html_content=self.fetcher.content)
except MoreThanOnePriceFound as e:
# Add the real data
raise ProcessorException(message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.",
url=watch.get('url'),
status_code=self.fetcher.get_last_status_code(),
screenshot=self.fetcher.screenshot,
xpath_data=self.fetcher.xpath_data
)
# Something valid in get_itemprop_availability() by scraping metadata ? # Something valid in get_itemprop_availability() by scraping metadata ?
if itemprop_availability.get('price') or itemprop_availability.get('availability'): if itemprop_availability.get('price') or itemprop_availability.get('availability'):
# Store for other usage # Store for other usage

@ -154,6 +154,7 @@
<span class="watch-tag-list">{{ watch_tag.title }}</span> <span class="watch-tag-list">{{ watch_tag.title }}</span>
{% endfor %} {% endfor %}
</td> </td>
<!-- @todo make it so any watch handler obj can expose this --->
{% if any_has_restock_price_processor %} {% if any_has_restock_price_processor %}
<td class="restock-and-price"> <td class="restock-and-price">
{% if watch['processor'] == 'restock_diff' %} {% if watch['processor'] == 'restock_diff' %}
@ -173,8 +174,6 @@
{% elif not watch.has_restock_info %} {% elif not watch.has_restock_info %}
<span class="restock-label error">No information</span> <span class="restock-label error">No information</span>
{% endif %} {% endif %}
{% endif %} {% endif %}
</td> </td>
{% endif %} {% endif %}

@ -1,4 +1,6 @@
from .processors.exceptions import ProcessorException
from . import content_fetchers from . import content_fetchers
from .processors.restock_diff.processor import UnableToExtractRestockData from .processors.restock_diff.processor import UnableToExtractRestockData
from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
from changedetectionio import html_tools from changedetectionio import html_tools
@ -287,6 +289,16 @@ class update_worker(threading.Thread):
logger.critical(f"File permission error updating file, watch: {uuid}") logger.critical(f"File permission error updating file, watch: {uuid}")
logger.critical(str(e)) logger.critical(str(e))
process_changedetection_results = False process_changedetection_results = False
# A generic other-exception thrown by processors
except ProcessorException as e:
if e.screenshot:
watch.save_screenshot(screenshot=e.screenshot)
if e.xpath_data:
watch.save_xpath_data(data=e.xpath_data)
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': e.message})
process_changedetection_results = False
except content_fetchers.exceptions.ReplyWithContentButNoText as e: except content_fetchers.exceptions.ReplyWithContentButNoText as e:
# Totally fine, it's by choice - just continue on, nothing more to care about # Totally fine, it's by choice - just continue on, nothing more to care about
# Page had elements/content but no renderable text # Page had elements/content but no renderable text

Loading…
Cancel
Save