From e173954cddd743f88273d914651b579a0723903c Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Fri, 20 Sep 2024 09:19:57 +0200 Subject: [PATCH] Restock monitor - Only try to process restock information (like scraping for "out of stock" keywords) if the page was actually rendered correctly. (#2645) --- .../processors/restock_diff/processor.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/changedetectionio/processors/restock_diff/processor.py b/changedetectionio/processors/restock_diff/processor.py index 88203b51..a933db1f 100644 --- a/changedetectionio/processors/restock_diff/processor.py +++ b/changedetectionio/processors/restock_diff/processor.py @@ -158,6 +158,20 @@ class perform_site_check(difference_detection_processor): update_obj['content_type'] = self.fetcher.headers.get('Content-Type', '') update_obj["last_check_status"] = self.fetcher.get_last_status_code() + # Only try to process restock information (like scraping for keywords) if the page was actually rendered correctly. + # Otherwise it will assume "in stock" because nothing suggesting the opposite was found + from ...html_tools import html_to_text + text = html_to_text(self.fetcher.content) + logger.debug(f"Length of text after conversion: {len(text)}") + if not len(text): + from ...content_fetchers.exceptions import ReplyWithContentButNoText + raise ReplyWithContentButNoText(url=watch.link, + status_code=self.fetcher.get_last_status_code(), + screenshot=self.fetcher.screenshot, + html_content=self.fetcher.content, + xpath_data=self.fetcher.xpath_data + ) + # Which restock settings to compare against? restock_settings = watch.get('restock_settings', {})