From 8bad0b06ffe3ea47a757fcfd78824f5ba29c12da Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Thu, 19 Sep 2024 22:13:28 +0200 Subject: [PATCH] Only try to process restock information (like scraping for keywords) if the page was actually rendered correctly. --- .../processors/restock_diff/processor.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/changedetectionio/processors/restock_diff/processor.py b/changedetectionio/processors/restock_diff/processor.py index 88203b51..a933db1f 100644 --- a/changedetectionio/processors/restock_diff/processor.py +++ b/changedetectionio/processors/restock_diff/processor.py @@ -158,6 +158,20 @@ class perform_site_check(difference_detection_processor): update_obj['content_type'] = self.fetcher.headers.get('Content-Type', '') update_obj["last_check_status"] = self.fetcher.get_last_status_code() + # Only try to process restock information (like scraping for keywords) if the page was actually rendered correctly. + # Otherwise it will assume "in stock" because nothing suggesting the opposite was found + from ...html_tools import html_to_text + text = html_to_text(self.fetcher.content) + logger.debug(f"Length of text after conversion: {len(text)}") + if not len(text): + from ...content_fetchers.exceptions import ReplyWithContentButNoText + raise ReplyWithContentButNoText(url=watch.link, + status_code=self.fetcher.get_last_status_code(), + screenshot=self.fetcher.screenshot, + html_content=self.fetcher.content, + xpath_data=self.fetcher.xpath_data + ) + # Which restock settings to compare against? restock_settings = watch.get('restock_settings', {})