diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py index d7a4f835..15416f72 100644 --- a/changedetectionio/content_fetcher.py +++ b/changedetectionio/content_fetcher.py @@ -77,11 +77,12 @@ class ScreenshotUnavailable(Exception): class ReplyWithContentButNoText(Exception): - def __init__(self, status_code, url, screenshot=None): + def __init__(self, status_code, url, screenshot=None, has_filters=False): # Set this so we can use it in other parts of the app self.status_code = status_code self.url = url self.screenshot = screenshot + self.has_filters = has_filters return diff --git a/changedetectionio/processors/text_json_diff.py b/changedetectionio/processors/text_json_diff.py index fb810f0c..dc0f5053 100644 --- a/changedetectionio/processors/text_json_diff.py +++ b/changedetectionio/processors/text_json_diff.py @@ -314,7 +314,7 @@ class perform_site_check(difference_detection_processor): # Treat pages with no renderable text content as a change? No by default empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False) if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0: - raise content_fetcher.ReplyWithContentButNoText(url=url, status_code=fetcher.get_last_status_code(), screenshot=screenshot) + raise content_fetcher.ReplyWithContentButNoText(url=url, status_code=fetcher.get_last_status_code(), screenshot=screenshot, has_filters=has_filter_rule) # We rely on the actual text in the html output.. many sites have random script vars etc, # in the future we'll implement other mechanisms. diff --git a/changedetectionio/update_worker.py b/changedetectionio/update_worker.py index 63a0aab4..11ad5054 100644 --- a/changedetectionio/update_worker.py +++ b/changedetectionio/update_worker.py @@ -251,7 +251,11 @@ class update_worker(threading.Thread): # Totally fine, it's by choice - just continue on, nothing more to care about # Page had elements/content but no renderable text # Backend (not filters) gave zero output - self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found (With {} reply code).".format(e.status_code)}) + if e.has_filters: + self.datastore.update_watch(uuid=uuid, update_obj={'last_error': f"Got HTML content but no text found (With {e.status_code} reply code), it's possible that the filters you have give an empty result or contain only an image more help here"}) + else: + self.datastore.update_watch(uuid=uuid, update_obj={'last_error': f"Got HTML content but no text found (With {e.status_code} reply code)."}) + if e.screenshot: self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot) process_changedetection_results = False