Merge branch 'master' into 2039-restock-use-itemprop

6 months ago · 6a96adb7c3
parent 85aa23d590 f07ff9b55e
commit 6a96adb7c3
3 changed files with 17 additions and 6 deletions
--- a/changedetectionio/content_fetchers/exceptions/__init__.py
+++ b/changedetectionio/content_fetchers/exceptions/__init__.py
@@ -87,11 +87,12 @@ class ScreenshotUnavailable(Exception):


 class ReplyWithContentButNoText(Exception):
-    def __init__(self, status_code, url, screenshot=None, has_filters=False, html_content=''):
+    def __init__(self, status_code, url, screenshot=None, has_filters=False, html_content='', xpath_data=None):
        # Set this so we can use it in other parts of the app
        self.status_code = status_code
        self.url = url
        self.screenshot = screenshot
        self.has_filters = has_filters
        self.html_content = html_content
+        self.xpath_data = xpath_data
        return
--- a/changedetectionio/processors/text_json_diff.py
+++ b/changedetectionio/processors/text_json_diff.py
@@ -19,8 +19,9 @@ description = 'Detects all text changes where possible'
 json_filter_prefixes = ['json:', 'jq:', 'jqraw:']

 class FilterNotFoundInResponse(ValueError):
-    def __init__(self, msg, screenshot=None):
+    def __init__(self, msg, screenshot=None, xpath_data=None):
        self.screenshot = screenshot
+        self.xpath_data = xpath_data
        ValueError.__init__(self, msg)


@@ -185,7 +186,7 @@ class perform_site_check(difference_detection_processor):
                                                                       append_pretty_line_formatting=not watch.is_source_type_url)

                    if not html_content.strip():
-                        raise FilterNotFoundInResponse(msg=include_filters_rule, screenshot=self.fetcher.screenshot)
+                        raise FilterNotFoundInResponse(msg=include_filters_rule, screenshot=self.fetcher.screenshot, xpath_data=self.fetcher.xpath_data)

                if has_subtractive_selectors:
                    html_content = html_tools.element_removal(subtractive_selectors, html_content)
@@ -243,9 +244,10 @@ class perform_site_check(difference_detection_processor):
        if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0:
            raise content_fetchers.exceptions.ReplyWithContentButNoText(url=url,
                                                            status_code=self.fetcher.get_last_status_code(),
-                                                            screenshot=screenshot,
+                                                            screenshot=self.fetcher.screenshot,
                                                            has_filters=has_filter_rule,
-                                                            html_content=html_content
+                                                            html_content=html_content,
+                                                            xpath_data=self.fetcher.xpath_data
                                                            )

        # We rely on the actual text in the html output.. many sites have random script vars etc,
--- a/changedetectionio/update_worker.py
+++ b/changedetectionio/update_worker.py
@@ -314,6 +314,9 @@ class update_worker(threading.Thread):
                        if e.screenshot:
                            watch.save_screenshot(screenshot=e.screenshot, as_error=True)

+                        if e.xpath_data:
+                            watch.save_xpath_data(data=e.xpath_data)
+                            
                        process_changedetection_results = False

                    except content_fetchers.exceptions.Non200ErrorCodeReceived as e:
@@ -344,8 +347,13 @@ class update_worker(threading.Thread):

                        err_text = "Warning, no filters were found, no change detection ran - Did the page change layout? update your Visual Filter if necessary."
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
+
+                        # Filter wasn't found, but we should still update the visual selector so that the user has a chance to set it up again
                        if e.screenshot:
-                            watch.save_screenshot(screenshot=e.screenshot, as_error=True)
+                            watch.save_screenshot(screenshot=e.screenshot)
+
+                        if e.xpath_data:
+                            watch.save_xpath_data(data=e.xpath_data)

                        # Only when enabled, send the notification
                        if watch.get('filter_failure_notification_send', False):