Merge branch 'master' into 2039-restock-use-itemprop

pull/2041/head
dgtlmoon 6 months ago
commit 6a96adb7c3

@ -87,11 +87,12 @@ class ScreenshotUnavailable(Exception):
class ReplyWithContentButNoText(Exception): class ReplyWithContentButNoText(Exception):
def __init__(self, status_code, url, screenshot=None, has_filters=False, html_content=''): def __init__(self, status_code, url, screenshot=None, has_filters=False, html_content='', xpath_data=None):
# Set this so we can use it in other parts of the app # Set this so we can use it in other parts of the app
self.status_code = status_code self.status_code = status_code
self.url = url self.url = url
self.screenshot = screenshot self.screenshot = screenshot
self.has_filters = has_filters self.has_filters = has_filters
self.html_content = html_content self.html_content = html_content
self.xpath_data = xpath_data
return return

@ -19,8 +19,9 @@ description = 'Detects all text changes where possible'
json_filter_prefixes = ['json:', 'jq:', 'jqraw:'] json_filter_prefixes = ['json:', 'jq:', 'jqraw:']
class FilterNotFoundInResponse(ValueError): class FilterNotFoundInResponse(ValueError):
def __init__(self, msg, screenshot=None): def __init__(self, msg, screenshot=None, xpath_data=None):
self.screenshot = screenshot self.screenshot = screenshot
self.xpath_data = xpath_data
ValueError.__init__(self, msg) ValueError.__init__(self, msg)
@ -185,7 +186,7 @@ class perform_site_check(difference_detection_processor):
append_pretty_line_formatting=not watch.is_source_type_url) append_pretty_line_formatting=not watch.is_source_type_url)
if not html_content.strip(): if not html_content.strip():
raise FilterNotFoundInResponse(msg=include_filters_rule, screenshot=self.fetcher.screenshot) raise FilterNotFoundInResponse(msg=include_filters_rule, screenshot=self.fetcher.screenshot, xpath_data=self.fetcher.xpath_data)
if has_subtractive_selectors: if has_subtractive_selectors:
html_content = html_tools.element_removal(subtractive_selectors, html_content) html_content = html_tools.element_removal(subtractive_selectors, html_content)
@ -243,9 +244,10 @@ class perform_site_check(difference_detection_processor):
if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0: if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0:
raise content_fetchers.exceptions.ReplyWithContentButNoText(url=url, raise content_fetchers.exceptions.ReplyWithContentButNoText(url=url,
status_code=self.fetcher.get_last_status_code(), status_code=self.fetcher.get_last_status_code(),
screenshot=screenshot, screenshot=self.fetcher.screenshot,
has_filters=has_filter_rule, has_filters=has_filter_rule,
html_content=html_content html_content=html_content,
xpath_data=self.fetcher.xpath_data
) )
# We rely on the actual text in the html output.. many sites have random script vars etc, # We rely on the actual text in the html output.. many sites have random script vars etc,

@ -314,6 +314,9 @@ class update_worker(threading.Thread):
if e.screenshot: if e.screenshot:
watch.save_screenshot(screenshot=e.screenshot, as_error=True) watch.save_screenshot(screenshot=e.screenshot, as_error=True)
if e.xpath_data:
watch.save_xpath_data(data=e.xpath_data)
process_changedetection_results = False process_changedetection_results = False
except content_fetchers.exceptions.Non200ErrorCodeReceived as e: except content_fetchers.exceptions.Non200ErrorCodeReceived as e:
@ -344,8 +347,13 @@ class update_worker(threading.Thread):
err_text = "Warning, no filters were found, no change detection ran - Did the page change layout? update your Visual Filter if necessary." err_text = "Warning, no filters were found, no change detection ran - Did the page change layout? update your Visual Filter if necessary."
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text}) self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
# Filter wasn't found, but we should still update the visual selector so that the user has a chance to set it up again
if e.screenshot: if e.screenshot:
watch.save_screenshot(screenshot=e.screenshot, as_error=True) watch.save_screenshot(screenshot=e.screenshot)
if e.xpath_data:
watch.save_xpath_data(data=e.xpath_data)
# Only when enabled, send the notification # Only when enabled, send the notification
if watch.get('filter_failure_notification_send', False): if watch.get('filter_failure_notification_send', False):

Loading…
Cancel
Save