tidy up methods

pull/1941/head
dgtlmoon 1 year ago
parent 97e591fa24
commit 68d1e2736c

@ -667,6 +667,7 @@ class html_requests(Fetcher):
fetcher_description = "Basic fast Plaintext/HTTP Client" fetcher_description = "Basic fast Plaintext/HTTP Client"
def __init__(self, proxy_override=None): def __init__(self, proxy_override=None):
super().__init__()
self.proxy_override = proxy_override self.proxy_override = proxy_override
def run(self, def run(self,

@ -15,15 +15,18 @@ class difference_detection_processor():
def __init__(self, *args, datastore, watch_uuid, **kwargs): def __init__(self, *args, datastore, watch_uuid, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
self.datastore = datastore self.datastore = datastore
self.watch = self.datastore.data['watching'].get(watch_uuid)
watch = self.datastore.data['watching'].get(watch_uuid)
url = watch.link def call_browser(self):
url = self.watch.link
# Requests, playwright, other browser via wss:// etc, fetch_extra_something # Requests, playwright, other browser via wss:// etc, fetch_extra_something
prefer_fetch_backend = watch.get('fetch_backend', 'system') prefer_fetch_backend = self.watch.get('fetch_backend', 'system')
# Proxy ID "key" # Proxy ID "key"
preferred_proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=watch_uuid) preferred_proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))
# Pluggable content self.fetcher # Pluggable content self.fetcher
if not prefer_fetch_backend or prefer_fetch_backend == 'system': if not prefer_fetch_backend or prefer_fetch_backend == 'system':
@ -47,14 +50,14 @@ class difference_detection_processor():
#browser_url_extra/configurable browser url=... #browser_url_extra/configurable browser url=...
) )
if watch.has_browser_steps: if self.watch.has_browser_steps:
self.fetcher.browser_steps = watch.get('browser_steps', []) self.fetcher.browser_steps = self.watch.get('browser_steps', [])
self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, watch_uuid) self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))
# Tweak the base config with the per-watch ones # Tweak the base config with the per-watch ones
request_headers = watch.get('headers', []) request_headers = self.watch.get('headers', [])
request_headers.update(self.datastore.get_all_base_headers()) request_headers.update(self.datastore.get_all_base_headers())
request_headers.update(self.datastore.get_all_headers_in_textfile_for_watch(uuid=watch_uuid)) request_headers.update(self.datastore.get_all_headers_in_textfile_for_watch(uuid=self.watch.get('uuid')))
# https://github.com/psf/requests/issues/4525 # https://github.com/psf/requests/issues/4525
# Requests doesnt yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot # Requests doesnt yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot
@ -64,32 +67,32 @@ class difference_detection_processor():
timeout = self.datastore.data['settings']['requests'].get('timeout') timeout = self.datastore.data['settings']['requests'].get('timeout')
request_body = watch.get('body') request_body = self.watch.get('body')
request_method = watch.get('method') request_method = self.watch.get('method')
ignore_status_codes = watch.get('ignore_status_codes', False) ignore_status_codes = self.watch.get('ignore_status_codes', False)
# Configurable per-watch or global extra delay before extracting text (for webDriver types) # Configurable per-watch or global extra delay before extracting text (for webDriver types)
system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None) system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None)
if watch['webdriver_delay'] is not None: if self.watch.get('webdriver_delay'):
self.fetcher.render_extract_delay = watch.get('webdriver_delay') self.fetcher.render_extract_delay = self.watch.get('webdriver_delay')
elif system_webdriver_delay is not None: elif system_webdriver_delay is not None:
self.fetcher.render_extract_delay = system_webdriver_delay self.fetcher.render_extract_delay = system_webdriver_delay
if watch.get('webdriver_js_execute_code') is not None and watch.get('webdriver_js_execute_code').strip(): if self.watch.get('webdriver_js_execute_code') is not None and self.watch.get('webdriver_js_execute_code').strip():
self.fetcher.webdriver_js_execute_code = watch.get('webdriver_js_execute_code') self.fetcher.webdriver_js_execute_code = self.watch.get('webdriver_js_execute_code')
# Requests for PDF's, images etc should be passwd the is_binary flag # Requests for PDF's, images etc should be passwd the is_binary flag
is_binary = watch.is_pdf is_binary = self.watch.is_pdf
# And here we go! call the right browser with browser-specific settings # And here we go! call the right browser with browser-specific settings
self.fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch.get('include_filters'), self.fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, self.watch.get('include_filters'),
is_binary=is_binary) is_binary=is_binary)
self.fetcher.quit() self.fetcher.quit()
# After init, call run() which will do the actual change-detection # After init, call run() which will do the actual change-detection
@abstractmethod @abstractmethod
def run(self, uuid, skip_when_checksum_same=True): def run_changedetection(self, uuid, skip_when_checksum_same=True):
update_obj = {'last_notification_error': False, 'last_error': False} update_obj = {'last_notification_error': False, 'last_error': False}
some_data = 'xxxxx' some_data = 'xxxxx'
update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest() update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()

@ -19,7 +19,7 @@ class perform_site_check(difference_detection_processor):
screenshot = None screenshot = None
xpath_data = None xpath_data = None
def run(self, uuid, skip_when_checksum_same=True): def run_changedetection(self, uuid, skip_when_checksum_same=True):
# DeepCopy so we can be sure we don't accidently change anything by reference # DeepCopy so we can be sure we don't accidently change anything by reference
watch = deepcopy(self.datastore.data['watching'].get(uuid)) watch = deepcopy(self.datastore.data['watching'].get(uuid))

@ -33,8 +33,9 @@ class PDFToHTMLToolNotFound(ValueError):
# (set_proxy_from_list) # (set_proxy_from_list)
class perform_site_check(difference_detection_processor): class perform_site_check(difference_detection_processor):
def run(self, uuid, skip_when_checksum_same=True): def run_changedetection(self, uuid, skip_when_checksum_same=True):
changed_detected = False changed_detected = False
html_content = ""
screenshot = False # as bytes screenshot = False # as bytes
stripped_text_from_html = "" stripped_text_from_html = ""

@ -209,6 +209,7 @@ class update_worker(threading.Thread):
from .processors import text_json_diff, restock_diff from .processors import text_json_diff, restock_diff
while not self.app.config.exit.is_set(): while not self.app.config.exit.is_set():
update_handler = None
try: try:
queued_item_data = self.q.get(block=False) queued_item_data = self.q.get(block=False)
@ -253,7 +254,9 @@ class update_worker(threading.Thread):
# Clear last errors (move to preflight func?) # Clear last errors (move to preflight func?)
self.datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None self.datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None
changed_detected, update_obj, contents = update_handler.run(uuid, update_handler.call_browser()
changed_detected, update_obj, contents = update_handler.run_changedetection(uuid,
skip_when_checksum_same=skip_when_same_checksum, skip_when_checksum_same=skip_when_same_checksum,
) )
@ -407,6 +410,8 @@ class update_worker(threading.Thread):
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)}) self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
# Other serious error # Other serious error
process_changedetection_results = False process_changedetection_results = False
# the thread is still running??
else: else:
# Crash protection, the watch entry could have been removed by this point (during a slow chrome fetch etc) # Crash protection, the watch entry could have been removed by this point (during a slow chrome fetch etc)
if not self.datastore.data['watching'].get(uuid): if not self.datastore.data['watching'].get(uuid):

Loading…
Cancel
Save