More tidyup

fetchers-abstract
dgtlmoon 2 years ago
parent f59b198ffb
commit 687cf9beb4

@ -1,4 +1,7 @@
class fetch_processor(): class fetch_processor():
contents = b''
screenshot = None
""" """
base class for all fetch processors base class for all fetch processors
- json_html_plaintext - json_html_plaintext

@ -14,7 +14,7 @@ from . import fetch_processor
# Some common stuff here that can be moved to a base class # Some common stuff here that can be moved to a base class
# (set_proxy_from_list) # (set_proxy_from_list)
class perform_site_check(fetch_processor): class perform_site_check(fetch_processor):
screenshot = None
xpath_data = None xpath_data = None
# Doesn't look like python supports forward slash auto enclosure in re.findall # Doesn't look like python supports forward slash auto enclosure in re.findall
@ -284,5 +284,6 @@ class perform_site_check(fetch_processor):
if not watch.get('previous_md5'): if not watch.get('previous_md5'):
watch['previous_md5'] = fetched_md5 watch['previous_md5'] = fetched_md5
# @todo text_content_before_ignored_filter can be removed? save it here? self.contents = text_content_before_ignored_filter
return changed_detected, update_obj, text_content_before_ignored_filter
return changed_detected, update_obj

@ -132,22 +132,20 @@ class update_worker(threading.Thread):
self.current_uuid = uuid self.current_uuid = uuid
if uuid in list(self.datastore.data['watching'].keys()): if uuid in list(self.datastore.data['watching'].keys()):
update_handler = None # Interface object
changed_detected = False changed_detected = False
contents = b''
screenshot = False
update_obj= {} update_obj= {}
xpath_data = False
process_changedetection_results = True process_changedetection_results = True
print("> Processing UUID {} Priority {} URL {}".format(uuid, priority, self.datastore.data['watching'][uuid]['url'])) print("> Processing UUID {} Priority {} URL {}".format(uuid, priority, self.datastore.data['watching'][uuid]['url']))
now = time.time() now = time.time()
try: try:
update_handler = json_html_plaintext.perform_site_check(datastore=self.datastore) update_handler = json_html_plaintext.perform_site_check(datastore=self.datastore)
changed_detected, update_obj, contents = update_handler.run(uuid) changed_detected, update_obj = update_handler.run(uuid)
# Re #342 # Re #342
# In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes. # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
# We then convert/.decode('utf-8') for the notification etc # We then convert/.decode('utf-8') for the notification etc
if not isinstance(contents, (bytes, bytearray)): if not isinstance(update_handler.contents, (bytes, bytearray)):
raise Exception("Error - returned data from the fetch handler SHOULD be bytes") raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
except PermissionError as e: except PermissionError as e:
self.app.logger.error("File permission error updating", uuid, str(e)) self.app.logger.error("File permission error updating", uuid, str(e))
@ -257,7 +255,7 @@ class update_worker(threading.Thread):
# For the FIRST time we check a site, or a change detected, save the snapshot. # For the FIRST time we check a site, or a change detected, save the snapshot.
if changed_detected or not watch['last_checked']: if changed_detected or not watch['last_checked']:
# A change was detected # A change was detected
watch.save_history_text(contents=contents, timestamp=str(round(time.time()))) watch.save_history_text(contents=update_handler.contents, timestamp=str(round(time.time())))
self.datastore.update_watch(uuid=uuid, update_obj=update_obj) self.datastore.update_watch(uuid=uuid, update_obj=update_obj)

Loading…
Cancel
Save