Re #342 notification encoding (#343)

* Re #342 - check for accidental python byte encoding of non-utf8/string, check return type of fetcher and fix encoding of notification content
pull/337/head
dgtlmoon 3 years ago committed by GitHub
parent d4dc3466dc
commit 489671dcca
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -14,7 +14,7 @@ class EmptyReply(Exception):
class Fetcher(): class Fetcher():
error = None error = None
status_code = None status_code = None
content = None # Should be bytes? content = None # Should always be bytes.
fetcher_description ="No description" fetcher_description ="No description"
@ -129,7 +129,6 @@ class html_webdriver(Fetcher):
# driver.quit() seems to cause better exceptions # driver.quit() seems to cause better exceptions
driver.quit() driver.quit()
return True return True
# "html_requests" is listed as the default fetcher in store.py! # "html_requests" is listed as the default fetcher in store.py!
@ -146,6 +145,8 @@ class html_requests(Fetcher):
timeout=timeout, timeout=timeout,
verify=False) verify=False)
# https://stackoverflow.com/questions/44203397/python-requests-get-returns-improperly-decoded-text-instead-of-utf-8
# Return bytes here
html = r.text html = r.text

@ -367,6 +367,10 @@ class ChangeDetectionStore:
import uuid import uuid
output_path = "{}/{}".format(self.datastore_path, watch_uuid) output_path = "{}/{}".format(self.datastore_path, watch_uuid)
# In case the operator deleted it, check and create.
if not os.path.isdir(output_path):
mkdir(output_path)
fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4()) fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4())
with open(fname, 'wb') as f: with open(fname, 'wb') as f:
f.write(contents) f.write(contents)

@ -159,6 +159,9 @@ def test_check_notification(client, live_server):
with open("test-datastore/notification.txt", "r") as f: with open("test-datastore/notification.txt", "r") as f:
notification_submission = f.read() notification_submission = f.read()
print ("Notification submission was:", notification_submission)
# Re #342 - check for accidental python byte encoding of non-utf8/string
assert "b'" not in notification_submission
assert re.search('Watch UUID: [0-9a-f]{8}(-[0-9a-f]{4}){3}-[0-9a-f]{12}', notification_submission, re.IGNORECASE) assert re.search('Watch UUID: [0-9a-f]{8}(-[0-9a-f]{4}){3}-[0-9a-f]{12}', notification_submission, re.IGNORECASE)
assert "Watch title: my title" in notification_submission assert "Watch title: my title" in notification_submission

@ -2,7 +2,12 @@ import threading
import queue import queue
import time import time
# Requests for checking on the site use a pool of thread Workers managed by a Queue. # A single update worker
#
# Requests for checking on a single site(watch) from a queue of watches
# (another process inserts watches into the queue that are time-ready for checking)
class update_worker(threading.Thread): class update_worker(threading.Thread):
current_uuid = None current_uuid = None
@ -39,6 +44,13 @@ class update_worker(threading.Thread):
now = time.time() now = time.time()
changed_detected, update_obj, contents = update_handler.run(uuid) changed_detected, update_obj, contents = update_handler.run(uuid)
# Re #342
# In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
# We then call .decode('utf-8') on it wherever a string is needed, e.g. for the notification
if not isinstance(contents, (bytes, bytearray)):
raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
# Always record that we at least tried # Always record that we at least tried
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3)}) self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3)})
@ -111,7 +123,7 @@ class update_worker(threading.Thread):
n_object.update({ n_object.update({
'watch_url': watch['url'], 'watch_url': watch['url'],
'uuid': uuid, 'uuid': uuid,
'current_snapshot': str(contents), 'current_snapshot': contents.decode('utf-8'),
'diff_full': diff.render_diff(prev_fname, fname, line_feed_sep=line_feed_sep), 'diff_full': diff.render_diff(prev_fname, fname, line_feed_sep=line_feed_sep),
'diff': diff.render_diff(prev_fname, fname, True, line_feed_sep=line_feed_sep) 'diff': diff.render_diff(prev_fname, fname, True, line_feed_sep=line_feed_sep)
}) })

Loading…
Cancel
Save