Better handling of EmptyReply exception, always bump 'last_checked' in the case of an error (#354)

* Better handling of EmptyReply exception, always bump 'last_checked' in the case of an error, adds test
pull/359/head
dgtlmoon 3 years ago committed by GitHub
parent f166ab1e30
commit f87f7077a6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -9,6 +9,12 @@ import urllib3.exceptions
class EmptyReply(Exception): class EmptyReply(Exception):
def __init__(self, status_code, url):
# Set this so we can use it in other parts of the app
self.status_code = status_code
self.url = url
return
pass pass
class Fetcher(): class Fetcher():
@ -110,6 +116,8 @@ class html_webdriver(Fetcher):
# @todo - how to check this? is it possible? # @todo - how to check this? is it possible?
self.status_code = 200 self.status_code = 200
# @todo somehow we should try to get this working for WebDriver
# raise EmptyReply(url=url, status_code=r.status_code)
# @todo - dom wait loaded? # @todo - dom wait loaded?
time.sleep(5) time.sleep(5)
@ -151,10 +159,10 @@ class html_requests(Fetcher):
# Return bytes here # Return bytes here
html = r.text html = r.text
# @todo test this # @todo test this
# @todo maybe you really want to test zero-byte return pages?
if not r or not html or not len(html): if not r or not html or not len(html):
raise EmptyReply(url) raise EmptyReply(url=url, status_code=r.status_code)
self.status_code = r.status_code self.status_code = r.status_code
self.content = html self.content = html

@ -58,9 +58,7 @@ class perform_site_check():
watch = self.datastore.data['watching'][uuid] watch = self.datastore.data['watching'][uuid]
update_obj = { update_obj = {}
"last_checked": timestamp
}
extra_headers = self.datastore.get_val(uuid, 'headers') extra_headers = self.datastore.get_val(uuid, 'headers')

@ -16,7 +16,6 @@ def css_filter(css_filter, html_content):
return html_block + "\n" return html_block + "\n"
# Extract/find element # Extract/find element
def extract_element(find='title', html_content=''): def extract_element(find='title', html_content=''):

@ -0,0 +1,38 @@
#!/usr/bin/python3
import time
from flask import url_for
from . util import live_server_setup
from ..html_tools import *
def test_setup(live_server):
live_server_setup(live_server)
def test_error_handler(client, live_server):
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page
test_url = url_for('test_endpoint_403_error', _external=True)
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(3)
res = client.get(url_for("index"))
assert b'unviewed' not in res.data
assert b'Status Code 403' in res.data
assert bytes("just now".encode('utf-8')) in res.data

@ -54,6 +54,13 @@ def live_server_setup(live_server):
resp.headers['Content-Type'] = 'application/json' resp.headers['Content-Type'] = 'application/json'
return resp return resp
@live_server.app.route('/test-403')
def test_endpoint_403_error():
from flask import make_response
resp = make_response('', 403)
return resp
# Just return the headers in the request # Just return the headers in the request
@live_server.app.route('/test-headers') @live_server.app.route('/test-headers')
def test_headers(): def test_headers():

@ -39,9 +39,10 @@ class update_worker(threading.Thread):
changed_detected = False changed_detected = False
contents = "" contents = ""
update_obj= {} update_obj= {}
now = time.time()
try: try:
now = time.time()
changed_detected, update_obj, contents = update_handler.run(uuid) changed_detected, update_obj, contents = update_handler.run(uuid)
# Re #342 # Re #342
@ -51,14 +52,13 @@ class update_worker(threading.Thread):
raise Exception("Error - returned data from the fetch handler SHOULD be bytes") raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
# Always record that we atleast tried
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3)})
except PermissionError as e: except PermissionError as e:
self.app.logger.error("File permission error updating", uuid, str(e)) self.app.logger.error("File permission error updating", uuid, str(e))
except content_fetcher.EmptyReply as e: except content_fetcher.EmptyReply as e:
self.datastore.update_watch(uuid=uuid, update_obj={'last_error':str(e)}) # Some kind of custom to-str handler in the exception handler that does this?
err_text = "EmptyReply: Status Code {}".format(e.status_code)
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code})
except Exception as e: except Exception as e:
self.app.logger.error("Exception reached processing watch UUID:%s - %s", uuid, str(e)) self.app.logger.error("Exception reached processing watch UUID:%s - %s", uuid, str(e))
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)}) self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
@ -66,13 +66,14 @@ class update_worker(threading.Thread):
else: else:
try: try:
watch = self.datastore.data['watching'][uuid] watch = self.datastore.data['watching'][uuid]
fname = "" # Saved history text filename
# For the FIRST time we check a site, or a change detected, save the snapshot. # For the FIRST time we check a site, or a change detected, save the snapshot.
if changed_detected or not watch['last_checked']: if changed_detected or not watch['last_checked']:
# A change was detected # A change was detected
fname = self.datastore.save_history_text(watch_uuid=uuid, contents=contents) fname = self.datastore.save_history_text(watch_uuid=uuid, contents=contents)
# Should always be keyed by string(timestamp) # Should always be keyed by string(timestamp)
self.datastore.update_watch(uuid, {"history": {str(update_obj["last_checked"]): fname}}) self.datastore.update_watch(uuid, {"history": {str(round(time.time())): fname}})
# Generally update anything interesting returned # Generally update anything interesting returned
self.datastore.update_watch(uuid=uuid, update_obj=update_obj) self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
@ -136,6 +137,11 @@ class update_worker(threading.Thread):
# Catch everything possible here, so that if a worker crashes, we don't lose it until restart! # Catch everything possible here, so that if a worker crashes, we don't lose it until restart!
print("!!!! Exception in update_worker !!!\n", e) print("!!!! Exception in update_worker !!!\n", e)
finally:
# Always record that we atleast tried
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
'last_checked': round(time.time())})
self.current_uuid = None # Done self.current_uuid = None # Done
self.q.task_done() self.q.task_done()

Loading…
Cancel
Save