Massive improvements to error handling - show separate output for non HTTP 200 status replies

pull/835/head
dgtlmoon 2 years ago committed by GitHub
parent 1eb5726cbf
commit 9942107016
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -115,18 +115,19 @@ def _jinja2_filter_datetime(watch_obj, format="%Y-%m-%d %H:%M:%S"):
return timeago.format(int(watch_obj['last_checked']), time.time()) return timeago.format(int(watch_obj['last_checked']), time.time())
# @app.context_processor
# def timeago():
# def _timeago(lower_time, now):
# return timeago.format(lower_time, now)
# return dict(timeago=_timeago)
@app.template_filter('format_timestamp_timeago') @app.template_filter('format_timestamp_timeago')
def _jinja2_filter_datetimestamp(timestamp, format="%Y-%m-%d %H:%M:%S"): def _jinja2_filter_datetimestamp(timestamp, format="%Y-%m-%d %H:%M:%S"):
if timestamp == False:
return 'Not yet'
return timeago.format(timestamp, time.time()) return timeago.format(timestamp, time.time())
# return timeago.format(timestamp, time.time())
# return datetime.datetime.utcfromtimestamp(timestamp).strftime(format) @app.template_filter('format_seconds_ago')
def _jinja2_filter_seconds_precise(timestamp):
if timestamp == False:
return 'Not yet'
return format(int(time.time()-timestamp), ',d')
# When nobody is logged in Flask-Login's current_user is set to an AnonymousUser object. # When nobody is logged in Flask-Login's current_user is set to an AnonymousUser object.
class User(flask_login.UserMixin): class User(flask_login.UserMixin):
@ -830,7 +831,7 @@ def changedetection_app(config=None, datastore_o=None):
previous_version_file_contents = "Unable to read {}.\n".format(previous_file) previous_version_file_contents = "Unable to read {}.\n".format(previous_file)
screenshot_url = datastore.get_screenshot(uuid) screenshot_url = watch.get_screenshot()
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver' system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
@ -850,7 +851,11 @@ def changedetection_app(config=None, datastore_o=None):
extra_title=" - Diff - {}".format(watch['title'] if watch['title'] else watch['url']), extra_title=" - Diff - {}".format(watch['title'] if watch['title'] else watch['url']),
left_sticky=True, left_sticky=True,
screenshot=screenshot_url, screenshot=screenshot_url,
is_html_webdriver=is_html_webdriver) is_html_webdriver=is_html_webdriver,
last_error=watch['last_error'],
last_error_text=watch.get_error_text(),
last_error_screenshot=watch.get_error_snapshot()
)
return output return output
@ -865,20 +870,34 @@ def changedetection_app(config=None, datastore_o=None):
if uuid == 'first': if uuid == 'first':
uuid = list(datastore.data['watching'].keys()).pop() uuid = list(datastore.data['watching'].keys()).pop()
# Normally you would never reach this, because the 'preview' button is not available when there's no history
# However they may try to clear snapshots and reload the page
if datastore.data['watching'][uuid].history_n == 0:
flash("Preview unavailable - No fetch/check completed or triggers not reached", "error")
return redirect(url_for('index'))
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
try: try:
watch = datastore.data['watching'][uuid] watch = datastore.data['watching'][uuid]
except KeyError: except KeyError:
flash("No history found for the specified link, bad link?", "error") flash("No history found for the specified link, bad link?", "error")
return redirect(url_for('index')) return redirect(url_for('index'))
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or (
watch.get('fetch_backend', None) is None and system_uses_webdriver) else False
# Never requested successfully, but we detected a fetch error
if datastore.data['watching'][uuid].history_n == 0 and (watch.get_error_text() or watch.get_error_snapshot()):
flash("Preview unavailable - No fetch/check completed or triggers not reached", "error")
output = render_template("preview.html",
content=content,
history_n=watch.history_n,
extra_stylesheets=extra_stylesheets,
# current_diff_url=watch['url'],
watch=watch,
uuid=uuid,
is_html_webdriver=is_html_webdriver,
last_error=watch['last_error'],
last_error_text=watch.get_error_text(),
last_error_screenshot=watch.get_error_snapshot())
return output
timestamp = list(watch.history.keys())[-1] timestamp = list(watch.history.keys())[-1]
filename = watch.history[timestamp] filename = watch.history[timestamp]
@ -913,23 +932,20 @@ def changedetection_app(config=None, datastore_o=None):
except Exception as e: except Exception as e:
content.append({'line': "File doesnt exist or unable to read file {}".format(filename), 'classes': ''}) content.append({'line': "File doesnt exist or unable to read file {}".format(filename), 'classes': ''})
screenshot_url = datastore.get_screenshot(uuid)
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or (
watch.get('fetch_backend', None) is None and system_uses_webdriver) else False
output = render_template("preview.html", output = render_template("preview.html",
content=content, content=content,
history_n=watch.history_n,
extra_stylesheets=extra_stylesheets, extra_stylesheets=extra_stylesheets,
ignored_line_numbers=ignored_line_numbers, ignored_line_numbers=ignored_line_numbers,
triggered_line_numbers=trigger_line_numbers, triggered_line_numbers=trigger_line_numbers,
current_diff_url=watch['url'], current_diff_url=watch['url'],
screenshot=screenshot_url, screenshot=watch.get_screenshot(),
watch=watch, watch=watch,
uuid=uuid, uuid=uuid,
is_html_webdriver=is_html_webdriver) is_html_webdriver=is_html_webdriver,
last_error=watch['last_error'],
last_error_text=watch.get_error_text(),
last_error_screenshot=watch.get_error_snapshot())
return output return output
@ -1029,11 +1045,12 @@ def changedetection_app(config=None, datastore_o=None):
if datastore.data['settings']['application']['password'] and not flask_login.current_user.is_authenticated: if datastore.data['settings']['application']['password'] and not flask_login.current_user.is_authenticated:
abort(403) abort(403)
screenshot_filename = "last-screenshot.png" if not request.args.get('error_screenshot') else "last-error-screenshot.png"
# These files should be in our subdirectory # These files should be in our subdirectory
try: try:
# set nocache, set content-type # set nocache, set content-type
watch_dir = datastore_o.datastore_path + "/" + filename response = make_response(send_from_directory(os.path.join(datastore_o.datastore_path, filename), screenshot_filename))
response = make_response(send_from_directory(filename="last-screenshot.png", directory=watch_dir, path=watch_dir + "/last-screenshot.png"))
response.headers['Content-type'] = 'image/png' response.headers['Content-type'] = 'image/png'
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate' response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
response.headers['Pragma'] = 'no-cache' response.headers['Pragma'] = 'no-cache'

@ -6,38 +6,63 @@ import requests
import time import time
import sys import sys
class Non200ErrorCodeReceived(Exception):
def __init__(self, status_code, url, screenshot=None, xpath_data=None, page_html=None):
# Set this so we can use it in other parts of the app
self.status_code = status_code
self.url = url
self.screenshot = screenshot
self.xpath_data = xpath_data
self.page_text = None
if page_html:
from changedetectionio import html_tools
self.page_text = html_tools.html_to_text(page_html)
return
class JSActionExceptions(Exception):
def __init__(self, status_code, url, screenshot, message=''):
self.status_code = status_code
self.url = url
self.screenshot = screenshot
self.message = message
return
class PageUnloadable(Exception): class PageUnloadable(Exception):
def __init__(self, status_code, url): def __init__(self, status_code, url, screenshot=False):
# Set this so we can use it in other parts of the app # Set this so we can use it in other parts of the app
self.status_code = status_code self.status_code = status_code
self.url = url self.url = url
self.screenshot = screenshot
return return
pass
class EmptyReply(Exception): class EmptyReply(Exception):
def __init__(self, status_code, url): def __init__(self, status_code, url, screenshot=None):
# Set this so we can use it in other parts of the app # Set this so we can use it in other parts of the app
self.status_code = status_code self.status_code = status_code
self.url = url self.url = url
self.screenshot = screenshot
return return
pass
class ScreenshotUnavailable(Exception): class ScreenshotUnavailable(Exception):
def __init__(self, status_code, url): def __init__(self, status_code, url, page_html=None):
# Set this so we can use it in other parts of the app # Set this so we can use it in other parts of the app
self.status_code = status_code self.status_code = status_code
self.url = url self.url = url
if page_html:
from html_tools import html_to_text
self.page_text = html_to_text(page_html)
return return
pass
class ReplyWithContentButNoText(Exception): class ReplyWithContentButNoText(Exception):
def __init__(self, status_code, url): def __init__(self, status_code, url, screenshot=None):
# Set this so we can use it in other parts of the app # Set this so we can use it in other parts of the app
self.status_code = status_code self.status_code = status_code
self.url = url self.url = url
self.screenshot = screenshot
return return
pass
class Fetcher(): class Fetcher():
error = None error = None
@ -325,6 +350,7 @@ class base_html_playwright(Fetcher):
browser.close() browser.close()
# This can be ok, we will try to grab what we could retrieve # This can be ok, we will try to grab what we could retrieve
pass pass
except Exception as e: except Exception as e:
print("other exception when page.goto") print("other exception when page.goto")
print(str(e)) print(str(e))
@ -344,18 +370,27 @@ class base_html_playwright(Fetcher):
time.sleep(extra_wait) time.sleep(extra_wait)
if self.webdriver_js_execute_code is not None: if self.webdriver_js_execute_code is not None:
try:
page.evaluate(self.webdriver_js_execute_code) page.evaluate(self.webdriver_js_execute_code)
time.sleep(2) except Exception as e:
# Is it possible to get a screenshot?
error_screenshot = False
try:
page.screenshot(type='jpeg',
clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024},
quality=1)
self.content = page.content() # The actual screenshot
self.status_code = response.status error_screenshot = page.screenshot(type='jpeg',
full_page=True,
quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
except Exception as s:
pass
if len(self.content.strip()) == 0: raise JSActionExceptions(status_code=response.status, screenshot=error_screenshot, message=str(e), url=url)
context.close()
browser.close()
print ("Content was empty")
raise EmptyReply(url=url, status_code=None)
self.content = page.content()
self.status_code = response.status
self.headers = response.all_headers() self.headers = response.all_headers()
if current_css_filter is not None: if current_css_filter is not None:
@ -382,9 +417,17 @@ class base_html_playwright(Fetcher):
browser.close() browser.close()
raise ScreenshotUnavailable(url=url, status_code=None) raise ScreenshotUnavailable(url=url, status_code=None)
if len(self.content.strip()) == 0:
context.close()
browser.close()
print("Content was empty")
raise EmptyReply(url=url, status_code=None, screenshot=self.screenshot)
context.close() context.close()
browser.close() browser.close()
if not ignore_status_codes and self.status_code!=200:
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, page_html=self.content, screenshot=self.screenshot)
class base_html_webdriver(Fetcher): class base_html_webdriver(Fetcher):
if os.getenv("WEBDRIVER_URL"): if os.getenv("WEBDRIVER_URL"):
@ -540,10 +583,14 @@ class html_requests(Fetcher):
if encoding: if encoding:
r.encoding = encoding r.encoding = encoding
if not r.content or not len(r.content):
raise EmptyReply(url=url, status_code=r.status_code)
# @todo test this # @todo test this
# @todo maybe you really want to test zero-byte return pages? # @todo maybe you really want to test zero-byte return pages?
if (not ignore_status_codes and not r) or not r.content or not len(r.content): if r.status_code != 200 and not ignore_status_codes:
raise EmptyReply(url=url, status_code=r.status_code) # maybe check with content works?
raise Non200ErrorCodeReceived(url=url, status_code=r.status_code, page_html=r.text)
self.status_code = r.status_code self.status_code = r.status_code
self.content = r.text self.content = r.text

@ -94,7 +94,7 @@ class perform_site_check():
url = self.datastore.get_val(uuid, 'url') url = self.datastore.get_val(uuid, 'url')
request_body = self.datastore.get_val(uuid, 'body') request_body = self.datastore.get_val(uuid, 'body')
request_method = self.datastore.get_val(uuid, 'method') request_method = self.datastore.get_val(uuid, 'method')
ignore_status_code = self.datastore.get_val(uuid, 'ignore_status_codes') ignore_status_codes = self.datastore.data['watching'][uuid].get('ignore_status_codes', False)
# source: support # source: support
is_source = False is_source = False
@ -124,7 +124,7 @@ class perform_site_check():
if watch['webdriver_js_execute_code'] is not None and watch['webdriver_js_execute_code'].strip(): if watch['webdriver_js_execute_code'] is not None and watch['webdriver_js_execute_code'].strip():
fetcher.webdriver_js_execute_code = watch['webdriver_js_execute_code'] fetcher.webdriver_js_execute_code = watch['webdriver_js_execute_code']
fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_code, watch['css_filter']) fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch['css_filter'])
fetcher.quit() fetcher.quit()
# Fetching complete, now filters # Fetching complete, now filters
@ -210,7 +210,7 @@ class perform_site_check():
# Treat pages with no renderable text content as a change? No by default # Treat pages with no renderable text content as a change? No by default
empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False) empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0: if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0:
raise content_fetcher.ReplyWithContentButNoText(url=url, status_code=200) raise content_fetcher.ReplyWithContentButNoText(url=url, status_code=fetcher.get_last_status_code(), screenshot=screenshot)
# We rely on the actual text in the html output.. many sites have random script vars etc, # We rely on the actual text in the html output.. many sites have random script vars etc,
# in the future we'll implement other mechanisms. # in the future we'll implement other mechanisms.

@ -38,6 +38,7 @@ class model(dict):
'notification_format': default_notification_format, 'notification_format': default_notification_format,
'notification_muted': False, 'notification_muted': False,
'css_filter': '', 'css_filter': '',
'last_error': False,
'extract_text': [], # Extract text by regex after filters 'extract_text': [], # Extract text by regex after filters
'subtractive_selectors': [], 'subtractive_selectors': [],
'trigger_text': [], # List of text or regex to wait for until a change is detected 'trigger_text': [], # List of text or regex to wait for until a change is detected
@ -122,19 +123,17 @@ class model(dict):
bump = self.history bump = self.history
return self.__newest_history_key return self.__newest_history_key
# Save some text file to the appropriate path and bump the history # Save some text file to the appropriate path and bump the history
# result_obj from fetch_site_status.run() # result_obj from fetch_site_status.run()
def save_history_text(self, contents, timestamp): def save_history_text(self, contents, timestamp):
import uuid import uuid
from os import mkdir, path, unlink
import logging import logging
output_path = "{}/{}".format(self.__datastore_path, self['uuid']) output_path = "{}/{}".format(self.__datastore_path, self['uuid'])
# Incase the operator deleted it, check and create. # Incase the operator deleted it, check and create.
if not os.path.isdir(output_path): if not os.path.isdir(output_path):
mkdir(output_path) os.mkdir(output_path)
snapshot_fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4()) snapshot_fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4())
logging.debug("Saving history text {}".format(snapshot_fname)) logging.debug("Saving history text {}".format(snapshot_fname))
@ -172,7 +171,7 @@ class model(dict):
return seconds return seconds
# Iterate over all history texts and see if something new exists # Iterate over all history texts and see if something new exists
def lines_contain_something_unique_compared_to_history(self, lines=[]): def lines_contain_something_unique_compared_to_history(self, lines: list):
local_lines = set([l.decode('utf-8').strip().lower() for l in lines]) local_lines = set([l.decode('utf-8').strip().lower() for l in lines])
# Compare each lines (set) against each history text file (set) looking for something new.. # Compare each lines (set) against each history text file (set) looking for something new..
@ -184,3 +183,51 @@ class model(dict):
# Check that everything in local_lines(new stuff) already exists in existing_history - it should # Check that everything in local_lines(new stuff) already exists in existing_history - it should
# if not, something new happened # if not, something new happened
return not local_lines.issubset(existing_history) return not local_lines.issubset(existing_history)
def get_screenshot(self):
fname = os.path.join(self.__datastore_path, self['uuid'], "last-screenshot.png")
if os.path.isfile(fname):
return fname
return False
def __get_file_ctime(self, filename):
fname = os.path.join(self.__datastore_path, self['uuid'], filename)
if os.path.isfile(fname):
return int(os.path.getmtime(fname))
return False
@property
def error_text_ctime(self):
return self.__get_file_ctime('last-error.txt')
@property
def snapshot_text_ctime(self):
if self.history_n==0:
return False
timestamp = list(self.history.keys())[-1]
return int(timestamp)
@property
def snapshot_screenshot_ctime(self):
return self.__get_file_ctime('last-screenshot.png')
@property
def snapshot_error_screenshot_ctime(self):
return self.__get_file_ctime('last-error-screenshot.png')
def get_error_text(self):
"""Return the text saved from a previous request that resulted in a non-200 error"""
fname = os.path.join(self.__datastore_path, self['uuid'], "last-error.txt")
if os.path.isfile(fname):
with open(fname, 'r') as f:
return f.read()
return False
def get_error_snapshot(self):
"""Return path to the screenshot that resulted in a non-200 error"""
fname = os.path.join(self.__datastore_path, self['uuid'], "last-error-screenshot.png")
if os.path.isfile(fname):
return fname
return False

@ -32,6 +32,7 @@ docker run -d --name $$-test_selenium -p 4444:4444 --rm --shm-size="2g" seleni
sleep 5 sleep 5
export WEBDRIVER_URL=http://localhost:4444/wd/hub export WEBDRIVER_URL=http://localhost:4444/wd/hub
pytest tests/fetchers/test_content.py pytest tests/fetchers/test_content.py
pytest tests/test_errorhandling.py
unset WEBDRIVER_URL unset WEBDRIVER_URL
docker kill $$-test_selenium docker kill $$-test_selenium
@ -43,5 +44,7 @@ docker run -d --name $$-test_browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-siz
sleep 5 sleep 5
export PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000 export PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000
pytest tests/fetchers/test_content.py pytest tests/fetchers/test_content.py
pytest tests/test_errorhandling.py
unset PLAYWRIGHT_DRIVER_URL unset PLAYWRIGHT_DRIVER_URL
docker kill $$-test_browserless docker kill $$-test_browserless

@ -10,7 +10,13 @@ $(document).ready(function () {
if (hash_name === '#screenshot') { if (hash_name === '#screenshot') {
$("img#screenshot-img").attr('src', screenshot_url); $("img#screenshot-img").attr('src', screenshot_url);
$("#settings").hide(); $("#settings").hide();
} else { } else if (hash_name === '#error-screenshot') {
$("img#error-screenshot-img").attr('src', error_screenshot_url);
$("#settings").hide();
}
else {
$("#settings").show(); $("#settings").show();
} }
} }

@ -1,11 +1,5 @@
// Rewrite this is a plugin.. is all this JS really 'worth it?' // Rewrite this is a plugin.. is all this JS really 'worth it?'
if(!window.location.hash) {
var tab=document.querySelectorAll("#default-tab a");
tab[0].click();
}
window.addEventListener('hashchange', function () { window.addEventListener('hashchange', function () {
var tabs = document.getElementsByClassName('active'); var tabs = document.getElementsByClassName('active');
while (tabs[0]) { while (tabs[0]) {
@ -17,8 +11,7 @@ window.addEventListener('hashchange', function() {
var has_errors = document.querySelectorAll(".messages .error"); var has_errors = document.querySelectorAll(".messages .error");
if (!has_errors.length) { if (!has_errors.length) {
if (document.location.hash == "") { if (document.location.hash == "") {
document.location.hash = "#general"; document.querySelector(".tabs ul li:first-child a").click();
document.getElementById("default-tab").className = "active";
} else { } else {
set_active_tab(); set_active_tab();
} }

@ -539,3 +539,13 @@ ul {
100% { 100% {
-webkit-transform: rotate(360deg); -webkit-transform: rotate(360deg);
transform: rotate(360deg); } } transform: rotate(360deg); } }
.snapshot-age {
padding: 4px;
background-color: #dfdfdf;
border-radius: 3px;
font-weight: bold;
margin-bottom: 4px; }
.snapshot-age.error {
background-color: #ff0000;
color: #fff; }

@ -771,3 +771,15 @@ ul {
} }
} }
.snapshot-age {
padding: 4px;
background-color: #dfdfdf;
border-radius: 3px;
font-weight: bold;
margin-bottom: 4px;
&.error {
background-color: #ff0000;
color: #fff;
}
}

@ -336,14 +336,6 @@ class ChangeDetectionStore:
self.sync_to_json() self.sync_to_json()
return new_uuid return new_uuid
def get_screenshot(self, watch_uuid):
output_path = "{}/{}".format(self.datastore_path, watch_uuid)
fname = "{}/last-screenshot.png".format(output_path)
if path.isfile(fname):
return fname
return False
def visualselector_data_is_ready(self, watch_uuid): def visualselector_data_is_ready(self, watch_uuid):
output_path = "{}/{}".format(self.datastore_path, watch_uuid) output_path = "{}/{}".format(self.datastore_path, watch_uuid)
screenshot_filename = "{}/last-screenshot.png".format(output_path) screenshot_filename = "{}/last-screenshot.png".format(output_path)
@ -354,17 +346,32 @@ class ChangeDetectionStore:
return False return False
# Save as PNG, PNG is larger but better for doing visual diff in the future # Save as PNG, PNG is larger but better for doing visual diff in the future
def save_screenshot(self, watch_uuid, screenshot: bytes): def save_screenshot(self, watch_uuid, screenshot: bytes, as_error=False):
output_path = "{}/{}".format(self.datastore_path, watch_uuid)
fname = "{}/last-screenshot.png".format(output_path) if as_error:
with open(fname, 'wb') as f: target_path = os.path.join(self.datastore_path, watch_uuid, "last-error-screenshot.png")
else:
target_path = os.path.join(self.datastore_path, watch_uuid, "last-screenshot.png")
with open(target_path, 'wb') as f:
f.write(screenshot) f.write(screenshot)
f.close() f.close()
def save_xpath_data(self, watch_uuid, data): def save_error_text(self, watch_uuid, contents):
output_path = "{}/{}".format(self.datastore_path, watch_uuid)
fname = "{}/elements.json".format(output_path) target_path = os.path.join(self.datastore_path, watch_uuid, "last-error.txt")
with open(fname, 'w') as f:
with open(target_path, 'w') as f:
f.write(contents)
def save_xpath_data(self, watch_uuid, data, as_error=False):
if as_error:
target_path = os.path.join(self.datastore_path, watch_uuid, "elements.json")
else:
target_path = os.path.join(self.datastore_path, watch_uuid, "elements-error.json")
with open(target_path, 'w') as f:
f.write(json.dumps(data)) f.write(json.dumps(data))
f.close() f.close()

@ -3,6 +3,9 @@
{% block content %} {% block content %}
<script> <script>
const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}"; const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}";
{% if last_error_screenshot %}
const error_screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid, error_screenshot=1) }}";
{% endif %}
</script> </script>
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff-overview.js')}}" defer></script> <script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff-overview.js')}}" defer></script>
@ -43,15 +46,31 @@
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script> <script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<div class="tabs"> <div class="tabs">
<ul> <ul>
<li class="tab" id="default-tab"><a href="#text">Text</a></li> {% if last_error_text %}<li class="tab" id="error-text-tab"><a href="#error-text">Error Text</a></li> {% endif %}
{% if last_error_screenshot %}<li class="tab" id="error-screenshot-tab"><a href="#error-screenshot">Error Screenshot</a></li> {% endif %}
<li class="tab" id=""><a href="#text">Text</a></li>
<li class="tab" id="screenshot-tab"><a href="#screenshot">Screenshot</a></li> <li class="tab" id="screenshot-tab"><a href="#screenshot">Screenshot</a></li>
</ul> </ul>
</div> </div>
<div id="diff-ui"> <div id="diff-ui">
<div class="tab-pane-inner" id="error-text">
<div class="snapshot-age error">{{watch_a.error_text_ctime|format_seconds_ago}} seconds ago</div>
<pre>
{{ last_error_text }}
</pre>
</div>
<div class="tab-pane-inner" id="error-screenshot">
<div class="snapshot-age error">{{watch_a.snapshot_error_screenshot_ctime|format_seconds_ago}} seconds ago</div>
<img id="error-screenshot-img" style="max-width: 80%" alt="Current error-ing screenshot from most recent request"/>
</div>
<div class="tab-pane-inner" id="text"> <div class="tab-pane-inner" id="text">
<div class="tip">Pro-tip: Use <strong>show current snapshot</strong> tab to visualise what will be ignored. <div class="tip">Pro-tip: Use <strong>show current snapshot</strong> tab to visualise what will be ignored.
</div> </div>
<div class="snapshot-age">{{watch_a.snapshot_text_ctime|format_timestamp_timeago}}</div>
<table> <table>
<tbody> <tbody>
<tr> <tr>
@ -70,9 +89,9 @@
<div class="tip"> <div class="tip">
For now, Differences are performed on text, not graphically, only the latest screenshot is available. For now, Differences are performed on text, not graphically, only the latest screenshot is available.
</div> </div>
</br>
{% if is_html_webdriver %} {% if is_html_webdriver %}
{% if screenshot %} {% if screenshot %}
<div class="snapshot-age">{{watch_a.snapshot_screenshot_ctime|format_timestamp_timeago}}</div>
<img style="max-width: 80%" id="screenshot-img" alt="Current screenshot from most recent request"/> <img style="max-width: 80%" id="screenshot-img" alt="Current screenshot from most recent request"/>
{% else %} {% else %}
No screenshot available just yet! Try rechecking the page. No screenshot available just yet! Try rechecking the page.
@ -88,7 +107,6 @@
<script defer=""> <script defer="">
var a = document.getElementById('a'); var a = document.getElementById('a');
var b = document.getElementById('b'); var b = document.getElementById('b');
var result = document.getElementById('result'); var result = document.getElementById('result');

@ -23,7 +23,7 @@
<div class="tabs collapsable"> <div class="tabs collapsable">
<ul> <ul>
<li class="tab" id="default-tab"><a href="#general">General</a></li> <li class="tab" id=""><a href="#general">General</a></li>
<li class="tab"><a href="#request">Request</a></li> <li class="tab"><a href="#request">Request</a></li>
<li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li> <li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li>
<li class="tab"><a href="#filters-and-triggers">Filters &amp; Triggers</a></li> <li class="tab"><a href="#filters-and-triggers">Filters &amp; Triggers</a></li>
@ -87,6 +87,9 @@
</span> </span>
</div> </div>
{% endif %} {% endif %}
<div class="pure-control-group inline-radio">
{{ render_checkbox_field(form.ignore_status_codes) }}
</div>
<fieldset id="webdriver-override-options"> <fieldset id="webdriver-override-options">
<div class="pure-control-group"> <div class="pure-control-group">
{{ render_field(form.webdriver_delay) }} {{ render_field(form.webdriver_delay) }}
@ -128,11 +131,7 @@ User-Agent: wonderbra 1.0") }}
\"car\":null \"car\":null
}") }} }") }}
</div> </div>
<div id="ignore-status-codes-option">
{{ render_checkbox_field(form.ignore_status_codes) }}
</div>
</fieldset> </fieldset>
<br/>
</div> </div>
<div class="tab-pane-inner" id="notifications"> <div class="tab-pane-inner" id="notifications">

@ -5,7 +5,7 @@
<div class="tabs collapsable"> <div class="tabs collapsable">
<ul> <ul>
<li class="tab" id="default-tab"><a href="#url-list">URL List</a></li> <li class="tab" id=""><a href="#url-list">URL List</a></li>
<li class="tab"><a href="#distill-io">Distill.io</a></li> <li class="tab"><a href="#distill-io">Distill.io</a></li>
</ul> </ul>
</div> </div>

@ -3,23 +3,39 @@
{% block content %} {% block content %}
<script> <script>
const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}"; const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}";
{% if last_error_screenshot %}
const error_screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid, error_screenshot=1) }}";
{% endif %}
</script> </script>
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff-overview.js')}}" defer></script> <script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff-overview.js')}}" defer></script>
<div id="settings">
<h1>Current - {{watch.last_checked|format_timestamp_timeago}}</h1>
</div>
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script> <script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<div class="tabs"> <div class="tabs">
<ul> <ul>
<li class="tab" id="default-tab"><a href="#text">Text</a></li> {% if last_error_text %}<li class="tab" id="error-text-tab"><a href="#error-text">Error Text</a></li> {% endif %}
{% if last_error_screenshot %}<li class="tab" id="error-screenshot-tab"><a href="#error-screenshot">Error Screenshot</a></li> {% endif %}
{% if history_n > 0 %}
<li class="tab" id="text-tab"><a href="#text">Text</a></li>
<li class="tab" id="screenshot-tab"><a href="#screenshot">Screenshot</a></li> <li class="tab" id="screenshot-tab"><a href="#screenshot">Screenshot</a></li>
{% endif %}
</ul> </ul>
</div> </div>
<div id="diff-ui"> <div id="diff-ui">
<div class="tab-pane-inner" id="error-text">
<div class="snapshot-age error">{{watch.error_text_ctime|format_seconds_ago}} seconds ago</div>
<pre>
{{ last_error_text }}
</pre>
</div>
<div class="tab-pane-inner" id="error-screenshot">
<div class="snapshot-age error">{{watch.snapshot_error_screenshot_ctime|format_seconds_ago}} seconds ago</div>
<img id="error-screenshot-img" style="max-width: 80%" alt="Current erroring screenshot from most recent request"/>
</div>
<div class="tab-pane-inner" id="text"> <div class="tab-pane-inner" id="text">
<div class="snapshot-age">{{watch.snapshot_text_ctime|format_timestamp_timeago}}</div>
<span class="ignored">Grey lines are ignored</span> <span class="triggered">Blue lines are triggers</span> <span class="ignored">Grey lines are ignored</span> <span class="triggered">Blue lines are triggers</span>
<table> <table>
<tbody> <tbody>
@ -33,6 +49,7 @@
</tbody> </tbody>
</table> </table>
</div> </div>
<div class="tab-pane-inner" id="screenshot"> <div class="tab-pane-inner" id="screenshot">
<div class="tip"> <div class="tip">
For now, Differences are performed on text, not graphically, only the latest screenshot is available. For now, Differences are performed on text, not graphically, only the latest screenshot is available.

@ -16,7 +16,7 @@
<div class="edit-form"> <div class="edit-form">
<div class="tabs collapsable"> <div class="tabs collapsable">
<ul> <ul>
<li class="tab" id="default-tab"><a href="#general">General</a></li> <li class="tab" id=""><a href="#general">General</a></li>
<li class="tab"><a href="#notifications">Notifications</a></li> <li class="tab"><a href="#notifications">Notifications</a></li>
<li class="tab"><a href="#fetching">Fetching</a></li> <li class="tab"><a href="#fetching">Fetching</a></li>
<li class="tab"><a href="#filters">Global Filters</a></li> <li class="tab"><a href="#filters">Global Filters</a></li>

@ -90,7 +90,7 @@
{% if watch.history_n >= 2 %} {% if watch.history_n >= 2 %}
<a href="{{ url_for('diff_history_page', uuid=watch.uuid) }}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary diff-link">Diff</a> <a href="{{ url_for('diff_history_page', uuid=watch.uuid) }}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary diff-link">Diff</a>
{% else %} {% else %}
{% if watch.history_n == 1 %} {% if watch.history_n == 1 or (watch.history_n ==0 and watch.error_text_ctime )%}
<a href="{{ url_for('preview_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary">Preview</a> <a href="{{ url_for('preview_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button button-small pure-button-primary">Preview</a>
{% endif %} {% endif %}
{% endif %} {% endif %}

@ -11,16 +11,17 @@ def test_setup(live_server):
live_server_setup(live_server) live_server_setup(live_server)
def test_error_handler(client, live_server): def _runner_test_http_errors(client, live_server, http_code, expected_text):
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("Now you going to get a {} error code\n".format(http_code))
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page # Add our URL to the import page
test_url = url_for('test_endpoint', test_url = url_for('test_endpoint',
status_code=403, status_code=http_code,
_external=True) _external=True)
res = client.post( res = client.post(
url_for("import_page"), url_for("import_page"),
data={"urls": test_url}, data={"urls": test_url},
@ -29,15 +30,38 @@ def test_error_handler(client, live_server):
assert b"1 Imported" in res.data assert b"1 Imported" in res.data
# Give the thread time to pick it up # Give the thread time to pick it up
time.sleep(3) time.sleep(2)
res = client.get(url_for("index")) res = client.get(url_for("index"))
# no change
assert b'unviewed' not in res.data assert b'unviewed' not in res.data
assert b'Status Code 403' in res.data assert bytes(expected_text.encode('utf-8')) in res.data
assert bytes("just now".encode('utf-8')) in res.data
# Error viewing tabs should appear
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
assert b'Error Text' in res.data
# 'Error Screenshot' only when in playwright mode
#assert b'Error Screenshot' in res.data
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_http_error_handler(client, live_server):
_runner_test_http_errors(client, live_server, 403, 'Access denied')
_runner_test_http_errors(client, live_server, 404, 'Page not found')
_runner_test_http_errors(client, live_server, 500, '(Internal server Error) received')
_runner_test_http_errors(client, live_server, 400, 'Error - Request returned a HTTP error code 400')
# Just to be sure error text is properly handled # Just to be sure error text is properly handled
def test_error_text_handler(client, live_server): def test_DNS_errors(client, live_server):
# Give the endpoint time to spin up # Give the endpoint time to spin up
time.sleep(1) time.sleep(1)
@ -54,5 +78,6 @@ def test_error_text_handler(client, live_server):
res = client.get(url_for("index")) res = client.get(url_for("index"))
assert b'Name or service not known' in res.data assert b'Name or service not known' in res.data
# Should always record that we tried
assert bytes("just now".encode('utf-8')) in res.data assert bytes("just now".encode('utf-8')) in res.data

@ -137,54 +137,3 @@ def test_403_page_check_works_with_ignore_status_code(client, live_server):
res = client.get(url_for("index")) res = client.get(url_for("index"))
assert b'unviewed' in res.data assert b'unviewed' in res.data
# Tests the whole stack works with staus codes ignored
def test_403_page_check_fails_without_ignore_status_code(client, live_server):
sleep_time_for_fetch_thread = 3
set_original_response()
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page
test_url = url_for('test_endpoint', status_code=403, _external=True)
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# Goto the edit page, check our ignore option
# Add our URL to the import page
res = client.post(
url_for("edit_page", uuid="first"),
data={"url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# Make a change
set_some_changed_response()
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# It should have 'unviewed' still
# Because it should be looking at only that 'sametext' id
res = client.get(url_for("index"))
assert b'Status Code 403' in res.data

@ -1,3 +1,4 @@
import os
import threading import threading
import queue import queue
import time import time
@ -107,6 +108,14 @@ class update_worker(threading.Thread):
self.notification_q.put(n_object) self.notification_q.put(n_object)
print("Sent filter not found notification for {}".format(watch_uuid)) print("Sent filter not found notification for {}".format(watch_uuid))
def cleanup_error_artifacts(self, uuid):
# All went fine, remove error artifacts
cleanup_files = ["last-error-screenshot.png", "last-error.txt"]
for f in cleanup_files:
full_path = os.path.join(self.datastore.datastore_path, uuid, f)
if os.path.isfile(full_path):
os.unlink(full_path)
def run(self): def run(self):
from changedetectionio import fetch_site_status from changedetectionio import fetch_site_status
@ -146,7 +155,31 @@ class update_worker(threading.Thread):
# Totally fine, it's by choice - just continue on, nothing more to care about # Totally fine, it's by choice - just continue on, nothing more to care about
# Page had elements/content but no renderable text # Page had elements/content but no renderable text
# Backend (not filters) gave zero output # Backend (not filters) gave zero output
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found."}) self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found (With {} reply code).".format(e.status_code)})
if e.screenshot:
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot)
process_changedetection_results = False
except content_fetcher.Non200ErrorCodeReceived as e:
if e.status_code == 403:
err_text = "Error - 403 (Access denied) received"
elif e.status_code == 404:
err_text = "Error - 404 (Page not found) received"
elif e.status_code == 500:
err_text = "Error - 500 (Internal server Error) received"
else:
err_text = "Error - Request returned a HTTP error code {}".format(str(e.status_code))
if e.screenshot:
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
if e.xpath_data:
self.datastore.save_xpath_data(watch_uuid=uuid, data=e.xpath_data, as_error=True)
if e.page_text:
self.datastore.save_error_text(watch_uuid=uuid, contents=e.page_text)
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
# So that we get a trigger when the content is added again
'previous_md5': ''})
process_changedetection_results = False process_changedetection_results = False
except FilterNotFoundInResponse as e: except FilterNotFoundInResponse as e:
@ -182,8 +215,17 @@ class update_worker(threading.Thread):
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code}) 'last_check_status': e.status_code})
process_changedetection_results = False process_changedetection_results = False
except content_fetcher.JSActionExceptions as e:
err_text = "Error running JS Actions - Page request - "+e.message
if e.screenshot:
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code})
except content_fetcher.PageUnloadable as e: except content_fetcher.PageUnloadable as e:
err_text = "Page request from server didnt respond correctly" err_text = "Page request from server didnt respond correctly"
if e.screenshot:
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code}) 'last_check_status': e.status_code})
except Exception as e: except Exception as e:
@ -192,9 +234,13 @@ class update_worker(threading.Thread):
# Other serious error # Other serious error
process_changedetection_results = False process_changedetection_results = False
else: else:
# Mark that we never had any failures # Mark that we never had any failures
if not self.datastore.data['watching'][uuid].get('ignore_status_codes'):
update_obj['consecutive_filter_failures'] = 0 update_obj['consecutive_filter_failures'] = 0
self.cleanup_error_artifacts(uuid)
# Crash protection, the watch entry could have been removed by this point (during a slow chrome fetch etc) # Crash protection, the watch entry could have been removed by this point (during a slow chrome fetch etc)
if not self.datastore.data['watching'].get(uuid): if not self.datastore.data['watching'].get(uuid):
continue continue

Loading…
Cancel
Save