diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index ec43b2be..617e9874 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -115,18 +115,19 @@ def _jinja2_filter_datetime(watch_obj, format="%Y-%m-%d %H:%M:%S"): return timeago.format(int(watch_obj['last_checked']), time.time()) - -# @app.context_processor -# def timeago(): -# def _timeago(lower_time, now): -# return timeago.format(lower_time, now) -# return dict(timeago=_timeago) - @app.template_filter('format_timestamp_timeago') def _jinja2_filter_datetimestamp(timestamp, format="%Y-%m-%d %H:%M:%S"): + if timestamp == False: + return 'Not yet' + return timeago.format(timestamp, time.time()) - # return timeago.format(timestamp, time.time()) - # return datetime.datetime.utcfromtimestamp(timestamp).strftime(format) + +@app.template_filter('format_seconds_ago') +def _jinja2_filter_seconds_precise(timestamp): + if timestamp == False: + return 'Not yet' + + return format(int(time.time()-timestamp), ',d') # When nobody is logged in Flask-Login's current_user is set to an AnonymousUser object. 
class User(flask_login.UserMixin): @@ -830,7 +831,7 @@ def changedetection_app(config=None, datastore_o=None): previous_version_file_contents = "Unable to read {}.\n".format(previous_file) - screenshot_url = datastore.get_screenshot(uuid) + screenshot_url = watch.get_screenshot() system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver' @@ -850,7 +851,11 @@ def changedetection_app(config=None, datastore_o=None): extra_title=" - Diff - {}".format(watch['title'] if watch['title'] else watch['url']), left_sticky=True, screenshot=screenshot_url, - is_html_webdriver=is_html_webdriver) + is_html_webdriver=is_html_webdriver, + last_error=watch['last_error'], + last_error_text=watch.get_error_text(), + last_error_screenshot=watch.get_error_snapshot() + ) return output @@ -865,20 +870,34 @@ def changedetection_app(config=None, datastore_o=None): if uuid == 'first': uuid = list(datastore.data['watching'].keys()).pop() - # Normally you would never reach this, because the 'preview' button is not available when there's no history - # However they may try to clear snapshots and reload the page - if datastore.data['watching'][uuid].history_n == 0: - flash("Preview unavailable - No fetch/check completed or triggers not reached", "error") - return redirect(url_for('index')) - - extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')] - try: watch = datastore.data['watching'][uuid] except KeyError: flash("No history found for the specified link, bad link?", "error") return redirect(url_for('index')) + system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver' + extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')] + + + is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or ( + watch.get('fetch_backend', None) is None and system_uses_webdriver) else False + + # Never requested successfully, but we detected a fetch 
error + if datastore.data['watching'][uuid].history_n == 0 and (watch.get_error_text() or watch.get_error_snapshot()): + flash("Preview unavailable - No fetch/check completed or triggers not reached", "error") + output = render_template("preview.html", + content=content, + history_n=watch.history_n, + extra_stylesheets=extra_stylesheets, +# current_diff_url=watch['url'], + watch=watch, + uuid=uuid, + is_html_webdriver=is_html_webdriver, + last_error=watch['last_error'], + last_error_text=watch.get_error_text(), + last_error_screenshot=watch.get_error_snapshot()) + return output timestamp = list(watch.history.keys())[-1] filename = watch.history[timestamp] @@ -913,23 +932,20 @@ def changedetection_app(config=None, datastore_o=None): except Exception as e: content.append({'line': "File doesnt exist or unable to read file {}".format(filename), 'classes': ''}) - - screenshot_url = datastore.get_screenshot(uuid) - system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver' - - is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or ( - watch.get('fetch_backend', None) is None and system_uses_webdriver) else False - output = render_template("preview.html", content=content, + history_n=watch.history_n, extra_stylesheets=extra_stylesheets, ignored_line_numbers=ignored_line_numbers, triggered_line_numbers=trigger_line_numbers, current_diff_url=watch['url'], - screenshot=screenshot_url, + screenshot=watch.get_screenshot(), watch=watch, uuid=uuid, - is_html_webdriver=is_html_webdriver) + is_html_webdriver=is_html_webdriver, + last_error=watch['last_error'], + last_error_text=watch.get_error_text(), + last_error_screenshot=watch.get_error_snapshot()) return output @@ -1029,11 +1045,12 @@ def changedetection_app(config=None, datastore_o=None): if datastore.data['settings']['application']['password'] and not flask_login.current_user.is_authenticated: abort(403) + screenshot_filename = "last-screenshot.png" if not 
request.args.get('error_screenshot') else "last-error-screenshot.png" + # These files should be in our subdirectory try: # set nocache, set content-type - watch_dir = datastore_o.datastore_path + "/" + filename - response = make_response(send_from_directory(filename="last-screenshot.png", directory=watch_dir, path=watch_dir + "/last-screenshot.png")) + response = make_response(send_from_directory(os.path.join(datastore_o.datastore_path, filename), screenshot_filename)) response.headers['Content-type'] = 'image/png' response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate' response.headers['Pragma'] = 'no-cache' diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py index b97f1e61..049cd18a 100644 --- a/changedetectionio/content_fetcher.py +++ b/changedetectionio/content_fetcher.py @@ -6,38 +6,63 @@ import requests import time import sys + +class Non200ErrorCodeReceived(Exception): + def __init__(self, status_code, url, screenshot=None, xpath_data=None, page_html=None): + # Set this so we can use it in other parts of the app + self.status_code = status_code + self.url = url + self.screenshot = screenshot + self.xpath_data = xpath_data + self.page_text = None + + if page_html: + from changedetectionio import html_tools + self.page_text = html_tools.html_to_text(page_html) + return + + +class JSActionExceptions(Exception): + def __init__(self, status_code, url, screenshot, message=''): + self.status_code = status_code + self.url = url + self.screenshot = screenshot + self.message = message + return + class PageUnloadable(Exception): - def __init__(self, status_code, url): + def __init__(self, status_code, url, screenshot=False): # Set this so we can use it in other parts of the app self.status_code = status_code self.url = url + self.screenshot = screenshot return - pass class EmptyReply(Exception): - def __init__(self, status_code, url): + def __init__(self, status_code, url, screenshot=None): # Set this so we can use 
it in other parts of the app self.status_code = status_code self.url = url + self.screenshot = screenshot return - pass class ScreenshotUnavailable(Exception): - def __init__(self, status_code, url): + def __init__(self, status_code, url, page_html=None): # Set this so we can use it in other parts of the app self.status_code = status_code self.url = url + if page_html: + from changedetectionio import html_tools + self.page_text = html_tools.html_to_text(page_html) return - pass class ReplyWithContentButNoText(Exception): - def __init__(self, status_code, url): + def __init__(self, status_code, url, screenshot=None): # Set this so we can use it in other parts of the app self.status_code = status_code self.url = url + self.screenshot = screenshot return - pass - class Fetcher(): error = None @@ -180,7 +205,7 @@ class Fetcher(): system_https_proxy = os.getenv('HTTPS_PROXY') # Time ONTOP of the system defined env minimum time - render_extract_delay=0 + render_extract_delay = 0 @abstractmethod def get_error(self): @@ -325,9 +350,10 @@ class base_html_playwright(Fetcher): browser.close() # This can be ok, we will try to grab what we could retrieve pass + except Exception as e: - print ("other exception when page.goto") - print (str(e)) + print("other exception when page.goto") + print(str(e)) context.close() browser.close() raise PageUnloadable(url=url, status_code=None) @@ -335,27 +361,36 @@ class base_html_playwright(Fetcher): if response is None: context.close() browser.close() - print ("response object was none") + print("response object was none") raise EmptyReply(url=url, status_code=None) # Bug 2(?) 
Set the viewport size AFTER loading the page - page.set_viewport_size({"width": 1280, "height": 1024}) + page.set_viewport_size({"width": 1280, "height": 1024}) extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay time.sleep(extra_wait) if self.webdriver_js_execute_code is not None: - page.evaluate(self.webdriver_js_execute_code) - time.sleep(2) + try: + page.evaluate(self.webdriver_js_execute_code) + except Exception as e: + # Is it possible to get a screenshot? + error_screenshot = False + try: + page.screenshot(type='jpeg', + clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024}, + quality=1) + + # The actual screenshot + error_screenshot = page.screenshot(type='jpeg', + full_page=True, + quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72))) + except Exception as s: + pass + + raise JSActionExceptions(status_code=response.status, screenshot=error_screenshot, message=str(e), url=url) self.content = page.content() self.status_code = response.status - - if len(self.content.strip()) == 0: - context.close() - browser.close() - print ("Content was empty") - raise EmptyReply(url=url, status_code=None) - self.headers = response.all_headers() if current_css_filter is not None: @@ -382,9 +417,17 @@ class base_html_playwright(Fetcher): browser.close() raise ScreenshotUnavailable(url=url, status_code=None) + if len(self.content.strip()) == 0: + context.close() + browser.close() + print("Content was empty") + raise EmptyReply(url=url, status_code=None, screenshot=self.screenshot) + context.close() browser.close() + if not ignore_status_codes and self.status_code!=200: + raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, page_html=self.content, screenshot=self.screenshot) class base_html_webdriver(Fetcher): if os.getenv("WEBDRIVER_URL"): @@ -512,7 +555,7 @@ class html_requests(Fetcher): ignore_status_codes=False, current_css_filter=None): - proxies={} + proxies = {} # Allows override the proxy on a 
per-request basis if self.proxy_override: @@ -540,10 +583,14 @@ class html_requests(Fetcher): if encoding: r.encoding = encoding + if not r.content or not len(r.content): + raise EmptyReply(url=url, status_code=r.status_code) + # @todo test this # @todo maybe you really want to test zero-byte return pages? - if (not ignore_status_codes and not r) or not r.content or not len(r.content): - raise EmptyReply(url=url, status_code=r.status_code) + if r.status_code != 200 and not ignore_status_codes: + # maybe check with content works? + raise Non200ErrorCodeReceived(url=url, status_code=r.status_code, page_html=r.text) self.status_code = r.status_code self.content = r.text diff --git a/changedetectionio/fetch_site_status.py b/changedetectionio/fetch_site_status.py index 48342d93..ea94181e 100644 --- a/changedetectionio/fetch_site_status.py +++ b/changedetectionio/fetch_site_status.py @@ -94,7 +94,7 @@ class perform_site_check(): url = self.datastore.get_val(uuid, 'url') request_body = self.datastore.get_val(uuid, 'body') request_method = self.datastore.get_val(uuid, 'method') - ignore_status_code = self.datastore.get_val(uuid, 'ignore_status_codes') + ignore_status_codes = self.datastore.data['watching'][uuid].get('ignore_status_codes', False) # source: support is_source = False @@ -124,7 +124,7 @@ class perform_site_check(): if watch['webdriver_js_execute_code'] is not None and watch['webdriver_js_execute_code'].strip(): fetcher.webdriver_js_execute_code = watch['webdriver_js_execute_code'] - fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_code, watch['css_filter']) + fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch['css_filter']) fetcher.quit() # Fetching complete, now filters @@ -210,7 +210,7 @@ class perform_site_check(): # Treat pages with no renderable text content as a change? 
No by default empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False) if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0: - raise content_fetcher.ReplyWithContentButNoText(url=url, status_code=200) + raise content_fetcher.ReplyWithContentButNoText(url=url, status_code=fetcher.get_last_status_code(), screenshot=screenshot) # We rely on the actual text in the html output.. many sites have random script vars etc, # in the future we'll implement other mechanisms. diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py index 458a6fd2..ad1b1b42 100644 --- a/changedetectionio/model/Watch.py +++ b/changedetectionio/model/Watch.py @@ -38,6 +38,7 @@ class model(dict): 'notification_format': default_notification_format, 'notification_muted': False, 'css_filter': '', + 'last_error': False, 'extract_text': [], # Extract text by regex after filters 'subtractive_selectors': [], 'trigger_text': [], # List of text or regex to wait for until a change is detected @@ -122,19 +123,17 @@ class model(dict): bump = self.history return self.__newest_history_key - # Save some text file to the appropriate path and bump the history # result_obj from fetch_site_status.run() def save_history_text(self, contents, timestamp): import uuid - from os import mkdir, path, unlink import logging output_path = "{}/{}".format(self.__datastore_path, self['uuid']) # Incase the operator deleted it, check and create. 
if not os.path.isdir(output_path): - mkdir(output_path) + os.mkdir(output_path) snapshot_fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4()) logging.debug("Saving history text {}".format(snapshot_fname)) @@ -172,7 +171,7 @@ class model(dict): return seconds # Iterate over all history texts and see if something new exists - def lines_contain_something_unique_compared_to_history(self, lines=[]): + def lines_contain_something_unique_compared_to_history(self, lines: list): local_lines = set([l.decode('utf-8').strip().lower() for l in lines]) # Compare each lines (set) against each history text file (set) looking for something new.. @@ -184,3 +183,51 @@ class model(dict): # Check that everything in local_lines(new stuff) already exists in existing_history - it should # if not, something new happened return not local_lines.issubset(existing_history) + + def get_screenshot(self): + fname = os.path.join(self.__datastore_path, self['uuid'], "last-screenshot.png") + if os.path.isfile(fname): + return fname + + return False + + def __get_file_ctime(self, filename): + fname = os.path.join(self.__datastore_path, self['uuid'], filename) + if os.path.isfile(fname): + return int(os.path.getmtime(fname)) + return False + + @property + def error_text_ctime(self): + return self.__get_file_ctime('last-error.txt') + + @property + def snapshot_text_ctime(self): + if self.history_n==0: + return False + + timestamp = list(self.history.keys())[-1] + return int(timestamp) + + @property + def snapshot_screenshot_ctime(self): + return self.__get_file_ctime('last-screenshot.png') + + @property + def snapshot_error_screenshot_ctime(self): + return self.__get_file_ctime('last-error-screenshot.png') + + def get_error_text(self): + """Return the text saved from a previous request that resulted in a non-200 error""" + fname = os.path.join(self.__datastore_path, self['uuid'], "last-error.txt") + if os.path.isfile(fname): + with open(fname, 'r') as f: + return f.read() + return False + + 
def get_error_snapshot(self): + """Return path to the screenshot that resulted in a non-200 error""" + fname = os.path.join(self.__datastore_path, self['uuid'], "last-error-screenshot.png") + if os.path.isfile(fname): + return fname + return False diff --git a/changedetectionio/run_all_tests.sh b/changedetectionio/run_all_tests.sh index c2bbf9aa..d111105f 100755 --- a/changedetectionio/run_all_tests.sh +++ b/changedetectionio/run_all_tests.sh @@ -32,6 +32,7 @@ docker run -d --name $$-test_selenium -p 4444:4444 --rm --shm-size="2g" seleni sleep 5 export WEBDRIVER_URL=http://localhost:4444/wd/hub pytest tests/fetchers/test_content.py +pytest tests/test_errorhandling.py unset WEBDRIVER_URL docker kill $$-test_selenium @@ -43,5 +44,7 @@ docker run -d --name $$-test_browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-siz sleep 5 export PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000 pytest tests/fetchers/test_content.py +pytest tests/test_errorhandling.py + unset PLAYWRIGHT_DRIVER_URL docker kill $$-test_browserless \ No newline at end of file diff --git a/changedetectionio/static/js/diff-overview.js b/changedetectionio/static/js/diff-overview.js index f39b9906..fa94316f 100644 --- a/changedetectionio/static/js/diff-overview.js +++ b/changedetectionio/static/js/diff-overview.js @@ -10,7 +10,13 @@ $(document).ready(function () { if (hash_name === '#screenshot') { $("img#screenshot-img").attr('src', screenshot_url); $("#settings").hide(); - } else { + } else if (hash_name === '#error-screenshot') { + $("img#error-screenshot-img").attr('src', error_screenshot_url); + $("#settings").hide(); + } + + + else { $("#settings").show(); } } diff --git a/changedetectionio/static/js/tabs.js b/changedetectionio/static/js/tabs.js index f600ef47..46251382 100644 --- a/changedetectionio/static/js/tabs.js +++ b/changedetectionio/static/js/tabs.js @@ -1,51 +1,44 @@ // Rewrite this is a plugin.. is all this JS really 'worth it?' 
- -if(!window.location.hash) { - var tab=document.querySelectorAll("#default-tab a"); - tab[0].click(); -} - -window.addEventListener('hashchange', function() { - var tabs = document.getElementsByClassName('active'); - while (tabs[0]) { - tabs[0].classList.remove('active') - } - set_active_tab(); +window.addEventListener('hashchange', function () { + var tabs = document.getElementsByClassName('active'); + while (tabs[0]) { + tabs[0].classList.remove('active') + } + set_active_tab(); }, false); -var has_errors=document.querySelectorAll(".messages .error"); +var has_errors = document.querySelectorAll(".messages .error"); if (!has_errors.length) { - if (document.location.hash == "" ) { - document.location.hash = "#general"; - document.getElementById("default-tab").className = "active"; + if (document.location.hash == "") { + document.querySelector(".tabs ul li:first-child a").click(); } else { set_active_tab(); } } else { - focus_error_tab(); + focus_error_tab(); } function set_active_tab() { - var tab=document.querySelectorAll("a[href='"+location.hash+"']"); - if (tab.length) { - tab[0].parentElement.className="active"; - } + var tab = document.querySelectorAll("a[href='" + location.hash + "']"); + if (tab.length) { + tab[0].parentElement.className = "active"; + } // hash could move the page down window.scrollTo(0, 0); } function focus_error_tab() { - // time to use jquery or vuejs really, - // activate the tab with the error - var tabs = document.querySelectorAll('.tabs li a'),i; + // time to use jquery or vuejs really, + // activate the tab with the error + var tabs = document.querySelectorAll('.tabs li a'), i; for (i = 0; i < tabs.length; ++i) { - var tab_name=tabs[i].hash.replace('#',''); - var pane_errors=document.querySelectorAll('#'+tab_name+' .error') - if (pane_errors.length) { - document.location.hash = '#'+tab_name; - return true; - } + var tab_name = tabs[i].hash.replace('#', ''); + var pane_errors = document.querySelectorAll('#' + tab_name + ' .error') + 
if (pane_errors.length) { + document.location.hash = '#' + tab_name; + return true; + } } return false; } diff --git a/changedetectionio/static/styles/styles.css b/changedetectionio/static/styles/styles.css index 34934f9a..ea185a64 100644 --- a/changedetectionio/static/styles/styles.css +++ b/changedetectionio/static/styles/styles.css @@ -539,3 +539,13 @@ ul { 100% { -webkit-transform: rotate(360deg); transform: rotate(360deg); } } + +.snapshot-age { + padding: 4px; + background-color: #dfdfdf; + border-radius: 3px; + font-weight: bold; + margin-bottom: 4px; } + .snapshot-age.error { + background-color: #ff0000; + color: #fff; } diff --git a/changedetectionio/static/styles/styles.scss b/changedetectionio/static/styles/styles.scss index acfcacbe..d9b851ed 100644 --- a/changedetectionio/static/styles/styles.scss +++ b/changedetectionio/static/styles/styles.scss @@ -771,3 +771,15 @@ ul { } } +.snapshot-age { + padding: 4px; + background-color: #dfdfdf; + border-radius: 3px; + font-weight: bold; + margin-bottom: 4px; + &.error { + background-color: #ff0000; + color: #fff; + } +} + diff --git a/changedetectionio/store.py b/changedetectionio/store.py index da7835f1..e91f724c 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -336,14 +336,6 @@ class ChangeDetectionStore: self.sync_to_json() return new_uuid - def get_screenshot(self, watch_uuid): - output_path = "{}/{}".format(self.datastore_path, watch_uuid) - fname = "{}/last-screenshot.png".format(output_path) - if path.isfile(fname): - return fname - - return False - def visualselector_data_is_ready(self, watch_uuid): output_path = "{}/{}".format(self.datastore_path, watch_uuid) screenshot_filename = "{}/last-screenshot.png".format(output_path) @@ -354,17 +346,32 @@ class ChangeDetectionStore: return False # Save as PNG, PNG is larger but better for doing visual diff in the future - def save_screenshot(self, watch_uuid, screenshot: bytes): - output_path = "{}/{}".format(self.datastore_path, 
watch_uuid) - fname = "{}/last-screenshot.png".format(output_path) - with open(fname, 'wb') as f: + def save_screenshot(self, watch_uuid, screenshot: bytes, as_error=False): + + if as_error: + target_path = os.path.join(self.datastore_path, watch_uuid, "last-error-screenshot.png") + else: + target_path = os.path.join(self.datastore_path, watch_uuid, "last-screenshot.png") + + with open(target_path, 'wb') as f: f.write(screenshot) f.close() - def save_xpath_data(self, watch_uuid, data): - output_path = "{}/{}".format(self.datastore_path, watch_uuid) - fname = "{}/elements.json".format(output_path) - with open(fname, 'w') as f: + def save_error_text(self, watch_uuid, contents): + + target_path = os.path.join(self.datastore_path, watch_uuid, "last-error.txt") + + with open(target_path, 'w') as f: + f.write(contents) + + def save_xpath_data(self, watch_uuid, data, as_error=False): + + if as_error: + target_path = os.path.join(self.datastore_path, watch_uuid, "elements-error.json") + else: + target_path = os.path.join(self.datastore_path, watch_uuid, "elements.json") + + with open(target_path, 'w') as f: f.write(json.dumps(data)) f.close() diff --git a/changedetectionio/templates/diff.html b/changedetectionio/templates/diff.html index 343e3d7a..63cf7b6f 100644 --- a/changedetectionio/templates/diff.html +++ b/changedetectionio/templates/diff.html @@ -3,6 +3,9 @@ {% block content %} @@ -43,15 +46,31 @@
+ {{ last_error_text }} ++