diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index ec43b2be..617e9874 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -115,18 +115,19 @@ def _jinja2_filter_datetime(watch_obj, format="%Y-%m-%d %H:%M:%S"): return timeago.format(int(watch_obj['last_checked']), time.time()) - -# @app.context_processor -# def timeago(): -# def _timeago(lower_time, now): -# return timeago.format(lower_time, now) -# return dict(timeago=_timeago) - @app.template_filter('format_timestamp_timeago') def _jinja2_filter_datetimestamp(timestamp, format="%Y-%m-%d %H:%M:%S"): + if timestamp == False: + return 'Not yet' + return timeago.format(timestamp, time.time()) - # return timeago.format(timestamp, time.time()) - # return datetime.datetime.utcfromtimestamp(timestamp).strftime(format) + +@app.template_filter('format_seconds_ago') +def _jinja2_filter_seconds_precise(timestamp): + if timestamp == False: + return 'Not yet' + + return format(int(time.time()-timestamp), ',d') # When nobody is logged in Flask-Login's current_user is set to an AnonymousUser object. 
class User(flask_login.UserMixin): @@ -830,7 +831,7 @@ def changedetection_app(config=None, datastore_o=None): previous_version_file_contents = "Unable to read {}.\n".format(previous_file) - screenshot_url = datastore.get_screenshot(uuid) + screenshot_url = watch.get_screenshot() system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver' @@ -850,7 +851,11 @@ def changedetection_app(config=None, datastore_o=None): extra_title=" - Diff - {}".format(watch['title'] if watch['title'] else watch['url']), left_sticky=True, screenshot=screenshot_url, - is_html_webdriver=is_html_webdriver) + is_html_webdriver=is_html_webdriver, + last_error=watch['last_error'], + last_error_text=watch.get_error_text(), + last_error_screenshot=watch.get_error_snapshot() + ) return output @@ -865,20 +870,34 @@ def changedetection_app(config=None, datastore_o=None): if uuid == 'first': uuid = list(datastore.data['watching'].keys()).pop() - # Normally you would never reach this, because the 'preview' button is not available when there's no history - # However they may try to clear snapshots and reload the page - if datastore.data['watching'][uuid].history_n == 0: - flash("Preview unavailable - No fetch/check completed or triggers not reached", "error") - return redirect(url_for('index')) - - extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')] - try: watch = datastore.data['watching'][uuid] except KeyError: flash("No history found for the specified link, bad link?", "error") return redirect(url_for('index')) + system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver' + extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')] + + + is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or ( + watch.get('fetch_backend', None) is None and system_uses_webdriver) else False + + # Never requested successfully, but we detected a fetch 
error + if datastore.data['watching'][uuid].history_n == 0 and (watch.get_error_text() or watch.get_error_snapshot()): + flash("Preview unavailable - No fetch/check completed or triggers not reached", "error") + output = render_template("preview.html", + content=content, + history_n=watch.history_n, + extra_stylesheets=extra_stylesheets, +# current_diff_url=watch['url'], + watch=watch, + uuid=uuid, + is_html_webdriver=is_html_webdriver, + last_error=watch['last_error'], + last_error_text=watch.get_error_text(), + last_error_screenshot=watch.get_error_snapshot()) + return output timestamp = list(watch.history.keys())[-1] filename = watch.history[timestamp] @@ -913,23 +932,20 @@ def changedetection_app(config=None, datastore_o=None): except Exception as e: content.append({'line': "File doesnt exist or unable to read file {}".format(filename), 'classes': ''}) - - screenshot_url = datastore.get_screenshot(uuid) - system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver' - - is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or ( - watch.get('fetch_backend', None) is None and system_uses_webdriver) else False - output = render_template("preview.html", content=content, + history_n=watch.history_n, extra_stylesheets=extra_stylesheets, ignored_line_numbers=ignored_line_numbers, triggered_line_numbers=trigger_line_numbers, current_diff_url=watch['url'], - screenshot=screenshot_url, + screenshot=watch.get_screenshot(), watch=watch, uuid=uuid, - is_html_webdriver=is_html_webdriver) + is_html_webdriver=is_html_webdriver, + last_error=watch['last_error'], + last_error_text=watch.get_error_text(), + last_error_screenshot=watch.get_error_snapshot()) return output @@ -1029,11 +1045,12 @@ def changedetection_app(config=None, datastore_o=None): if datastore.data['settings']['application']['password'] and not flask_login.current_user.is_authenticated: abort(403) + screenshot_filename = "last-screenshot.png" if not 
request.args.get('error_screenshot') else "last-error-screenshot.png" + # These files should be in our subdirectory try: # set nocache, set content-type - watch_dir = datastore_o.datastore_path + "/" + filename - response = make_response(send_from_directory(filename="last-screenshot.png", directory=watch_dir, path=watch_dir + "/last-screenshot.png")) + response = make_response(send_from_directory(os.path.join(datastore_o.datastore_path, filename), screenshot_filename)) response.headers['Content-type'] = 'image/png' response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate' response.headers['Pragma'] = 'no-cache' diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py index b97f1e61..049cd18a 100644 --- a/changedetectionio/content_fetcher.py +++ b/changedetectionio/content_fetcher.py @@ -6,38 +6,63 @@ import requests import time import sys + +class Non200ErrorCodeReceived(Exception): + def __init__(self, status_code, url, screenshot=None, xpath_data=None, page_html=None): + # Set this so we can use it in other parts of the app + self.status_code = status_code + self.url = url + self.screenshot = screenshot + self.xpath_data = xpath_data + self.page_text = None + + if page_html: + from changedetectionio import html_tools + self.page_text = html_tools.html_to_text(page_html) + return + + +class JSActionExceptions(Exception): + def __init__(self, status_code, url, screenshot, message=''): + self.status_code = status_code + self.url = url + self.screenshot = screenshot + self.message = message + return + class PageUnloadable(Exception): - def __init__(self, status_code, url): + def __init__(self, status_code, url, screenshot=False): # Set this so we can use it in other parts of the app self.status_code = status_code self.url = url + self.screenshot = screenshot return - pass class EmptyReply(Exception): - def __init__(self, status_code, url): + def __init__(self, status_code, url, screenshot=None): # Set this so we can use 
it in other parts of the app self.status_code = status_code self.url = url + self.screenshot = screenshot return - pass class ScreenshotUnavailable(Exception): - def __init__(self, status_code, url): + def __init__(self, status_code, url, page_html=None): # Set this so we can use it in other parts of the app self.status_code = status_code self.url = url + from changedetectionio import html_tools + # Always define page_text (None when no HTML was captured) so handlers can read it safely + self.page_text = html_tools.html_to_text(page_html) if page_html else None return - pass class ReplyWithContentButNoText(Exception): - def __init__(self, status_code, url): + def __init__(self, status_code, url, screenshot=None): # Set this so we can use it in other parts of the app self.status_code = status_code self.url = url + self.screenshot = screenshot return - pass - class Fetcher(): error = None @@ -180,7 +205,7 @@ class Fetcher(): system_https_proxy = os.getenv('HTTPS_PROXY') # Time ONTOP of the system defined env minimum time - render_extract_delay=0 + render_extract_delay = 0 @abstractmethod def get_error(self): @@ -325,9 +350,10 @@ class base_html_playwright(Fetcher): browser.close() # This can be ok, we will try to grab what we could retrieve pass + except Exception as e: - print ("other exception when page.goto") - print (str(e)) + print("other exception when page.goto") + print(str(e)) context.close() browser.close() raise PageUnloadable(url=url, status_code=None) @@ -335,27 +361,36 @@ class base_html_playwright(Fetcher): if response is None: context.close() browser.close() - print ("response object was none") + print("response object was none") raise EmptyReply(url=url, status_code=None) # Bug 2(?) 
Set the viewport size AFTER loading the page - page.set_viewport_size({"width": 1280, "height": 1024}) + page.set_viewport_size({"width": 1280, "height": 1024}) extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay time.sleep(extra_wait) if self.webdriver_js_execute_code is not None: - page.evaluate(self.webdriver_js_execute_code) - time.sleep(2) + try: + page.evaluate(self.webdriver_js_execute_code) + except Exception as e: + # Is it possible to get a screenshot? + error_screenshot = False + try: + page.screenshot(type='jpeg', + clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024}, + quality=1) + + # The actual screenshot + error_screenshot = page.screenshot(type='jpeg', + full_page=True, + quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72))) + except Exception as s: + pass + + raise JSActionExceptions(status_code=response.status, screenshot=error_screenshot, message=str(e), url=url) self.content = page.content() self.status_code = response.status - - if len(self.content.strip()) == 0: - context.close() - browser.close() - print ("Content was empty") - raise EmptyReply(url=url, status_code=None) - self.headers = response.all_headers() if current_css_filter is not None: @@ -382,9 +417,17 @@ class base_html_playwright(Fetcher): browser.close() raise ScreenshotUnavailable(url=url, status_code=None) + if len(self.content.strip()) == 0: + context.close() + browser.close() + print("Content was empty") + raise EmptyReply(url=url, status_code=None, screenshot=self.screenshot) + context.close() browser.close() + if not ignore_status_codes and self.status_code!=200: + raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, page_html=self.content, screenshot=self.screenshot) class base_html_webdriver(Fetcher): if os.getenv("WEBDRIVER_URL"): @@ -512,7 +555,7 @@ class html_requests(Fetcher): ignore_status_codes=False, current_css_filter=None): - proxies={} + proxies = {} # Allows override the proxy on a 
per-request basis if self.proxy_override: @@ -540,10 +583,14 @@ class html_requests(Fetcher): if encoding: r.encoding = encoding + if not r.content or not len(r.content): + raise EmptyReply(url=url, status_code=r.status_code) + # @todo test this # @todo maybe you really want to test zero-byte return pages? - if (not ignore_status_codes and not r) or not r.content or not len(r.content): - raise EmptyReply(url=url, status_code=r.status_code) + if r.status_code != 200 and not ignore_status_codes: + # maybe check with content works? + raise Non200ErrorCodeReceived(url=url, status_code=r.status_code, page_html=r.text) self.status_code = r.status_code self.content = r.text diff --git a/changedetectionio/fetch_site_status.py b/changedetectionio/fetch_site_status.py index 48342d93..ea94181e 100644 --- a/changedetectionio/fetch_site_status.py +++ b/changedetectionio/fetch_site_status.py @@ -94,7 +94,7 @@ class perform_site_check(): url = self.datastore.get_val(uuid, 'url') request_body = self.datastore.get_val(uuid, 'body') request_method = self.datastore.get_val(uuid, 'method') - ignore_status_code = self.datastore.get_val(uuid, 'ignore_status_codes') + ignore_status_codes = self.datastore.data['watching'][uuid].get('ignore_status_codes', False) # source: support is_source = False @@ -124,7 +124,7 @@ class perform_site_check(): if watch['webdriver_js_execute_code'] is not None and watch['webdriver_js_execute_code'].strip(): fetcher.webdriver_js_execute_code = watch['webdriver_js_execute_code'] - fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_code, watch['css_filter']) + fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch['css_filter']) fetcher.quit() # Fetching complete, now filters @@ -210,7 +210,7 @@ class perform_site_check(): # Treat pages with no renderable text content as a change? 
No by default empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False) if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0: - raise content_fetcher.ReplyWithContentButNoText(url=url, status_code=200) + raise content_fetcher.ReplyWithContentButNoText(url=url, status_code=fetcher.get_last_status_code(), screenshot=screenshot) # We rely on the actual text in the html output.. many sites have random script vars etc, # in the future we'll implement other mechanisms. diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py index 458a6fd2..ad1b1b42 100644 --- a/changedetectionio/model/Watch.py +++ b/changedetectionio/model/Watch.py @@ -38,6 +38,7 @@ class model(dict): 'notification_format': default_notification_format, 'notification_muted': False, 'css_filter': '', + 'last_error': False, 'extract_text': [], # Extract text by regex after filters 'subtractive_selectors': [], 'trigger_text': [], # List of text or regex to wait for until a change is detected @@ -122,19 +123,17 @@ class model(dict): bump = self.history return self.__newest_history_key - # Save some text file to the appropriate path and bump the history # result_obj from fetch_site_status.run() def save_history_text(self, contents, timestamp): import uuid - from os import mkdir, path, unlink import logging output_path = "{}/{}".format(self.__datastore_path, self['uuid']) # Incase the operator deleted it, check and create. 
if not os.path.isdir(output_path): - mkdir(output_path) + os.mkdir(output_path) snapshot_fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4()) logging.debug("Saving history text {}".format(snapshot_fname)) @@ -172,7 +171,7 @@ class model(dict): return seconds # Iterate over all history texts and see if something new exists - def lines_contain_something_unique_compared_to_history(self, lines=[]): + def lines_contain_something_unique_compared_to_history(self, lines: list): local_lines = set([l.decode('utf-8').strip().lower() for l in lines]) # Compare each lines (set) against each history text file (set) looking for something new.. @@ -184,3 +183,51 @@ class model(dict): # Check that everything in local_lines(new stuff) already exists in existing_history - it should # if not, something new happened return not local_lines.issubset(existing_history) + + def get_screenshot(self): + fname = os.path.join(self.__datastore_path, self['uuid'], "last-screenshot.png") + if os.path.isfile(fname): + return fname + + return False + + def __get_file_ctime(self, filename): + fname = os.path.join(self.__datastore_path, self['uuid'], filename) + if os.path.isfile(fname): + return int(os.path.getmtime(fname)) + return False + + @property + def error_text_ctime(self): + return self.__get_file_ctime('last-error.txt') + + @property + def snapshot_text_ctime(self): + if self.history_n==0: + return False + + timestamp = list(self.history.keys())[-1] + return int(timestamp) + + @property + def snapshot_screenshot_ctime(self): + return self.__get_file_ctime('last-screenshot.png') + + @property + def snapshot_error_screenshot_ctime(self): + return self.__get_file_ctime('last-error-screenshot.png') + + def get_error_text(self): + """Return the text saved from a previous request that resulted in a non-200 error""" + fname = os.path.join(self.__datastore_path, self['uuid'], "last-error.txt") + if os.path.isfile(fname): + with open(fname, 'r') as f: + return f.read() + return False + + 
def get_error_snapshot(self): + """Return path to the screenshot that resulted in a non-200 error""" + fname = os.path.join(self.__datastore_path, self['uuid'], "last-error-screenshot.png") + if os.path.isfile(fname): + return fname + return False diff --git a/changedetectionio/run_all_tests.sh b/changedetectionio/run_all_tests.sh index c2bbf9aa..d111105f 100755 --- a/changedetectionio/run_all_tests.sh +++ b/changedetectionio/run_all_tests.sh @@ -32,6 +32,7 @@ docker run -d --name $$-test_selenium -p 4444:4444 --rm --shm-size="2g" seleni sleep 5 export WEBDRIVER_URL=http://localhost:4444/wd/hub pytest tests/fetchers/test_content.py +pytest tests/test_errorhandling.py unset WEBDRIVER_URL docker kill $$-test_selenium @@ -43,5 +44,7 @@ docker run -d --name $$-test_browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-siz sleep 5 export PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000 pytest tests/fetchers/test_content.py +pytest tests/test_errorhandling.py + unset PLAYWRIGHT_DRIVER_URL docker kill $$-test_browserless \ No newline at end of file diff --git a/changedetectionio/static/js/diff-overview.js b/changedetectionio/static/js/diff-overview.js index f39b9906..fa94316f 100644 --- a/changedetectionio/static/js/diff-overview.js +++ b/changedetectionio/static/js/diff-overview.js @@ -10,7 +10,13 @@ $(document).ready(function () { if (hash_name === '#screenshot') { $("img#screenshot-img").attr('src', screenshot_url); $("#settings").hide(); - } else { + } else if (hash_name === '#error-screenshot') { + $("img#error-screenshot-img").attr('src', error_screenshot_url); + $("#settings").hide(); + } + + + else { $("#settings").show(); } } diff --git a/changedetectionio/static/js/tabs.js b/changedetectionio/static/js/tabs.js index f600ef47..46251382 100644 --- a/changedetectionio/static/js/tabs.js +++ b/changedetectionio/static/js/tabs.js @@ -1,51 +1,44 @@ // Rewrite this is a plugin.. is all this JS really 'worth it?' 
- -if(!window.location.hash) { - var tab=document.querySelectorAll("#default-tab a"); - tab[0].click(); -} - -window.addEventListener('hashchange', function() { - var tabs = document.getElementsByClassName('active'); - while (tabs[0]) { - tabs[0].classList.remove('active') - } - set_active_tab(); +window.addEventListener('hashchange', function () { + var tabs = document.getElementsByClassName('active'); + while (tabs[0]) { + tabs[0].classList.remove('active') + } + set_active_tab(); }, false); -var has_errors=document.querySelectorAll(".messages .error"); +var has_errors = document.querySelectorAll(".messages .error"); if (!has_errors.length) { - if (document.location.hash == "" ) { - document.location.hash = "#general"; - document.getElementById("default-tab").className = "active"; + if (document.location.hash == "") { + document.querySelector(".tabs ul li:first-child a").click(); } else { set_active_tab(); } } else { - focus_error_tab(); + focus_error_tab(); } function set_active_tab() { - var tab=document.querySelectorAll("a[href='"+location.hash+"']"); - if (tab.length) { - tab[0].parentElement.className="active"; - } + var tab = document.querySelectorAll("a[href='" + location.hash + "']"); + if (tab.length) { + tab[0].parentElement.className = "active"; + } // hash could move the page down window.scrollTo(0, 0); } function focus_error_tab() { - // time to use jquery or vuejs really, - // activate the tab with the error - var tabs = document.querySelectorAll('.tabs li a'),i; + // time to use jquery or vuejs really, + // activate the tab with the error + var tabs = document.querySelectorAll('.tabs li a'), i; for (i = 0; i < tabs.length; ++i) { - var tab_name=tabs[i].hash.replace('#',''); - var pane_errors=document.querySelectorAll('#'+tab_name+' .error') - if (pane_errors.length) { - document.location.hash = '#'+tab_name; - return true; - } + var tab_name = tabs[i].hash.replace('#', ''); + var pane_errors = document.querySelectorAll('#' + tab_name + ' .error') + 
if (pane_errors.length) { + document.location.hash = '#' + tab_name; + return true; + } } return false; } diff --git a/changedetectionio/static/styles/styles.css b/changedetectionio/static/styles/styles.css index 34934f9a..ea185a64 100644 --- a/changedetectionio/static/styles/styles.css +++ b/changedetectionio/static/styles/styles.css @@ -539,3 +539,13 @@ ul { 100% { -webkit-transform: rotate(360deg); transform: rotate(360deg); } } + +.snapshot-age { + padding: 4px; + background-color: #dfdfdf; + border-radius: 3px; + font-weight: bold; + margin-bottom: 4px; } + .snapshot-age.error { + background-color: #ff0000; + color: #fff; } diff --git a/changedetectionio/static/styles/styles.scss b/changedetectionio/static/styles/styles.scss index acfcacbe..d9b851ed 100644 --- a/changedetectionio/static/styles/styles.scss +++ b/changedetectionio/static/styles/styles.scss @@ -771,3 +771,15 @@ ul { } } +.snapshot-age { + padding: 4px; + background-color: #dfdfdf; + border-radius: 3px; + font-weight: bold; + margin-bottom: 4px; + &.error { + background-color: #ff0000; + color: #fff; + } +} + diff --git a/changedetectionio/store.py b/changedetectionio/store.py index da7835f1..e91f724c 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -336,14 +336,6 @@ class ChangeDetectionStore: self.sync_to_json() return new_uuid - def get_screenshot(self, watch_uuid): - output_path = "{}/{}".format(self.datastore_path, watch_uuid) - fname = "{}/last-screenshot.png".format(output_path) - if path.isfile(fname): - return fname - - return False - def visualselector_data_is_ready(self, watch_uuid): output_path = "{}/{}".format(self.datastore_path, watch_uuid) screenshot_filename = "{}/last-screenshot.png".format(output_path) @@ -354,17 +346,32 @@ class ChangeDetectionStore: return False # Save as PNG, PNG is larger but better for doing visual diff in the future - def save_screenshot(self, watch_uuid, screenshot: bytes): - output_path = "{}/{}".format(self.datastore_path, 
watch_uuid) - fname = "{}/last-screenshot.png".format(output_path) - with open(fname, 'wb') as f: + def save_screenshot(self, watch_uuid, screenshot: bytes, as_error=False): + + if as_error: + target_path = os.path.join(self.datastore_path, watch_uuid, "last-error-screenshot.png") + else: + target_path = os.path.join(self.datastore_path, watch_uuid, "last-screenshot.png") + + with open(target_path, 'wb') as f: f.write(screenshot) f.close() - def save_xpath_data(self, watch_uuid, data): - output_path = "{}/{}".format(self.datastore_path, watch_uuid) - fname = "{}/elements.json".format(output_path) - with open(fname, 'w') as f: + def save_error_text(self, watch_uuid, contents): + + target_path = os.path.join(self.datastore_path, watch_uuid, "last-error.txt") + + with open(target_path, 'w') as f: + f.write(contents) + + def save_xpath_data(self, watch_uuid, data, as_error=False): + + if as_error: + target_path = os.path.join(self.datastore_path, watch_uuid, "elements-error.json") + else: + target_path = os.path.join(self.datastore_path, watch_uuid, "elements.json") + + with open(target_path, 'w') as f: f.write(json.dumps(data)) f.close() diff --git a/changedetectionio/templates/diff.html b/changedetectionio/templates/diff.html index 343e3d7a..63cf7b6f 100644 --- a/changedetectionio/templates/diff.html +++ b/changedetectionio/templates/diff.html @@ -3,6 +3,9 @@ {% block content %} @@ -43,15 +46,31 @@
+
+
{{watch_a.error_text_ctime|format_seconds_ago}} seconds ago
+
+            {{ last_error_text }}
+        
+
+ +
+
{{watch_a.snapshot_error_screenshot_ctime|format_seconds_ago}} seconds ago
+ Current error-ing screenshot from most recent request +
+
Pro-tip: Use show current snapshot tab to visualise what will be ignored.
+
{{watch_a.snapshot_text_ctime|format_timestamp_timeago}}
+ @@ -70,10 +89,10 @@
For now, Differences are performed on text, not graphically, only the latest screenshot is available.
-
{% if is_html_webdriver %} {% if screenshot %} - Current screenshot from most recent request +
{{watch_a.snapshot_screenshot_ctime|format_timestamp_timeago}}
+ Current screenshot from most recent request {% else %} No screenshot available just yet! Try rechecking the page. {% endif %} @@ -88,7 +107,6 @@ -
-

Current - {{watch.last_checked|format_timestamp_timeago}}

-
-
+
+
{{watch.error_text_ctime|format_seconds_ago}} seconds ago
+
+            {{ last_error_text }}
+        
+
+ +
+
{{watch.snapshot_error_screenshot_ctime|format_seconds_ago}} seconds ago
+ Current erroring screenshot from most recent request +
+
+
{{watch.snapshot_text_ctime|format_timestamp_timeago}}
Grey lines are ignored Blue lines are triggers
@@ -33,6 +49,7 @@
+
For now, Differences are performed on text, not graphically, only the latest screenshot is available. diff --git a/changedetectionio/templates/settings.html b/changedetectionio/templates/settings.html index 8b3e2e8d..e482dde8 100644 --- a/changedetectionio/templates/settings.html +++ b/changedetectionio/templates/settings.html @@ -16,7 +16,7 @@
    -
  • General
  • +
  • General
  • Notifications
  • Fetching
  • Global Filters
  • diff --git a/changedetectionio/templates/watch-overview.html b/changedetectionio/templates/watch-overview.html index 0ab08dec..493bf65d 100644 --- a/changedetectionio/templates/watch-overview.html +++ b/changedetectionio/templates/watch-overview.html @@ -90,7 +90,7 @@ {% if watch.history_n >= 2 %} Diff {% else %} - {% if watch.history_n == 1 %} + {% if watch.history_n == 1 or (watch.history_n ==0 and watch.error_text_ctime )%} Preview {% endif %} {% endif %} diff --git a/changedetectionio/tests/test_errorhandling.py b/changedetectionio/tests/test_errorhandling.py index 0a0d5d6c..a8b29863 100644 --- a/changedetectionio/tests/test_errorhandling.py +++ b/changedetectionio/tests/test_errorhandling.py @@ -11,16 +11,17 @@ def test_setup(live_server): live_server_setup(live_server) -def test_error_handler(client, live_server): +def _runner_test_http_errors(client, live_server, http_code, expected_text): + with open("test-datastore/endpoint-content.txt", "w") as f: + f.write("Now you going to get a {} error code\n".format(http_code)) - # Give the endpoint time to spin up - time.sleep(1) # Add our URL to the import page test_url = url_for('test_endpoint', - status_code=403, + status_code=http_code, _external=True) + res = client.post( url_for("import_page"), data={"urls": test_url}, @@ -29,15 +30,38 @@ def test_error_handler(client, live_server): assert b"1 Imported" in res.data # Give the thread time to pick it up - time.sleep(3) + time.sleep(2) res = client.get(url_for("index")) + # no change assert b'unviewed' not in res.data - assert b'Status Code 403' in res.data - assert bytes("just now".encode('utf-8')) in res.data + assert bytes(expected_text.encode('utf-8')) in res.data + + + # Error viewing tabs should appear + res = client.get( + url_for("preview_page", uuid="first"), + follow_redirects=True + ) + + assert b'Error Text' in res.data + + # 'Error Screenshot' only when in playwright mode + #assert b'Error Screenshot' in res.data + + + res = 
client.get(url_for("form_delete", uuid="all"), follow_redirects=True) + assert b'Deleted' in res.data + + +def test_http_error_handler(client, live_server): + _runner_test_http_errors(client, live_server, 403, 'Access denied') + _runner_test_http_errors(client, live_server, 404, 'Page not found') + _runner_test_http_errors(client, live_server, 500, '(Internal server Error) received') + _runner_test_http_errors(client, live_server, 400, 'Error - Request returned a HTTP error code 400') # Just to be sure error text is properly handled -def test_error_text_handler(client, live_server): +def test_DNS_errors(client, live_server): # Give the endpoint time to spin up time.sleep(1) @@ -54,5 +78,6 @@ def test_error_text_handler(client, live_server): res = client.get(url_for("index")) assert b'Name or service not known' in res.data + # Should always record that we tried assert bytes("just now".encode('utf-8')) in res.data diff --git a/changedetectionio/tests/test_ignorestatuscode.py b/changedetectionio/tests/test_ignorestatuscode.py index 335f3655..aeafcdaa 100644 --- a/changedetectionio/tests/test_ignorestatuscode.py +++ b/changedetectionio/tests/test_ignorestatuscode.py @@ -137,54 +137,3 @@ def test_403_page_check_works_with_ignore_status_code(client, live_server): res = client.get(url_for("index")) assert b'unviewed' in res.data - -# Tests the whole stack works with staus codes ignored -def test_403_page_check_fails_without_ignore_status_code(client, live_server): - sleep_time_for_fetch_thread = 3 - - set_original_response() - - # Give the endpoint time to spin up - time.sleep(1) - - # Add our URL to the import page - test_url = url_for('test_endpoint', status_code=403, _external=True) - res = client.post( - url_for("import_page"), - data={"urls": test_url}, - follow_redirects=True - ) - assert b"1 Imported" in res.data - - # Trigger a check - client.get(url_for("form_watch_checknow"), follow_redirects=True) - - # Give the thread time to pick it up - 
time.sleep(sleep_time_for_fetch_thread) - - # Goto the edit page, check our ignore option - # Add our URL to the import page - res = client.post( - url_for("edit_page", uuid="first"), - data={"url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, - follow_redirects=True - ) - assert b"Updated watch." in res.data - - # Trigger a check - client.get(url_for("form_watch_checknow"), follow_redirects=True) - - # Give the thread time to pick it up - time.sleep(sleep_time_for_fetch_thread) - # Make a change - set_some_changed_response() - - # Trigger a check - client.get(url_for("form_watch_checknow"), follow_redirects=True) - # Give the thread time to pick it up - time.sleep(sleep_time_for_fetch_thread) - - # It should have 'unviewed' still - # Because it should be looking at only that 'sametext' id - res = client.get(url_for("index")) - assert b'Status Code 403' in res.data diff --git a/changedetectionio/update_worker.py b/changedetectionio/update_worker.py index 131da6ea..ccdc5bd7 100644 --- a/changedetectionio/update_worker.py +++ b/changedetectionio/update_worker.py @@ -1,3 +1,4 @@ +import os import threading import queue import time @@ -107,6 +108,14 @@ class update_worker(threading.Thread): self.notification_q.put(n_object) print("Sent filter not found notification for {}".format(watch_uuid)) + def cleanup_error_artifacts(self, uuid): + # All went fine, remove error artifacts + cleanup_files = ["last-error-screenshot.png", "last-error.txt"] + for f in cleanup_files: + full_path = os.path.join(self.datastore.datastore_path, uuid, f) + if os.path.isfile(full_path): + os.unlink(full_path) + def run(self): from changedetectionio import fetch_site_status @@ -146,7 +155,31 @@ class update_worker(threading.Thread): # Totally fine, it's by choice - just continue on, nothing more to care about # Page had elements/content but no renderable text # Backend (not filters) gave zero output - self.datastore.update_watch(uuid=uuid, update_obj={'last_error': 
"Got HTML content but no text found."}) + self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found (With {} reply code).".format(e.status_code)}) + if e.screenshot: + self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot) + process_changedetection_results = False + + except content_fetcher.Non200ErrorCodeReceived as e: + if e.status_code == 403: + err_text = "Error - 403 (Access denied) received" + elif e.status_code == 404: + err_text = "Error - 404 (Page not found) received" + elif e.status_code == 500: + err_text = "Error - 500 (Internal server Error) received" + else: + err_text = "Error - Request returned a HTTP error code {}".format(str(e.status_code)) + + if e.screenshot: + self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True) + if e.xpath_data: + self.datastore.save_xpath_data(watch_uuid=uuid, data=e.xpath_data, as_error=True) + if e.page_text: + self.datastore.save_error_text(watch_uuid=uuid, contents=e.page_text) + + self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, + # So that we get a trigger when the content is added again + 'previous_md5': ''}) process_changedetection_results = False except FilterNotFoundInResponse as e: @@ -182,8 +215,17 @@ class update_worker(threading.Thread): self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, 'last_check_status': e.status_code}) process_changedetection_results = False + except content_fetcher.JSActionExceptions as e: + err_text = "Error running JS Actions - Page request - "+e.message + if e.screenshot: + self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True) + self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, + 'last_check_status': e.status_code}) except content_fetcher.PageUnloadable as e: err_text = "Page request from server didnt respond correctly" + if e.screenshot: + 
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True) + self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, 'last_check_status': e.status_code}) except Exception as e: @@ -192,9 +234,13 @@ class update_worker(threading.Thread): # Other serious error process_changedetection_results = False else: + # Mark that we never had any failures - update_obj['consecutive_filter_failures'] = 0 - + if not self.datastore.data['watching'][uuid].get('ignore_status_codes'): + update_obj['consecutive_filter_failures'] = 0 + + self.cleanup_error_artifacts(uuid) + # Crash protection, the watch entry could have been removed by this point (during a slow chrome fetch etc) if not self.datastore.data['watching'].get(uuid): continue @@ -231,15 +277,15 @@ class update_worker(threading.Thread): self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)}) - # Always record that we atleast tried - self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3), - 'last_checked': round(time.time())}) + # Always record that we atleast tried + self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3), + 'last_checked': round(time.time())}) - # Always save the screenshot if it's available - if screenshot: - self.datastore.save_screenshot(watch_uuid=uuid, screenshot=screenshot) - if xpath_data: - self.datastore.save_xpath_data(watch_uuid=uuid, data=xpath_data) + # Always save the screenshot if it's available + if screenshot: + self.datastore.save_screenshot(watch_uuid=uuid, screenshot=screenshot) + if xpath_data: + self.datastore.save_xpath_data(watch_uuid=uuid, data=xpath_data) self.current_uuid = None # Done