diff --git a/Dockerfile b/Dockerfile index 23a3f2c4..03463647 100644 --- a/Dockerfile +++ b/Dockerfile @@ -22,7 +22,7 @@ RUN pip install --target=/dependencies -r /requirements.txt # Playwright is an alternative to Selenium # Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing -RUN pip install --target=/dependencies playwright~=1.20 \ +RUN pip install --target=/dependencies playwright~=1.24 \ || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled." # Final image stage diff --git a/README.md b/README.md index 3b7442e8..74918162 100644 --- a/README.md +++ b/README.md @@ -11,8 +11,6 @@ Live your data-life *pro-actively* instead of *re-actively*. Free, Open-source web page monitoring, notification and change detection. Don't have time? [**Try our $6.99/month subscription - unlimited checks and watches!**](https://lemonade.changedetection.io/start) -[![Discord](https://img.shields.io/badge/DISCORD-%237289DA.svg?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/XJZy7QK3ja) [ ![YouTube](https://img.shields.io/badge/YouTube-%23FF0000.svg?style=for-the-badge&logo=YouTube&logoColor=white)](https://www.youtube.com/channel/UCbS09q1TRf0o4N2t-WA3emQ) [![LinkedIn](https://img.shields.io/badge/linkedin-%230077B5.svg?style=for-the-badge&logo=linkedin&logoColor=white)](https://www.linkedin.com/company/changedetection-io/) - [](https://lemonade.changedetection.io/start) @@ -31,6 +29,7 @@ Free, Open-source web page monitoring, notification and change detection. Don't #### Example use cases - Products and services have a change in pricing +- _Out of stock notification_ and _Back In stock notification_ - Governmental department updates (changes are often only on their websites) - New software releases, security advisories when you're not on their mailing list. - Festivals with changes diff --git a/changedetection.py b/changedetection.py index 9e76cc8c..8455315a 100755 --- a/changedetection.py +++ b/changedetection.py @@ -6,6 +6,36 @@ # Read more https://github.com/dgtlmoon/changedetection.io/wiki from changedetectionio import changedetection +import multiprocessing +import signal +import os + +def sigchld_handler(_signo, _stack_frame): + import sys + print('Shutdown: Got SIGCHLD') + # https://stackoverflow.com/questions/40453496/python-multiprocessing-capturing-signals-to-restart-child-processes-or-shut-do + pid, status = os.waitpid(-1, os.WNOHANG | os.WUNTRACED | os.WCONTINUED) + + print('Sub-process: pid %d status %d' % (pid, status)) + if status != 0: + sys.exit(1) + + raise SystemExit if __name__ == '__main__': - changedetection.main() + + #signal.signal(signal.SIGCHLD, sigchld_handler) + + # The only way I could find to get Flask to shutdown, is to wrap it and then rely on the subsystem issuing SIGTERM/SIGKILL + parse_process = multiprocessing.Process(target=changedetection.main) + parse_process.daemon = True + parse_process.start() + import time + + try: + while True: + time.sleep(1) + + except KeyboardInterrupt: + #parse_process.terminate() not needed, because this process will issue it to the sub-process anyway + print ("Exited - CTRL+C") diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index 55650312..07558d8a 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -44,7 +44,7 @@ from flask_wtf import CSRFProtect from changedetectionio import html_tools from changedetectionio.api import api_v1 -__version__ = '0.39.17.1' +__version__ = '0.39.17.2' datastore = None @@ -54,7 +54,7 @@ ticker_thread = None extra_stylesheets = [] -update_q = queue.Queue() +update_q = queue.PriorityQueue() notification_q = queue.Queue() @@ -76,7 +76,7 @@ app.config['LOGIN_DISABLED'] = False # Disables caching of the templates app.config['TEMPLATES_AUTO_RELOAD'] = True - +app.jinja_env.add_extension('jinja2.ext.loopcontrols') csrf = CSRFProtect() csrf.init_app(app) @@ -115,18 +115,19 @@ def _jinja2_filter_datetime(watch_obj, format="%Y-%m-%d %H:%M:%S"): return timeago.format(int(watch_obj['last_checked']), time.time()) - -# @app.context_processor -# def timeago(): -# def _timeago(lower_time, now): -# return timeago.format(lower_time, now) -# return dict(timeago=_timeago) - @app.template_filter('format_timestamp_timeago') def _jinja2_filter_datetimestamp(timestamp, format="%Y-%m-%d %H:%M:%S"): + if timestamp == False: + return 'Not yet' + return timeago.format(timestamp, time.time()) - # return timeago.format(timestamp, time.time()) - # return datetime.datetime.utcfromtimestamp(timestamp).strftime(format) + +@app.template_filter('format_seconds_ago') +def _jinja2_filter_seconds_precise(timestamp): + if timestamp == False: + return 'Not yet' + + return format(int(time.time()-timestamp), ',d') # When nobody is logged in Flask-Login's current_user is set to an AnonymousUser object. class User(flask_login.UserMixin): @@ -313,7 +314,7 @@ def changedetection_app(config=None, datastore_o=None): watch['uuid'] = uuid sorted_watches.append(watch) - sorted_watches.sort(key=lambda x: x['last_changed'], reverse=True) + sorted_watches.sort(key=lambda x: x.last_changed, reverse=False) fg = FeedGenerator() fg.title('changedetection.io') @@ -332,7 +333,7 @@ def changedetection_app(config=None, datastore_o=None): if not watch.viewed: # Re #239 - GUID needs to be individual for each event # @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228) - guid = "{}/{}".format(watch['uuid'], watch['last_changed']) + guid = "{}/{}".format(watch['uuid'], watch.last_changed) fe = fg.add_entry() # Include a link to the diff page, they will have to login here to see if password protection is enabled. @@ -370,20 +371,20 @@ def changedetection_app(config=None, datastore_o=None): from changedetectionio import forms limit_tag = request.args.get('tag') - pause_uuid = request.args.get('pause') - # Redirect for the old rss path which used the /?rss=true if request.args.get('rss'): return redirect(url_for('rss', tag=limit_tag)) - if pause_uuid: - try: - datastore.data['watching'][pause_uuid]['paused'] ^= True - datastore.needs_write = True + op = request.args.get('op') + if op: + uuid = request.args.get('uuid') + if op == 'pause': + datastore.data['watching'][uuid]['paused'] ^= True + elif op == 'mute': + datastore.data['watching'][uuid]['notification_muted'] ^= True - return redirect(url_for('index', tag = limit_tag)) - except KeyError: - pass + datastore.needs_write = True + return redirect(url_for('index', tag = limit_tag)) # Sort by last_changed and add the uuid which is usually the key.. sorted_watches = [] @@ -406,7 +407,6 @@ def changedetection_app(config=None, datastore_o=None): existing_tags = datastore.get_all_tags() form = forms.quickWatchForm(request.form) - output = render_template("watch-overview.html", form=form, watches=sorted_watches, @@ -417,7 +417,7 @@ def changedetection_app(config=None, datastore_o=None): # Don't link to hosting when we're on the hosting environment hosted_sticky=os.getenv("SALTED_PASS", False) == False, guid=datastore.data['app_guid'], - queued_uuids=update_q.queue) + queued_uuids=[uuid for p,uuid in update_q.queue]) if session.get('share-link'): @@ -631,18 +631,14 @@ def changedetection_app(config=None, datastore_o=None): # But in the case something is added we should save straight away datastore.needs_write_urgent = True - # Queue the watch for immediate recheck - update_q.put(uuid) + # Queue the watch for immediate recheck, with a higher priority + update_q.put((1, uuid)) # Diff page [edit] link should go back to diff page - if request.args.get("next") and request.args.get("next") == 'diff' and not form.save_and_preview_button.data: + if request.args.get("next") and request.args.get("next") == 'diff': return redirect(url_for('diff_history_page', uuid=uuid)) - else: - if form.save_and_preview_button.data: - flash('You may need to reload this page to see the new content.') - return redirect(url_for('preview_page', uuid=uuid)) - else: - return redirect(url_for('index')) + + return redirect(url_for('index')) else: if request.method == 'POST' and not form.validate(): @@ -708,7 +704,14 @@ def changedetection_app(config=None, datastore_o=None): return redirect(url_for('settings_page')) if form.validate(): - datastore.data['settings']['application'].update(form.data['application']) + # Don't set password to False when a password is set - should be only removed with the `removepassword` button + app_update = dict(deepcopy(form.data['application'])) + + # Never update password with '' or False (Added by wtforms when not in submission) + if 'password' in app_update and not app_update['password']: + del (app_update['password']) + + datastore.data['settings']['application'].update(app_update) datastore.data['settings']['requests'].update(form.data['requests']) if not os.getenv("SALTED_PASS", False) and len(form.application.form.password.encrypted_password): @@ -746,7 +749,7 @@ def changedetection_app(config=None, datastore_o=None): importer = import_url_list() importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore) for uuid in importer.new_uuids: - update_q.put(uuid) + update_q.put((1, uuid)) if len(importer.remaining_data) == 0: return redirect(url_for('index')) @@ -759,7 +762,7 @@ def changedetection_app(config=None, datastore_o=None): d_importer = import_distill_io_json() d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore) for uuid in d_importer.new_uuids: - update_q.put(uuid) + update_q.put((1, uuid)) @@ -828,7 +831,7 @@ def changedetection_app(config=None, datastore_o=None): previous_version_file_contents = "Unable to read {}.\n".format(previous_file) - screenshot_url = datastore.get_screenshot(uuid) + screenshot_url = watch.get_screenshot() system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver' @@ -848,7 +851,11 @@ def changedetection_app(config=None, datastore_o=None): extra_title=" - Diff - {}".format(watch['title'] if watch['title'] else watch['url']), left_sticky=True, screenshot=screenshot_url, - is_html_webdriver=is_html_webdriver) + is_html_webdriver=is_html_webdriver, + last_error=watch['last_error'], + last_error_text=watch.get_error_text(), + last_error_screenshot=watch.get_error_snapshot() + ) return output @@ -863,73 +870,82 @@ def changedetection_app(config=None, datastore_o=None): if uuid == 'first': uuid = list(datastore.data['watching'].keys()).pop() - # Normally you would never reach this, because the 'preview' button is not available when there's no history - # However they may try to clear snapshots and reload the page - if datastore.data['watching'][uuid].history_n == 0: - flash("Preview unavailable - No fetch/check completed or triggers not reached", "error") - return redirect(url_for('index')) - - extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')] - try: watch = datastore.data['watching'][uuid] except KeyError: flash("No history found for the specified link, bad link?", "error") return redirect(url_for('index')) - if watch.history_n >0: - timestamps = sorted(watch.history.keys(), key=lambda x: int(x)) - filename = watch.history[timestamps[-1]] - try: - with open(filename, 'r') as f: - tmp = f.readlines() - - # Get what needs to be highlighted - ignore_rules = watch.get('ignore_text', []) + datastore.data['settings']['application']['global_ignore_text'] - - # .readlines will keep the \n, but we will parse it here again, in the future tidy this up - ignored_line_numbers = html_tools.strip_ignore_text(content="".join(tmp), - wordlist=ignore_rules, - mode='line numbers' - ) - - trigger_line_numbers = html_tools.strip_ignore_text(content="".join(tmp), - wordlist=watch['trigger_text'], - mode='line numbers' - ) - # Prepare the classes and lines used in the template - i=0 - for l in tmp: - classes=[] - i+=1 - if i in ignored_line_numbers: - classes.append('ignored') - if i in trigger_line_numbers: - classes.append('triggered') - content.append({'line': l, 'classes': ' '.join(classes)}) - - - except Exception as e: - content.append({'line': "File doesnt exist or unable to read file {}".format(filename), 'classes': ''}) - else: - content.append({'line': "No history found", 'classes': ''}) - - screenshot_url = datastore.get_screenshot(uuid) system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver' + extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')] + is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or ( watch.get('fetch_backend', None) is None and system_uses_webdriver) else False + # Never requested successfully, but we detected a fetch error + if datastore.data['watching'][uuid].history_n == 0 and (watch.get_error_text() or watch.get_error_snapshot()): + flash("Preview unavailable - No fetch/check completed or triggers not reached", "error") + output = render_template("preview.html", + content=content, + history_n=watch.history_n, + extra_stylesheets=extra_stylesheets, +# current_diff_url=watch['url'], + watch=watch, + uuid=uuid, + is_html_webdriver=is_html_webdriver, + last_error=watch['last_error'], + last_error_text=watch.get_error_text(), + last_error_screenshot=watch.get_error_snapshot()) + return output + + timestamp = list(watch.history.keys())[-1] + filename = watch.history[timestamp] + try: + with open(filename, 'r') as f: + tmp = f.readlines() + + # Get what needs to be highlighted + ignore_rules = watch.get('ignore_text', []) + datastore.data['settings']['application']['global_ignore_text'] + + # .readlines will keep the \n, but we will parse it here again, in the future tidy this up + ignored_line_numbers = html_tools.strip_ignore_text(content="".join(tmp), + wordlist=ignore_rules, + mode='line numbers' + ) + + trigger_line_numbers = html_tools.strip_ignore_text(content="".join(tmp), + wordlist=watch['trigger_text'], + mode='line numbers' + ) + # Prepare the classes and lines used in the template + i=0 + for l in tmp: + classes=[] + i+=1 + if i in ignored_line_numbers: + classes.append('ignored') + if i in trigger_line_numbers: + classes.append('triggered') + content.append({'line': l, 'classes': ' '.join(classes)}) + + except Exception as e: + content.append({'line': "File doesnt exist or unable to read file {}".format(filename), 'classes': ''}) + output = render_template("preview.html", content=content, + history_n=watch.history_n, extra_stylesheets=extra_stylesheets, ignored_line_numbers=ignored_line_numbers, triggered_line_numbers=trigger_line_numbers, current_diff_url=watch['url'], - screenshot=screenshot_url, + screenshot=watch.get_screenshot(), watch=watch, uuid=uuid, - is_html_webdriver=is_html_webdriver) + is_html_webdriver=is_html_webdriver, + last_error=watch['last_error'], + last_error_text=watch.get_error_text(), + last_error_screenshot=watch.get_error_snapshot()) return output @@ -1029,11 +1045,12 @@ def changedetection_app(config=None, datastore_o=None): if datastore.data['settings']['application']['password'] and not flask_login.current_user.is_authenticated: abort(403) + screenshot_filename = "last-screenshot.png" if not request.args.get('error_screenshot') else "last-error-screenshot.png" + # These files should be in our subdirectory try: # set nocache, set content-type - watch_dir = datastore_o.datastore_path + "/" + filename - response = make_response(send_from_directory(filename="last-screenshot.png", directory=watch_dir, path=watch_dir + "/last-screenshot.png")) + response = make_response(send_from_directory(os.path.join(datastore_o.datastore_path, filename), screenshot_filename)) response.headers['Content-type'] = 'image/png' response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate' response.headers['Pragma'] = 'no-cache' @@ -1090,7 +1107,7 @@ def changedetection_app(config=None, datastore_o=None): if not add_paused and new_uuid: # Straight into the queue. - update_q.put(new_uuid) + update_q.put((1, new_uuid)) flash("Watch added.") if add_paused: @@ -1127,7 +1144,7 @@ def changedetection_app(config=None, datastore_o=None): uuid = list(datastore.data['watching'].keys()).pop() new_uuid = datastore.clone(uuid) - update_q.put(new_uuid) + update_q.put((5, new_uuid)) flash('Cloned.') return redirect(url_for('index')) @@ -1148,7 +1165,7 @@ def changedetection_app(config=None, datastore_o=None): if uuid: if uuid not in running_uuids: - update_q.put(uuid) + update_q.put((1, uuid)) i = 1 elif tag != None: @@ -1156,7 +1173,7 @@ def changedetection_app(config=None, datastore_o=None): for watch_uuid, watch in datastore.data['watching'].items(): if (tag != None and tag in watch['tag']): if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']: - update_q.put(watch_uuid) + update_q.put((1, watch_uuid)) i += 1 else: @@ -1164,7 +1181,7 @@ def changedetection_app(config=None, datastore_o=None): for watch_uuid, watch in datastore.data['watching'].items(): if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']: - update_q.put(watch_uuid) + update_q.put((1, watch_uuid)) i += 1 flash("{} watches are queued for rechecking.".format(i)) return redirect(url_for('index', tag=tag)) @@ -1266,7 +1283,6 @@ def notification_runner(): global notification_debug_log from datetime import datetime import json - while not app.config.exit.is_set(): try: # At the moment only one thread runs (single runner) @@ -1368,14 +1384,14 @@ def ticker_thread_check_time_launch_checks(): seconds_since_last_recheck = now - watch['last_checked'] if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds: - if not uuid in running_uuids and uuid not in update_q.queue: - print("Queued watch UUID {} last checked at {} queued at {:0.2f} jitter {:0.2f}s, {:0.2f}s since last checked".format(uuid, + if not uuid in running_uuids and uuid not in [q_uuid for p,q_uuid in update_q.queue]: + print("> Queued watch UUID {} last checked at {} queued at {:0.2f} jitter {:0.2f}s, {:0.2f}s since last checked".format(uuid, watch['last_checked'], now, watch.jitter_seconds, now - watch['last_checked'])) # Into the queue with you - update_q.put(uuid) + update_q.put((5, uuid)) # Reset for next time watch.jitter_seconds = 0 diff --git a/changedetectionio/api/api_v1.py b/changedetectionio/api/api_v1.py index d61e93c0..a432bc67 100644 --- a/changedetectionio/api/api_v1.py +++ b/changedetectionio/api/api_v1.py @@ -24,7 +24,7 @@ class Watch(Resource): abort(404, message='No watch exists with the UUID of {}'.format(uuid)) if request.args.get('recheck'): - self.update_q.put(uuid) + self.update_q.put((1, uuid)) return "OK", 200 # Return without history, get that via another API call @@ -100,7 +100,7 @@ class CreateWatch(Resource): extras = {'title': json_data['title'].strip()} if json_data.get('title') else {} new_uuid = self.datastore.add_watch(url=json_data['url'].strip(), tag=tag, extras=extras) - self.update_q.put(new_uuid) + self.update_q.put((1, new_uuid)) return {'uuid': new_uuid}, 201 # Return concise list of available watches and some very basic info @@ -113,12 +113,12 @@ class CreateWatch(Resource): list[k] = {'url': v['url'], 'title': v['title'], 'last_checked': v['last_checked'], - 'last_changed': v['last_changed'], + 'last_changed': v.last_changed, 'last_error': v['last_error']} if request.args.get('recheck_all'): for uuid in self.datastore.data['watching'].keys(): - self.update_q.put(uuid) + self.update_q.put((1, uuid)) return {'status': "OK"}, 200 return list, 200 diff --git a/changedetectionio/changedetection.py b/changedetectionio/changedetection.py index 2adf5ffc..32c21ac4 100755 --- a/changedetectionio/changedetection.py +++ b/changedetectionio/changedetection.py @@ -4,6 +4,7 @@ import getopt import os +import signal import sys import eventlet @@ -11,7 +12,21 @@ import eventlet.wsgi from . import store, changedetection_app, content_fetcher from . import __version__ +# Only global so we can access it in the signal handler +datastore = None +app = None + +def sigterm_handler(_signo, _stack_frame): + global app + global datastore +# app.config.exit.set() + print('Shutdown: Got SIGTERM, DB saved to disk') + datastore.sync_to_json() +# raise SystemExit + def main(): + global datastore + global app ssl_mode = False host = '' port = os.environ.get('PORT') or 5000 @@ -35,11 +50,6 @@ def main(): create_datastore_dir = False for opt, arg in opts: - # if opt == '--clear-all-history': - # Remove history, the actual files you need to delete manually. - # for uuid, watch in datastore.data['watching'].items(): - # watch.update({'history': {}, 'last_checked': 0, 'last_changed': 0, 'previous_md5': None}) - if opt == '-s': ssl_mode = True @@ -72,9 +82,12 @@ def main(): "Or use the -C parameter to create the directory.".format(app_config['datastore_path']), file=sys.stderr) sys.exit(2) + datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], version_tag=__version__) app = changedetection_app(app_config, datastore) + signal.signal(signal.SIGTERM, sigterm_handler) + # Go into cleanup mode if do_cleanup: datastore.remove_unused_snapshots() @@ -111,4 +124,3 @@ def main(): else: eventlet.wsgi.server(eventlet.listen((host, int(port))), app) - diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py index 90988179..49baf72f 100644 --- a/changedetectionio/content_fetcher.py +++ b/changedetectionio/content_fetcher.py @@ -6,38 +6,64 @@ import requests import time import sys + +class Non200ErrorCodeReceived(Exception): + def __init__(self, status_code, url, screenshot=None, xpath_data=None, page_html=None): + # Set this so we can use it in other parts of the app + self.status_code = status_code + self.url = url + self.screenshot = screenshot + self.xpath_data = xpath_data + self.page_text = None + + if page_html: + from changedetectionio import html_tools + self.page_text = html_tools.html_to_text(page_html) + return + + +class JSActionExceptions(Exception): + def __init__(self, status_code, url, screenshot, message=''): + self.status_code = status_code + self.url = url + self.screenshot = screenshot + self.message = message + return + class PageUnloadable(Exception): - def __init__(self, status_code, url): + def __init__(self, status_code, url, screenshot=False, message=False): # Set this so we can use it in other parts of the app self.status_code = status_code self.url = url + self.screenshot = screenshot + self.message = message return - pass class EmptyReply(Exception): - def __init__(self, status_code, url): + def __init__(self, status_code, url, screenshot=None): # Set this so we can use it in other parts of the app self.status_code = status_code self.url = url + self.screenshot = screenshot return - pass class ScreenshotUnavailable(Exception): - def __init__(self, status_code, url): + def __init__(self, status_code, url, page_html=None): # Set this so we can use it in other parts of the app self.status_code = status_code self.url = url + if page_html: + from html_tools import html_to_text + self.page_text = html_to_text(page_html) return - pass class ReplyWithContentButNoText(Exception): - def __init__(self, status_code, url): + def __init__(self, status_code, url, screenshot=None): # Set this so we can use it in other parts of the app self.status_code = status_code self.url = url + self.screenshot = screenshot return - pass - class Fetcher(): error = None @@ -180,7 +206,7 @@ class Fetcher(): system_https_proxy = os.getenv('HTTPS_PROXY') # Time ONTOP of the system defined env minimum time - render_extract_delay=0 + render_extract_delay = 0 @abstractmethod def get_error(self): @@ -319,40 +345,53 @@ class base_html_playwright(Fetcher): with page.expect_navigation(): response = page.goto(url, wait_until='load') - if self.webdriver_js_execute_code is not None: - page.evaluate(self.webdriver_js_execute_code) except playwright._impl._api_types.TimeoutError as e: context.close() browser.close() # This can be ok, we will try to grab what we could retrieve pass + except Exception as e: - print ("other exception when page.goto") - print (str(e)) + print("other exception when page.goto") + print(str(e)) context.close() browser.close() - raise PageUnloadable(url=url, status_code=None) + raise PageUnloadable(url=url, status_code=None, message=e.message) if response is None: context.close() browser.close() - print ("response object was none") + print("response object was none") raise EmptyReply(url=url, status_code=None) # Bug 2(?) Set the viewport size AFTER loading the page - page.set_viewport_size({"width": 1280, "height": 1024}) + page.set_viewport_size({"width": 1280, "height": 1024}) extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay time.sleep(extra_wait) + + if self.webdriver_js_execute_code is not None: + try: + page.evaluate(self.webdriver_js_execute_code) + except Exception as e: + # Is it possible to get a screenshot? + error_screenshot = False + try: + page.screenshot(type='jpeg', + clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024}, + quality=1) + + # The actual screenshot + error_screenshot = page.screenshot(type='jpeg', + full_page=True, + quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72))) + except Exception as s: + pass + + raise JSActionExceptions(status_code=response.status, screenshot=error_screenshot, message=str(e), url=url) + self.content = page.content() self.status_code = response.status - - if len(self.content.strip()) == 0: - context.close() - browser.close() - print ("Content was empty") - raise EmptyReply(url=url, status_code=None) - self.headers = response.all_headers() if current_css_filter is not None: @@ -379,9 +418,17 @@ class base_html_playwright(Fetcher): browser.close() raise ScreenshotUnavailable(url=url, status_code=None) + if len(self.content.strip()) == 0: + context.close() + browser.close() + print("Content was empty") + raise EmptyReply(url=url, status_code=None, screenshot=self.screenshot) + context.close() browser.close() + if not ignore_status_codes and self.status_code!=200: + raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, page_html=self.content, screenshot=self.screenshot) class base_html_webdriver(Fetcher): if os.getenv("WEBDRIVER_URL"): @@ -509,7 +556,7 @@ class html_requests(Fetcher): ignore_status_codes=False, current_css_filter=None): - proxies={} + proxies = {} # Allows override the proxy on a per-request basis if self.proxy_override: @@ -537,10 +584,14 @@ class html_requests(Fetcher): if encoding: r.encoding = encoding + if not r.content or not len(r.content): + raise EmptyReply(url=url, status_code=r.status_code) + # @todo test this # @todo maybe you really want to test zero-byte return pages? - if (not ignore_status_codes and not r) or not r.content or not len(r.content): - raise EmptyReply(url=url, status_code=r.status_code) + if r.status_code != 200 and not ignore_status_codes: + # maybe check with content works? + raise Non200ErrorCodeReceived(url=url, status_code=r.status_code, page_html=r.text) self.status_code = r.status_code self.content = r.text diff --git a/changedetectionio/fetch_site_status.py b/changedetectionio/fetch_site_status.py index d4f618b0..b9d057ae 100644 --- a/changedetectionio/fetch_site_status.py +++ b/changedetectionio/fetch_site_status.py @@ -14,6 +14,8 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) # Some common stuff here that can be moved to a base class # (set_proxy_from_list) class perform_site_check(): + screenshot = None + xpath_data = None def __init__(self, *args, datastore, **kwargs): super().__init__(*args, **kwargs) @@ -95,7 +97,7 @@ class perform_site_check(): url = self.datastore.get_val(uuid, 'url') request_body = self.datastore.get_val(uuid, 'body') request_method = self.datastore.get_val(uuid, 'method') - ignore_status_code = self.datastore.get_val(uuid, 'ignore_status_codes') + ignore_status_codes = self.datastore.data['watching'][uuid].get('ignore_status_codes', False) # source: support is_source = False @@ -125,9 +127,12 @@ class perform_site_check(): if watch['webdriver_js_execute_code'] is not None and watch['webdriver_js_execute_code'].strip(): fetcher.webdriver_js_execute_code = watch['webdriver_js_execute_code'] - fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_code, watch['css_filter']) + fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch['css_filter']) fetcher.quit() + self.screenshot = fetcher.screenshot + self.xpath_data = fetcher.xpath_data + # Fetching complete, now filters # @todo move to class / maybe inside of fetcher abstract base? @@ -211,7 +216,7 @@ class perform_site_check(): # Treat pages with no renderable text content as a change? No by default empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False) if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0: - raise content_fetcher.ReplyWithContentButNoText(url=url, status_code=200) + raise content_fetcher.ReplyWithContentButNoText(url=url, status_code=fetcher.get_last_status_code(), screenshot=screenshot) # We rely on the actual text in the html output.. many sites have random script vars etc, # in the future we'll implement other mechanisms. @@ -328,4 +333,4 @@ class perform_site_check(): if not watch.get('previous_md5'): watch['previous_md5'] = fetched_md5 - return changed_detected, update_obj, text_content_before_ignored_filter, fetcher.screenshot, fetcher.xpath_data + return changed_detected, update_obj, text_content_before_ignored_filter diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py index f008ea8d..ca70355a 100644 --- a/changedetectionio/forms.py +++ b/changedetectionio/forms.py @@ -363,7 +363,7 @@ class watchForm(commonSettingsForm): webdriver_js_execute_code = TextAreaField('Execute JavaScript before change detection', render_kw={"rows": "5"}, validators=[validators.Optional()]) save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"}) - save_and_preview_button = SubmitField('Save & Preview', render_kw={"class": "pure-button pure-button-primary"}) + proxy = RadioField('Proxy') filter_failure_notification_send = BooleanField( 'Send a notification when the filter can no longer be found on the page', default=False) @@ -397,7 +397,6 @@ class globalSettingsApplicationForm(commonSettingsForm): global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)]) global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()]) ignore_whitespace = BooleanField('Ignore whitespace') - real_browser_save_screenshot = BooleanField('Save last screenshot when using Chrome?') removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"}) empty_pages_are_a_change = BooleanField('Treat empty pages as a change?', default=False) render_anchor_tag_content = BooleanField('Render anchor tag content', default=False) diff --git a/changedetectionio/model/App.py b/changedetectionio/model/App.py index 6e74d483..c5f0e977 100644 --- a/changedetectionio/model/App.py +++ b/changedetectionio/model/App.py @@ -42,7 +42,6 @@ class model(dict): 'notification_title': default_notification_title, 'notification_body': default_notification_body, 'notification_format': default_notification_format, - 'real_browser_save_screenshot': True, 'schema_version' : 0, 'webdriver_delay': None # Extra delay in seconds before extracting text } diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py index 85a993a4..7d1481b9 100644 --- a/changedetectionio/model/Watch.py +++ b/changedetectionio/model/Watch.py @@ -19,7 +19,6 @@ class model(dict): 'url': None, 'tag': None, 'last_checked': 0, - 'last_changed': 0, 'paused': False, 'last_viewed': 0, # history key value of the last viewed via the [diff] link #'newest_history_key': 0, @@ -36,7 +35,9 @@ class model(dict): 'notification_title': default_notification_title, 'notification_body': default_notification_body, 'notification_format': default_notification_format, + 'notification_muted': False, 'css_filter': '', + 'last_error': False, 'extract_text': [], # Extract text by regex after filters 'subtractive_selectors': [], 'trigger_text': [], # List of text or regex to wait for until a change is detected @@ -59,11 +60,11 @@ class model(dict): jitter_seconds = 0 def __init__(self, *arg, **kw): - import uuid + self.update(self.__base_config) self.__datastore_path = kw['datastore_path'] - self['uuid'] = str(uuid.uuid4()) + self['uuid'] = str(uuid_builder.uuid4()) del kw['datastore_path'] @@ -71,7 +72,10 @@ class model(dict): self.update(kw['default']) del kw['default'] - # goes at the end so we update the default object with the initialiser + # Be sure the cached timestamp is ready + bump = self.history + + # Goes at the end so we update the default object with the initialiser super(model, self).__init__(*arg, **kw) @property @@ -81,6 +85,28 @@ class model(dict): return False + def ensure_data_dir_exists(self): + target_path = os.path.join(self.__datastore_path, self['uuid']) + if not os.path.isdir(target_path): + print ("> Creating data dir {}".format(target_path)) + os.mkdir(target_path) + + @property + def label(self): + # Used for sorting + if self['title']: + return self['title'] + return self['url'] + + @property + def last_changed(self): + # last_changed will be the newest snapshot, but when we have just one snapshot, it should be 0 + if self.__history_n <= 1: + return 0 + if self.__newest_history_key: + return int(self.__newest_history_key) + return 0 + @property def history_n(self): return self.__history_n @@ -123,19 +149,15 @@ class model(dict): bump = self.history return self.__newest_history_key - # Save some text file to the appropriate path and bump the history # result_obj from fetch_site_status.run() def save_history_text(self, contents, timestamp): import uuid - from os import mkdir, path, unlink import logging output_path = "{}/{}".format(self.__datastore_path, self['uuid']) - # Incase the operator deleted it, check and create. - if not os.path.isdir(output_path): - mkdir(output_path) + self.ensure_data_dir_exists() snapshot_fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4()) logging.debug("Saving history text {}".format(snapshot_fname)) @@ -206,7 +228,7 @@ class model(dict): return seconds # Iterate over all history texts and see if something new exists - def lines_contain_something_unique_compared_to_history(self, lines=[]): + def lines_contain_something_unique_compared_to_history(self, lines: list): local_lines = set([l.decode('utf-8').strip().lower() for l in lines]) # Compare each lines (set) against each history text file (set) looking for something new.. @@ -244,3 +266,50 @@ class model(dict): return diff_types + def get_screenshot(self): + fname = os.path.join(self.__datastore_path, self['uuid'], "last-screenshot.png") + if os.path.isfile(fname): + return fname + + return False + + def __get_file_ctime(self, filename): + fname = os.path.join(self.__datastore_path, self['uuid'], filename) + if os.path.isfile(fname): + return int(os.path.getmtime(fname)) + return False + + @property + def error_text_ctime(self): + return self.__get_file_ctime('last-error.txt') + + @property + def snapshot_text_ctime(self): + if self.history_n==0: + return False + + timestamp = list(self.history.keys())[-1] + return int(timestamp) + + @property + def snapshot_screenshot_ctime(self): + return self.__get_file_ctime('last-screenshot.png') + + @property + def snapshot_error_screenshot_ctime(self): + return self.__get_file_ctime('last-error-screenshot.png') + + def get_error_text(self): + """Return the text saved from a previous request that resulted in a non-200 error""" + fname = os.path.join(self.__datastore_path, self['uuid'], "last-error.txt") + if os.path.isfile(fname): + with open(fname, 'r') as f: + return f.read() + return False + + def get_error_snapshot(self): + """Return path to the screenshot that resulted in a non-200 error""" + fname = os.path.join(self.__datastore_path, self['uuid'], "last-error-screenshot.png") + if os.path.isfile(fname): + return fname + return False diff --git a/changedetectionio/run_all_tests.sh b/changedetectionio/run_all_tests.sh index c2bbf9aa..ce428f12 100755 --- a/changedetectionio/run_all_tests.sh +++ b/changedetectionio/run_all_tests.sh @@ -32,16 +32,20 @@ docker run -d --name $$-test_selenium -p 4444:4444 --rm --shm-size="2g" seleni sleep 5 export WEBDRIVER_URL=http://localhost:4444/wd/hub pytest tests/fetchers/test_content.py +pytest tests/test_errorhandling.py unset WEBDRIVER_URL docker kill $$-test_selenium echo "TESTING WEBDRIVER FETCH > PLAYWRIGHT/BROWSERLESS..." # Not all platforms support playwright (not ARM/rPI), so it's not packaged in requirements.txt -pip3 install playwright~=1.22 +pip3 install playwright~=1.24 docker run -d --name $$-test_browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm -p 3000:3000 --shm-size="2g" browserless/chrome:1.53-chrome-stable # takes a while to spin up sleep 5 export PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000 pytest tests/fetchers/test_content.py +pytest tests/test_errorhandling.py +pytest tests/visualselector/test_fetch_data.py + unset PLAYWRIGHT_DRIVER_URL docker kill $$-test_browserless \ No newline at end of file diff --git a/changedetectionio/static/images/bell-off.svg b/changedetectionio/static/images/bell-off.svg new file mode 100644 index 00000000..0bbe2765 --- /dev/null +++ b/changedetectionio/static/images/bell-off.svg @@ -0,0 +1,42 @@ + + diff --git a/changedetectionio/static/js/diff-overview.js b/changedetectionio/static/js/diff-overview.js index f39b9906..fa94316f 100644 --- a/changedetectionio/static/js/diff-overview.js +++ b/changedetectionio/static/js/diff-overview.js @@ -10,7 +10,13 @@ $(document).ready(function () { if (hash_name === '#screenshot') { $("img#screenshot-img").attr('src', screenshot_url); $("#settings").hide(); - } else { + } else if (hash_name === '#error-screenshot') { + $("img#error-screenshot-img").attr('src', error_screenshot_url); + $("#settings").hide(); + } + + + else { $("#settings").show(); } } diff --git a/changedetectionio/static/js/tabs.js b/changedetectionio/static/js/tabs.js index f600ef47..46251382 100644 --- a/changedetectionio/static/js/tabs.js +++ b/changedetectionio/static/js/tabs.js @@ -1,51 +1,44 @@ // Rewrite this is a plugin.. is all this JS really 'worth it?' - -if(!window.location.hash) { - var tab=document.querySelectorAll("#default-tab a"); - tab[0].click(); -} - -window.addEventListener('hashchange', function() { - var tabs = document.getElementsByClassName('active'); - while (tabs[0]) { - tabs[0].classList.remove('active') - } - set_active_tab(); +window.addEventListener('hashchange', function () { + var tabs = document.getElementsByClassName('active'); + while (tabs[0]) { + tabs[0].classList.remove('active') + } + set_active_tab(); }, false); -var has_errors=document.querySelectorAll(".messages .error"); +var has_errors = document.querySelectorAll(".messages .error"); if (!has_errors.length) { - if (document.location.hash == "" ) { - document.location.hash = "#general"; - document.getElementById("default-tab").className = "active"; + if (document.location.hash == "") { + document.querySelector(".tabs ul li:first-child a").click(); } else { set_active_tab(); } } else { - focus_error_tab(); + focus_error_tab(); } function set_active_tab() { - var tab=document.querySelectorAll("a[href='"+location.hash+"']"); - if (tab.length) { - tab[0].parentElement.className="active"; - } + var tab = document.querySelectorAll("a[href='" + location.hash + "']"); + if (tab.length) { + tab[0].parentElement.className = "active"; + } // hash could move the page down window.scrollTo(0, 0); } function focus_error_tab() { - // time to use jquery or vuejs really, - // activate the tab with the error - var tabs = document.querySelectorAll('.tabs li a'),i; + // time to use jquery or vuejs really, + // activate the tab with the error + var tabs = document.querySelectorAll('.tabs li a'), i; for (i = 0; i < tabs.length; ++i) { - var tab_name=tabs[i].hash.replace('#',''); - var pane_errors=document.querySelectorAll('#'+tab_name+' .error') - if (pane_errors.length) { - document.location.hash = '#'+tab_name; - return true; - } + var tab_name = tabs[i].hash.replace('#', ''); + var pane_errors = document.querySelectorAll('#' + tab_name + ' .error') + if (pane_errors.length) { + document.location.hash = '#' + tab_name; + return true; + } } return false; } diff --git a/changedetectionio/static/styles/parts/_arrows.scss b/changedetectionio/static/styles/parts/_arrows.scss new file mode 100644 index 00000000..161330d5 --- /dev/null +++ b/changedetectionio/static/styles/parts/_arrows.scss @@ -0,0 +1,26 @@ +.arrow { + border: solid #1b98f8; + border-width: 0 2px 2px 0; + display: inline-block; + padding: 3px; + + &.right { + transform: rotate(-45deg); + -webkit-transform: rotate(-45deg); + } + + &.left { + transform: rotate(135deg); + -webkit-transform: rotate(135deg); + } + + &.up, &.asc { + transform: rotate(-135deg); + -webkit-transform: rotate(-135deg); + } + + &.down, &.desc { + transform: rotate(45deg); + -webkit-transform: rotate(45deg); + } +} diff --git a/changedetectionio/static/styles/styles.css b/changedetectionio/static/styles/styles.css index f3878fb6..61b1fbb1 100644 --- a/changedetectionio/static/styles/styles.css +++ b/changedetectionio/static/styles/styles.css @@ -4,6 +4,24 @@ * nvm use v14.18.1 && npm install && npm run build * or npm run watch */ +.arrow { + border: solid #1b98f8; + border-width: 0 2px 2px 0; + display: inline-block; + padding: 3px; } + .arrow.right { + transform: rotate(-45deg); + -webkit-transform: rotate(-45deg); } + .arrow.left { + transform: rotate(135deg); + -webkit-transform: rotate(135deg); } + .arrow.up, .arrow.asc { + transform: rotate(-135deg); + -webkit-transform: rotate(-135deg); } + .arrow.down, .arrow.desc { + transform: rotate(45deg); + -webkit-transform: rotate(45deg); } + body { color: #333; background: #262626; } @@ -53,6 +71,12 @@ code { white-space: normal; } .watch-table th { white-space: nowrap; } + .watch-table th a { + font-weight: normal; } + .watch-table th a.active { + font-weight: bolder; } + .watch-table th a.inactive .arrow { + display: none; } .watch-table .title-col a[target="_blank"]::after, .watch-table .current-diff-url::after { content: url(); margin: 0 3px 0 5px; } @@ -103,24 +127,6 @@ body:after, body:before { -webkit-clip-path: polygon(100% 0, 0 0, 0 77.5%, 1% 77.4%, 2% 77.1%, 3% 76.6%, 4% 75.9%, 5% 75.05%, 6% 74.05%, 7% 72.95%, 8% 71.75%, 9% 70.55%, 10% 69.3%, 11% 68.05%, 12% 66.9%, 13% 65.8%, 14% 64.8%, 15% 64%, 16% 63.35%, 17% 62.85%, 18% 62.6%, 19% 62.5%, 20% 62.65%, 21% 63%, 22% 63.5%, 23% 64.2%, 24% 65.1%, 25% 66.1%, 26% 67.2%, 27% 68.4%, 28% 69.65%, 29% 70.9%, 30% 72.15%, 31% 73.3%, 32% 74.35%, 33% 75.3%, 34% 76.1%, 35% 76.75%, 36% 77.2%, 37% 77.45%, 38% 77.5%, 39% 77.3%, 40% 76.95%, 41% 76.4%, 42% 75.65%, 43% 74.75%, 44% 73.75%, 45% 72.6%, 46% 71.4%, 47% 70.15%, 48% 68.9%, 49% 67.7%, 50% 66.55%, 51% 65.5%, 52% 64.55%, 53% 63.75%, 54% 63.15%, 55% 62.75%, 56% 62.55%, 57% 62.5%, 58% 62.7%, 59% 63.1%, 60% 63.7%, 61% 64.45%, 62% 65.4%, 63% 66.45%, 64% 67.6%, 65% 68.8%, 66% 70.05%, 67% 71.3%, 68% 72.5%, 69% 73.6%, 70% 74.65%, 71% 75.55%, 72% 76.35%, 73% 76.9%, 74% 77.3%, 75% 77.5%, 76% 77.45%, 77% 77.25%, 78% 76.8%, 79% 76.2%, 80% 75.4%, 81% 74.45%, 82% 73.4%, 83% 72.25%, 84% 71.05%, 85% 69.8%, 86% 68.55%, 87% 67.35%, 88% 66.2%, 89% 65.2%, 90% 64.3%, 91% 63.55%, 92% 63%, 93% 62.65%, 94% 62.5%, 95% 62.55%, 96% 62.8%, 97% 63.3%, 98% 63.9%, 99% 64.75%, 100% 65.7%); clip-path: polygon(100% 0, 0 0, 0 77.5%, 1% 77.4%, 2% 77.1%, 3% 76.6%, 4% 75.9%, 5% 75.05%, 6% 74.05%, 7% 72.95%, 8% 71.75%, 9% 70.55%, 10% 69.3%, 11% 68.05%, 12% 66.9%, 13% 65.8%, 14% 64.8%, 15% 64%, 16% 63.35%, 17% 62.85%, 18% 62.6%, 19% 62.5%, 20% 62.65%, 21% 63%, 22% 63.5%, 23% 64.2%, 24% 65.1%, 25% 66.1%, 26% 67.2%, 27% 68.4%, 28% 69.65%, 29% 70.9%, 30% 72.15%, 31% 73.3%, 32% 74.35%, 33% 75.3%, 34% 76.1%, 35% 76.75%, 36% 77.2%, 37% 77.45%, 38% 77.5%, 39% 77.3%, 40% 76.95%, 41% 76.4%, 42% 75.65%, 43% 74.75%, 44% 73.75%, 45% 72.6%, 46% 71.4%, 47% 70.15%, 48% 68.9%, 49% 67.7%, 50% 66.55%, 51% 65.5%, 52% 64.55%, 53% 63.75%, 54% 63.15%, 55% 62.75%, 56% 62.55%, 57% 62.5%, 58% 62.7%, 59% 63.1%, 60% 63.7%, 61% 64.45%, 62% 65.4%, 63% 66.45%, 64% 67.6%, 65% 68.8%, 66% 70.05%, 67% 71.3%, 68% 72.5%, 69% 73.6%, 70% 74.65%, 71% 75.55%, 72% 76.35%, 73% 76.9%, 74% 77.3%, 75% 77.5%, 76% 77.45%, 77% 77.25%, 78% 76.8%, 79% 76.2%, 80% 75.4%, 81% 74.45%, 82% 73.4%, 83% 72.25%, 84% 71.05%, 85% 69.8%, 86% 68.55%, 87% 67.35%, 88% 66.2%, 89% 65.2%, 90% 64.3%, 91% 63.55%, 92% 63%, 93% 62.65%, 94% 62.5%, 95% 62.55%, 96% 62.8%, 97% 63.3%, 98% 63.9%, 99% 64.75%, 100% 65.7%); } -.arrow { - border: solid black; - border-width: 0 3px 3px 0; - display: inline-block; - padding: 3px; } - .arrow.right { - transform: rotate(-45deg); - -webkit-transform: rotate(-45deg); } - .arrow.left { - transform: rotate(135deg); - -webkit-transform: rotate(135deg); } - .arrow.up { - transform: rotate(-135deg); - -webkit-transform: rotate(-135deg); } - .arrow.down { - transform: rotate(45deg); - -webkit-transform: rotate(45deg); } - .button-small { font-size: 85%; } @@ -271,11 +277,15 @@ footer { #new-version-text a { color: #e07171; } -.paused-state.state-False img { - opacity: 0.2; } - -.paused-state.state-False:hover img { - opacity: 0.8; } +.watch-controls { + /* default */ } + .watch-controls .state-on img { + opacity: 0.8; } + .watch-controls img { + opacity: 0.2; } + .watch-controls img:hover { + transition: opacity 0.3s; + opacity: 0.8; } .monospaced-textarea textarea { width: 100%; @@ -535,3 +545,13 @@ ul { 100% { -webkit-transform: rotate(360deg); transform: rotate(360deg); } } + +.snapshot-age { + padding: 4px; + background-color: #dfdfdf; + border-radius: 3px; + font-weight: bold; + margin-bottom: 4px; } + .snapshot-age.error { + background-color: #ff0000; + color: #fff; } diff --git a/changedetectionio/static/styles/styles.scss b/changedetectionio/static/styles/styles.scss index 761235fe..1801a0e0 100644 --- a/changedetectionio/static/styles/styles.scss +++ b/changedetectionio/static/styles/styles.scss @@ -4,6 +4,8 @@ * nvm use v14.18.1 && npm install && npm run build * or npm run watch */ +@import "parts/_arrows.scss"; + body { color: #333; background: #262626; @@ -68,6 +70,17 @@ code { th { white-space: nowrap; + a { + font-weight: normal; + &.active { + font-weight: bolder; + } + &.inactive { + .arrow { + display: none; + } + } + } } .title-col a[target="_blank"]::after, .current-diff-url::after { @@ -137,29 +150,6 @@ body:after, body:before { clip-path: polygon(100% 0, 0 0, 0 77.5%, 1% 77.4%, 2% 77.1%, 3% 76.6%, 4% 75.9%, 5% 75.05%, 6% 74.05%, 7% 72.95%, 8% 71.75%, 9% 70.55%, 10% 69.3%, 11% 68.05%, 12% 66.9%, 13% 65.8%, 14% 64.8%, 15% 64%, 16% 63.35%, 17% 62.85%, 18% 62.6%, 19% 62.5%, 20% 62.65%, 21% 63%, 22% 63.5%, 23% 64.2%, 24% 65.1%, 25% 66.1%, 26% 67.2%, 27% 68.4%, 28% 69.65%, 29% 70.9%, 30% 72.15%, 31% 73.3%, 32% 74.35%, 33% 75.3%, 34% 76.1%, 35% 76.75%, 36% 77.2%, 37% 77.45%, 38% 77.5%, 39% 77.3%, 40% 76.95%, 41% 76.4%, 42% 75.65%, 43% 74.75%, 44% 73.75%, 45% 72.6%, 46% 71.4%, 47% 70.15%, 48% 68.9%, 49% 67.7%, 50% 66.55%, 51% 65.5%, 52% 64.55%, 53% 63.75%, 54% 63.15%, 55% 62.75%, 56% 62.55%, 57% 62.5%, 58% 62.7%, 59% 63.1%, 60% 63.7%, 61% 64.45%, 62% 65.4%, 63% 66.45%, 64% 67.6%, 65% 68.8%, 66% 70.05%, 67% 71.3%, 68% 72.5%, 69% 73.6%, 70% 74.65%, 71% 75.55%, 72% 76.35%, 73% 76.9%, 74% 77.3%, 75% 77.5%, 76% 77.45%, 77% 77.25%, 78% 76.8%, 79% 76.2%, 80% 75.4%, 81% 74.45%, 82% 73.4%, 83% 72.25%, 84% 71.05%, 85% 69.8%, 86% 68.55%, 87% 67.35%, 88% 66.2%, 89% 65.2%, 90% 64.3%, 91% 63.55%, 92% 63%, 93% 62.65%, 94% 62.5%, 95% 62.55%, 96% 62.8%, 97% 63.3%, 98% 63.9%, 99% 64.75%, 100% 65.7%) } -.arrow { - border: solid black; - border-width: 0 3px 3px 0; - display: inline-block; - padding: 3px; - &.right { - transform: rotate(-45deg); - -webkit-transform: rotate(-45deg); - } - &.left { - transform: rotate(135deg); - -webkit-transform: rotate(135deg); - } - &.up { - transform: rotate(-135deg); - -webkit-transform: rotate(-135deg); - } - &.down { - transform: rotate(45deg); - -webkit-transform: rotate(45deg); - } -} - .button-small { font-size: 85%; } @@ -362,14 +352,25 @@ footer { color: #e07171; } -.paused-state { - &.state-False img { +.watch-controls { + .state-on { + img { + opacity: 0.8; + } + } + + /* default */ + img { opacity: 0.2; } - &.state-False:hover img { - opacity: 0.8; + img { + &:hover { + transition: opacity 0.3s; + opacity: 0.8; + } } + } .monospaced-textarea { @@ -501,6 +502,7 @@ and also iPads specifically. vertical-align: middle; } } + .last-checked::before { color: #555; content: "Last Checked "; @@ -760,3 +762,15 @@ ul { } } +.snapshot-age { + padding: 4px; + background-color: #dfdfdf; + border-radius: 3px; + font-weight: bold; + margin-bottom: 4px; + &.error { + background-color: #ff0000; + color: #fff; + } +} + diff --git a/changedetectionio/store.py b/changedetectionio/store.py index c7e8ca7f..a9af67a3 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -8,7 +8,7 @@ import threading import time import uuid as uuid_builder from copy import deepcopy -from os import mkdir, path, unlink +from os import path, unlink from threading import Lock import re import requests @@ -254,7 +254,6 @@ class ChangeDetectionStore: self.__data['watching'][uuid].update( {'last_checked': 0, - 'last_changed': 0, 'last_viewed': 0, 'previous_md5': False, 'last_notification_error': False, @@ -325,25 +324,12 @@ class ChangeDetectionStore: new_watch.update(apply_extras) self.__data['watching'][new_uuid]=new_watch - # Get the directory ready - output_path = "{}/{}".format(self.datastore_path, new_uuid) - try: - mkdir(output_path) - except FileExistsError: - print(output_path, "already exists.") + self.__data['watching'][new_uuid].ensure_data_dir_exists() if write_to_disk_now: self.sync_to_json() return new_uuid - def get_screenshot(self, watch_uuid): - output_path = "{}/{}".format(self.datastore_path, watch_uuid) - fname = "{}/last-screenshot.png".format(output_path) - if path.isfile(fname): - return fname - - return False - def visualselector_data_is_ready(self, watch_uuid): output_path = "{}/{}".format(self.datastore_path, watch_uuid) screenshot_filename = "{}/last-screenshot.png".format(output_path) @@ -354,17 +340,34 @@ class ChangeDetectionStore: return False # Save as PNG, PNG is larger but better for doing visual diff in the future - def save_screenshot(self, watch_uuid, screenshot: bytes): - output_path = "{}/{}".format(self.datastore_path, watch_uuid) - fname = "{}/last-screenshot.png".format(output_path) - with open(fname, 'wb') as f: + def save_screenshot(self, watch_uuid, screenshot: bytes, as_error=False): + + if as_error: + target_path = os.path.join(self.datastore_path, watch_uuid, "last-error-screenshot.png") + else: + target_path = os.path.join(self.datastore_path, watch_uuid, "last-screenshot.png") + + self.data['watching'][watch_uuid].ensure_data_dir_exists() + + with open(target_path, 'wb') as f: f.write(screenshot) f.close() - def save_xpath_data(self, watch_uuid, data): - output_path = "{}/{}".format(self.datastore_path, watch_uuid) - fname = "{}/elements.json".format(output_path) - with open(fname, 'w') as f: + def save_error_text(self, watch_uuid, contents): + + target_path = os.path.join(self.datastore_path, watch_uuid, "last-error.txt") + + with open(target_path, 'w') as f: + f.write(contents) + + def save_xpath_data(self, watch_uuid, data, as_error=False): + + if as_error: + target_path = os.path.join(self.datastore_path, watch_uuid, "elements-error.json") + else: + target_path = os.path.join(self.datastore_path, watch_uuid, "elements.json") + + with open(target_path, 'w') as f: f.write(json.dumps(data)) f.close() @@ -521,14 +524,26 @@ class ChangeDetectionStore: # We incorrectly stored last_changed when there was not a change, and then confused the output list table def update_3(self): + # see https://github.com/dgtlmoon/changedetection.io/pull/835 + return + + # `last_changed` not needed, we pull that information from the history.txt index + def update_4(self): for uuid, watch in self.data['watching'].items(): # Be sure it's recalculated p = watch.history if watch.history_n < 2: watch['last_changed'] = 0 + try: + # Remove it from the struct + del(watch['last_changed']) + except: + continue + return + # Generate a previous.txt for all watches that do not have one and contain history - def update_4(self): + def update_5(self): for uuid, watch in self.data['watching'].items(): # Make sure we actually have history if (watch.history_n == 0): @@ -543,5 +558,4 @@ class ChangeDetectionStore: # Fill it with the latest history latest_file_name = watch.history[watch.newest_history_key] with open(latest_file_name, "rb") as f2: - f.write(f2.read()) - \ No newline at end of file + f.write(f2.read()) \ No newline at end of file diff --git a/changedetectionio/templates/_pagination.jinja b/changedetectionio/templates/_pagination.jinja new file mode 100644 index 00000000..0dce3d8e --- /dev/null +++ b/changedetectionio/templates/_pagination.jinja @@ -0,0 +1,7 @@ +{% macro pagination(sorted_watches, total_per_page, current_page) %} + {{ sorted_watches|length }} + + {% for row in sorted_watches|batch(total_per_page, ' ') %} + {{ loop.index}} + {% endfor %} +{% endmacro %} diff --git a/changedetectionio/templates/diff.html b/changedetectionio/templates/diff.html index 343e3d7a..63cf7b6f 100644 --- a/changedetectionio/templates/diff.html +++ b/changedetectionio/templates/diff.html @@ -3,6 +3,9 @@ {% block content %} @@ -43,15 +46,31 @@
+ {{ last_error_text }} ++
# | - | - | Last Checked | -Last Changed | + {% set link_order = "desc" if sort_order else "asc" %} + {% set arrow_span = "" %} +Website | +Last Checked | +Last Changed | |
---|---|---|---|---|---|---|---|---|
{{ loop.index }} | -- + | + + + | {{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}} @@ -88,7 +101,7 @@ {% if watch.history_n >= 2 %} Diff {% else %} - {% if watch.history_n == 1 %} + {% if watch.history_n == 1 or (watch.history_n ==0 and watch.error_text_ctime )%} Preview {% endif %} {% endif %} @@ -111,6 +124,10 @@ + {# WIP for pagination, disabled for now + {{ pagination(sorted_watches,3, pagination_page) }} + #} + {% endblock %} diff --git a/changedetectionio/tests/fetchers/test_content.py b/changedetectionio/tests/fetchers/test_content.py index 02c2c026..d50d8210 100644 --- a/changedetectionio/tests/fetchers/test_content.py +++ b/changedetectionio/tests/fetchers/test_content.py @@ -2,7 +2,7 @@ import time from flask import url_for -from ..util import live_server_setup +from ..util import live_server_setup, wait_for_all_checks import logging @@ -29,14 +29,8 @@ def test_fetch_webdriver_content(client, live_server): assert b"1 Imported" in res.data time.sleep(3) - attempt = 0 - while attempt < 20: - res = client.get(url_for("index")) - if not b'Checking now' in res.data: - break - logging.getLogger().info("Waiting for check to not say 'Checking now'..") - time.sleep(3) - attempt += 1 + + wait_for_all_checks(client) res = client.get( diff --git a/changedetectionio/tests/test_access_control.py b/changedetectionio/tests/test_access_control.py index 8b36923e..d84ed577 100644 --- a/changedetectionio/tests/test_access_control.py +++ b/changedetectionio/tests/test_access_control.py @@ -19,7 +19,6 @@ def test_check_access_control(app, client): ) assert b"Password protection enabled." in res.data - assert b"LOG OUT" not in res.data # Check we hit the login res = c.get(url_for("index"), follow_redirects=True) @@ -38,7 +37,40 @@ def test_check_access_control(app, client): follow_redirects=True ) + # Yes we are correctly logged in + assert b"LOG OUT" in res.data + + # 598 - Password should be set and not accidently removed + res = c.post( + url_for("settings_page"), + data={ + "requests-time_between_check-minutes": 180, + 'application-fetch_backend': "html_requests"}, + follow_redirects=True + ) + + res = c.get(url_for("logout"), + follow_redirects=True) + + res = c.get(url_for("settings_page"), + follow_redirects=True) + + + assert b"Login" in res.data + + res = c.get(url_for("login")) + assert b"Login" in res.data + + + res = c.post( + url_for("login"), + data={"password": "foobar"}, + follow_redirects=True + ) + + # Yes we are correctly logged in assert b"LOG OUT" in res.data + res = c.get(url_for("settings_page")) # Menu should be available now diff --git a/changedetectionio/tests/test_backend.py b/changedetectionio/tests/test_backend.py index eaf517d3..151c0e08 100644 --- a/changedetectionio/tests/test_backend.py +++ b/changedetectionio/tests/test_backend.py @@ -90,6 +90,14 @@ def test_check_basic_change_detection_functionality(client, live_server): res = client.get(url_for("diff_history_page", uuid="first")) assert b'Compare newest' in res.data + # Check the [preview] pulls the right one + res = client.get( + url_for("preview_page", uuid="first"), + follow_redirects=True + ) + assert b'which has this one new line' in res.data + assert b'Which is across multiple lines' not in res.data + time.sleep(2) # Do this a few times.. ensures we dont accidently set the status diff --git a/changedetectionio/tests/test_errorhandling.py b/changedetectionio/tests/test_errorhandling.py index 0a0d5d6c..a8b29863 100644 --- a/changedetectionio/tests/test_errorhandling.py +++ b/changedetectionio/tests/test_errorhandling.py @@ -11,16 +11,17 @@ def test_setup(live_server): live_server_setup(live_server) -def test_error_handler(client, live_server): +def _runner_test_http_errors(client, live_server, http_code, expected_text): + with open("test-datastore/endpoint-content.txt", "w") as f: + f.write("Now you going to get a {} error code\n".format(http_code)) - # Give the endpoint time to spin up - time.sleep(1) # Add our URL to the import page test_url = url_for('test_endpoint', - status_code=403, + status_code=http_code, _external=True) + res = client.post( url_for("import_page"), data={"urls": test_url}, @@ -29,15 +30,38 @@ def test_error_handler(client, live_server): assert b"1 Imported" in res.data # Give the thread time to pick it up - time.sleep(3) + time.sleep(2) res = client.get(url_for("index")) + # no change assert b'unviewed' not in res.data - assert b'Status Code 403' in res.data - assert bytes("just now".encode('utf-8')) in res.data + assert bytes(expected_text.encode('utf-8')) in res.data + + + # Error viewing tabs should appear + res = client.get( + url_for("preview_page", uuid="first"), + follow_redirects=True + ) + + assert b'Error Text' in res.data + + # 'Error Screenshot' only when in playwright mode + #assert b'Error Screenshot' in res.data + + + res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) + assert b'Deleted' in res.data + + +def test_http_error_handler(client, live_server): + _runner_test_http_errors(client, live_server, 403, 'Access denied') + _runner_test_http_errors(client, live_server, 404, 'Page not found') + _runner_test_http_errors(client, live_server, 500, '(Internal server Error) received') + _runner_test_http_errors(client, live_server, 400, 'Error - Request returned a HTTP error code 400') # Just to be sure error text is properly handled -def test_error_text_handler(client, live_server): +def test_DNS_errors(client, live_server): # Give the endpoint time to spin up time.sleep(1) @@ -54,5 +78,6 @@ def test_error_text_handler(client, live_server): res = client.get(url_for("index")) assert b'Name or service not known' in res.data + # Should always record that we tried assert bytes("just now".encode('utf-8')) in res.data diff --git a/changedetectionio/tests/test_ignorestatuscode.py b/changedetectionio/tests/test_ignorestatuscode.py index 335f3655..aeafcdaa 100644 --- a/changedetectionio/tests/test_ignorestatuscode.py +++ b/changedetectionio/tests/test_ignorestatuscode.py @@ -137,54 +137,3 @@ def test_403_page_check_works_with_ignore_status_code(client, live_server): res = client.get(url_for("index")) assert b'unviewed' in res.data - -# Tests the whole stack works with staus codes ignored -def test_403_page_check_fails_without_ignore_status_code(client, live_server): - sleep_time_for_fetch_thread = 3 - - set_original_response() - - # Give the endpoint time to spin up - time.sleep(1) - - # Add our URL to the import page - test_url = url_for('test_endpoint', status_code=403, _external=True) - res = client.post( - url_for("import_page"), - data={"urls": test_url}, - follow_redirects=True - ) - assert b"1 Imported" in res.data - - # Trigger a check - client.get(url_for("form_watch_checknow"), follow_redirects=True) - - # Give the thread time to pick it up - time.sleep(sleep_time_for_fetch_thread) - - # Goto the edit page, check our ignore option - # Add our URL to the import page - res = client.post( - url_for("edit_page", uuid="first"), - data={"url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, - follow_redirects=True - ) - assert b"Updated watch." in res.data - - # Trigger a check - client.get(url_for("form_watch_checknow"), follow_redirects=True) - - # Give the thread time to pick it up - time.sleep(sleep_time_for_fetch_thread) - # Make a change - set_some_changed_response() - - # Trigger a check - client.get(url_for("form_watch_checknow"), follow_redirects=True) - # Give the thread time to pick it up - time.sleep(sleep_time_for_fetch_thread) - - # It should have 'unviewed' still - # Because it should be looking at only that 'sametext' id - res = client.get(url_for("index")) - assert b'Status Code 403' in res.data diff --git a/changedetectionio/tests/util.py b/changedetectionio/tests/util.py index ad1420f8..d878252b 100644 --- a/changedetectionio/tests/util.py +++ b/changedetectionio/tests/util.py @@ -2,11 +2,8 @@ from flask import make_response, request from flask import url_for -from werkzeug import Request -import io - -import multiprocessing -multiprocessing.set_start_method("fork") +import logging +import time def set_original_response(): test_return_data = """ @@ -73,6 +70,31 @@ def extract_api_key_from_UI(client): api_key = m.group(1) return api_key.strip() + +# kinda funky, but works for now +def extract_UUID_from_client(client): + import re + res = client.get( + url_for("index"), + ) + # {{api_key}} + + m = re.search('edit/(.+?)"', str(res.data)) + uuid = m.group(1) + return uuid.strip() + +def wait_for_all_checks(client): + # Loop waiting until done.. + attempt=0 + while attempt < 60: + time.sleep(1) + res = client.get(url_for("index")) + if not b'Checking now' in res.data: + break + logging.getLogger().info("Waiting for watch-list to not say 'Checking now'.. {}".format(attempt)) + + attempt += 1 + def live_server_setup(live_server): @live_server.app.route('/test-endpoint') @@ -169,3 +191,4 @@ def live_server_setup(live_server): live_server.app.wsgi_app = DefaultCheckboxMiddleware(live_server.app.wsgi_app) live_server.start() + diff --git a/changedetectionio/tests/visualselector/__init__.py b/changedetectionio/tests/visualselector/__init__.py new file mode 100644 index 00000000..085b3d78 --- /dev/null +++ b/changedetectionio/tests/visualselector/__init__.py @@ -0,0 +1,2 @@ +"""Tests for the app.""" + diff --git a/changedetectionio/tests/visualselector/conftest.py b/changedetectionio/tests/visualselector/conftest.py new file mode 100644 index 00000000..430513d4 --- /dev/null +++ b/changedetectionio/tests/visualselector/conftest.py @@ -0,0 +1,3 @@ +#!/usr/bin/python3 + +from .. import conftest diff --git a/changedetectionio/tests/visualselector/test_fetch_data.py b/changedetectionio/tests/visualselector/test_fetch_data.py new file mode 100644 index 00000000..dc27d1ba --- /dev/null +++ b/changedetectionio/tests/visualselector/test_fetch_data.py @@ -0,0 +1,35 @@ +#!/usr/bin/python3 + +import time +from flask import url_for +from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client + +# Add a site in paused mode, add an invalid filter, we should still have visual selector data ready +def test_visual_selector_content_ready(client, live_server): + import os + + assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test" + live_server_setup(live_server) + time.sleep(1) + + # Add our URL to the import page, maybe better to use something we control? + # We use an external URL because the docker container is too difficult to setup to connect back to the pytest socket + test_url = 'https://news.ycombinator.com' + res = client.post( + url_for("form_quick_watch_add"), + data={"url": test_url, "tag": '', 'edit_and_watch_submit_button': 'Edit > Watch'}, + follow_redirects=True + ) + assert b"Watch added in Paused state, saving will unpause" in res.data + + res = client.post( + url_for("edit_page", uuid="first", unpause_on_save=1), + data={"css_filter": ".does-not-exist", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_webdriver"}, + follow_redirects=True + ) + assert b"unpaused" in res.data + time.sleep(1) + wait_for_all_checks(client) + uuid = extract_UUID_from_client(client) + assert os.path.isfile(os.path.join('test-datastore', uuid, 'last-screenshot.png')), "last-screenshot.png should exist" + assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.json')), "xpath elements.json data should exist" diff --git a/changedetectionio/update_worker.py b/changedetectionio/update_worker.py index 02ea036c..d56a9298 100644 --- a/changedetectionio/update_worker.py +++ b/changedetectionio/update_worker.py @@ -1,3 +1,4 @@ +import os import threading import queue import time @@ -21,10 +22,70 @@ class update_worker(threading.Thread): self.datastore = datastore super().__init__(*args, **kwargs) - def send_filter_failure_notification(self, uuid): + def send_content_changed_notification(self, t, watch_uuid): + + from changedetectionio import diff + + n_object = {} + watch = self.datastore.data['watching'].get(watch_uuid, False) + if not watch: + return + + watch_history = watch.history + dates = list(watch_history.keys()) + # Theoretically it's possible that this could be just 1 long, + # - In the case that the timestamp key was not unique + if len(dates) == 1: + raise ValueError( + "History index had 2 or more, but only 1 date loaded, timestamps were not unique? maybe two of the same timestamps got written, needs more delay?" + ) + + # Did it have any notification alerts to hit? + if len(watch['notification_urls']): + print(">>> Notifications queued for UUID from watch {}".format(watch_uuid)) + n_object['notification_urls'] = watch['notification_urls'] + n_object['notification_title'] = watch['notification_title'] + n_object['notification_body'] = watch['notification_body'] + n_object['notification_format'] = watch['notification_format'] + + # No? maybe theres a global setting, queue them all + elif len(self.datastore.data['settings']['application']['notification_urls']): + print(">>> Watch notification URLs were empty, using GLOBAL notifications for UUID: {}".format(watch_uuid)) + n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls'] + n_object['notification_title'] = self.datastore.data['settings']['application']['notification_title'] + n_object['notification_body'] = self.datastore.data['settings']['application']['notification_body'] + n_object['notification_format'] = self.datastore.data['settings']['application']['notification_format'] + else: + print(">>> NO notifications queued, watch and global notification URLs were empty.") + + # Only prepare to notify if the rules above matched + if 'notification_urls' in n_object: + # HTML needs linebreak, but MarkDown and Text can use a linefeed + if n_object['notification_format'] == 'HTML': + line_feed_sep = "" + else: + line_feed_sep = "\n" + + snapshot_contents = '' + with open(watch_history[dates[-1]], 'rb') as f: + snapshot_contents = f.read() + + n_object.update({ + 'watch_url': watch['url'], + 'uuid': watch_uuid, + 'current_snapshot': snapshot_contents.decode('utf-8'), + 'diff': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], line_feed_sep=line_feed_sep), + 'diff_full': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], True, line_feed_sep=line_feed_sep) + }) + + self.notification_q.put(n_object) + + def send_filter_failure_notification(self, watch_uuid): threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts') - watch = self.datastore.data['watching'].get(uuid, False) + watch = self.datastore.data['watching'].get(watch_uuid, False) + if not watch: + return n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page', 'notification_body': "Your configured CSS/xPath filter of '{}' for {{watch_url}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{base_url}}/edit/{{watch_uuid}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format( @@ -42,10 +103,18 @@ class update_worker(threading.Thread): if 'notification_urls' in n_object: n_object.update({ 'watch_url': watch['url'], - 'uuid': uuid + 'uuid': watch_uuid }) self.notification_q.put(n_object) - print("Sent filter not found notification for {}".format(uuid)) + print("Sent filter not found notification for {}".format(watch_uuid)) + + def cleanup_error_artifacts(self, uuid): + # All went fine, remove error artifacts + cleanup_files = ["last-error-screenshot.png", "last-error.txt"] + for f in cleanup_files: + full_path = os.path.join(self.datastore.datastore_path, uuid, f) + if os.path.isfile(full_path): + os.unlink(full_path) def run(self): from changedetectionio import fetch_site_status @@ -55,7 +124,7 @@ class update_worker(threading.Thread): while not self.app.config.exit.is_set(): try: - uuid = self.q.get(block=False) + priority, uuid = self.q.get(block=False) except queue.Empty: pass @@ -63,18 +132,17 @@ class update_worker(threading.Thread): self.current_uuid = uuid if uuid in list(self.datastore.data['watching'].keys()): - changed_detected = False contents = b'' screenshot = False update_obj= {} xpath_data = False process_changedetection_results = True - + print("> Processing UUID {} Priority {} URL {}".format(uuid, priority, self.datastore.data['watching'][uuid]['url'])) now = time.time() try: - changed_detected, update_obj, contents, screenshot, xpath_data = update_handler.run(uuid) + changed_detected, update_obj, contents = update_handler.run(uuid) # Re #342 # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes. # We then convert/.decode('utf-8') for the notification etc @@ -87,7 +155,31 @@ class update_worker(threading.Thread): # Totally fine, it's by choice - just continue on, nothing more to care about # Page had elements/content but no renderable text # Backend (not filters) gave zero output - self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found."}) + self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found (With {} reply code).".format(e.status_code)}) + if e.screenshot: + self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot) + process_changedetection_results = False + + except content_fetcher.Non200ErrorCodeReceived as e: + if e.status_code == 403: + err_text = "Error - 403 (Access denied) received" + elif e.status_code == 404: + err_text = "Error - 404 (Page not found) received" + elif e.status_code == 500: + err_text = "Error - 500 (Internal server Error) received" + else: + err_text = "Error - Request returned a HTTP error code {}".format(str(e.status_code)) + + if e.screenshot: + self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True) + if e.xpath_data: + self.datastore.save_xpath_data(watch_uuid=uuid, data=e.xpath_data, as_error=True) + if e.page_text: + self.datastore.save_error_text(watch_uuid=uuid, contents=e.page_text) + + self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, + # So that we get a trigger when the content is added again + 'previous_md5': ''}) process_changedetection_results = False except FilterNotFoundInResponse as e: @@ -105,8 +197,10 @@ class update_worker(threading.Thread): 0) print("Filter for {} not found, consecutive_filter_failures: {}".format(uuid, c)) if threshold > 0 and c >= threshold: - self.send_filter_failure_notification(uuid) + if not self.datastore.data['watching'][uuid].get('notification_muted'): + self.send_filter_failure_notification(uuid) c = 0 + self.datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c}) process_changedetection_results = True @@ -121,8 +215,20 @@ class update_worker(threading.Thread): self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, 'last_check_status': e.status_code}) process_changedetection_results = False + except content_fetcher.JSActionExceptions as e: + err_text = "Error running JS Actions - Page request - "+e.message + if e.screenshot: + self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True) + self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, + 'last_check_status': e.status_code}) except content_fetcher.PageUnloadable as e: err_text = "Page request from server didnt respond correctly" + if e.message: + err_text = "{} - {}".format(err_text, e.message) + + if e.screenshot: + self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True) + self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, 'last_check_status': e.status_code}) except Exception as e: @@ -131,8 +237,15 @@ class update_worker(threading.Thread): # Other serious error process_changedetection_results = False else: + # Crash protection, the watch entry could have been removed by this point (during a slow chrome fetch etc) + if not self.datastore.data['watching'].get(uuid): + continue + # Mark that we never had any failures - update_obj['consecutive_filter_failures'] = 0 + if not self.datastore.data['watching'][uuid].get('ignore_status_codes'): + update_obj['consecutive_filter_failures'] = 0 + + self.cleanup_error_artifacts(uuid) # Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc if process_changedetection_results: @@ -143,66 +256,19 @@ class update_worker(threading.Thread): # For the FIRST time we check a site, or a change detected, save the snapshot. if changed_detected or not watch['last_checked']: # A change was detected - fname = watch.save_history_text(contents=contents, timestamp=str(round(time.time()))) + watch.save_history_text(contents=contents, timestamp=str(round(time.time()))) self.datastore.update_watch(uuid=uuid, update_obj=update_obj) # A change was detected if changed_detected: - n_object = {} print (">> Change detected in UUID {} - {}".format(uuid, watch['url'])) # Notifications should only trigger on the second time (first time, we gather the initial snapshot) if watch.history_n >= 2: - # Atleast 2, means there really was a change - self.datastore.update_watch(uuid=uuid, update_obj={'last_changed': round(now)}) - - watch_history = watch.history - dates = list(watch_history.keys()) - # Theoretically it's possible that this could be just 1 long, - # - In the case that the timestamp key was not unique - if len(dates) == 1: - raise ValueError( - "History index had 2 or more, but only 1 date loaded, timestamps were not unique? maybe two of the same timestamps got written, needs more delay?" - ) - prev_fname = watch_history[dates[-2]] - - # Did it have any notification alerts to hit? - if len(watch['notification_urls']): - print(">>> Notifications queued for UUID from watch {}".format(uuid)) - n_object['notification_urls'] = watch['notification_urls'] - n_object['notification_title'] = watch['notification_title'] - n_object['notification_body'] = watch['notification_body'] - n_object['notification_format'] = watch['notification_format'] - - # No? maybe theres a global setting, queue them all - elif len(self.datastore.data['settings']['application']['notification_urls']): - print(">>> Watch notification URLs were empty, using GLOBAL notifications for UUID: {}".format(uuid)) - n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls'] - n_object['notification_title'] = self.datastore.data['settings']['application']['notification_title'] - n_object['notification_body'] = self.datastore.data['settings']['application']['notification_body'] - n_object['notification_format'] = self.datastore.data['settings']['application']['notification_format'] - else: - print(">>> NO notifications queued, watch and global notification URLs were empty.") - - # Only prepare to notify if the rules above matched - if 'notification_urls' in n_object: - # HTML needs linebreak, but MarkDown and Text can use a linefeed - if n_object['notification_format'] == 'HTML': - line_feed_sep = "" - else: - line_feed_sep = "\n" - - from changedetectionio import diff - n_object.update({ - 'watch_url': watch['url'], - 'uuid': uuid, - 'current_snapshot': contents.decode('utf-8'), - 'diff': diff.render_diff(prev_fname, fname, line_feed_sep=line_feed_sep), - 'diff_full': diff.render_diff(prev_fname, fname, True, line_feed_sep=line_feed_sep) - }) - - self.notification_q.put(n_object) + if not self.datastore.data['watching'][uuid].get('notification_muted'): + self.send_content_changed_notification(self, watch_uuid=uuid) + except Exception as e: # Catch everything possible here, so that if a worker crashes, we don't lose it until restart! @@ -211,15 +277,15 @@ class update_worker(threading.Thread): self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)}) - # Always record that we atleast tried - self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3), - 'last_checked': round(time.time())}) + # Always record that we atleast tried + self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3), + 'last_checked': round(time.time())}) - # Always save the screenshot if it's available - if screenshot: - self.datastore.save_screenshot(watch_uuid=uuid, screenshot=screenshot) - if xpath_data: - self.datastore.save_xpath_data(watch_uuid=uuid, data=xpath_data) + # Always save the screenshot if it's available + if update_handler.screenshot: + self.datastore.save_screenshot(watch_uuid=uuid, screenshot=update_handler.screenshot) + if update_handler.xpath_data: + self.datastore.save_xpath_data(watch_uuid=uuid, data=update_handler.xpath_data) self.current_uuid = None # Done diff --git a/requirements.txt b/requirements.txt index d755e171..8aaef292 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,7 +18,7 @@ wtforms ~= 3.0 jsonpath-ng ~= 1.5.3 # Notification library -apprise ~= 0.9.9 +apprise ~= 1.0.0 # apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315 paho-mqtt |