From 9f964b6d3fae8f12e753e1caf9405d36d68a492c Mon Sep 17 00:00:00 2001 From: Leigh Morresi <275001+dgtlmoon@users.noreply.github.com> Date: Fri, 12 Feb 2021 19:24:30 +0100 Subject: [PATCH 01/18] WIP, separate out the Flask from everything else, get pytest working --- .gitignore | 4 +- backend.py | 54 ++++ backend/__init__.py | 490 ++++++++++++++++++++++++++++++++++ backend/backend.py | 501 ----------------------------------- backend/fetch_site_status.py | 5 +- backend/nocache.py | 14 - backend/store.py | 3 +- backend/test_backend.py | 30 +++ 8 files changed, 580 insertions(+), 521 deletions(-) create mode 100644 backend.py create mode 100644 backend/__init__.py delete mode 100644 backend/backend.py delete mode 100644 backend/nocache.py create mode 100644 backend/test_backend.py diff --git a/.gitignore b/.gitignore index 715c22f1..df3a8817 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,6 @@ __pycache__ .idea *.pyc datastore/url-watches.json -datastore/* \ No newline at end of file +datastore/* +__pycache__ +.pytest_cache diff --git a/backend.py b/backend.py new file mode 100644 index 00000000..ef002d9d --- /dev/null +++ b/backend.py @@ -0,0 +1,54 @@ +#!/usr/bin/python3 + +# Launch as a eventlet.wsgi server instance. + +import getopt +import sys + +import eventlet +import eventlet.wsgi +import backend + +def main(argv): + ssl_mode = False + port = 5000 + datastore_path = "./datastore" + + try: + opts, args = getopt.getopt(argv, "sd:p:", "purge") + except getopt.GetoptError: + print('backend.py -s SSL enable -p [port]') + sys.exit(2) + + for opt, arg in opts: +# if opt == '--purge': + # Remove history, the actual files you need to delete manually. +# for uuid, watch in datastore.data['watching'].items(): +# watch.update({'history': {}, 'last_checked': 0, 'last_changed': 0, 'previous_md5': None}) + + if opt == '-s': + ssl_mode = True + + if opt == '-p': + port = arg + + if opt == '-d': + datastore_path = arg + + + # @todo finalise SSL config, but this should get you in the right direction if you need it. + + app = backend.changedetection_app({'datastore_path':datastore_path}) + if ssl_mode: + eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen(('', port)), + certfile='cert.pem', + keyfile='privkey.pem', + server_side=True), app) + + else: + eventlet.wsgi.server(eventlet.listen(('', port)), backend.changedetection_app()) + +if __name__ == '__main__': + main(sys.argv) + +#print (__name__) \ No newline at end of file diff --git a/backend/__init__.py b/backend/__init__.py new file mode 100644 index 00000000..4a60aff4 --- /dev/null +++ b/backend/__init__.py @@ -0,0 +1,490 @@ +#!/usr/bin/python3 + + +# @todo logging +# @todo sort by last_changed +# @todo extra options for url like , verify=False etc. +# @todo enable https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl as option? +# @todo maybe a button to reset all 'last-changed'.. so you can see it clearly when something happens since your last visit +# @todo option for interval day/6 hour/etc +# @todo on change detected, config for calling some API +# @todo make tables responsive! 
+# @todo fetch title into json +# https://distill.io/features +# proxy per check +# - flask_cors, itsdangerous,MarkupSafe +import json +import time +import os +import timeago + +import threading +import queue + + +from flask import Flask, render_template, request, send_file, send_from_directory, safe_join, abort, redirect, url_for + + +# Local + +from backend import store +running_update_threads = [] +ticker_thread = None + +messages = [] +extra_stylesheets = [] + +update_q = queue.Queue() + + +app = Flask(__name__, static_url_path="/var/www/change-detection/backen/static") + +# Stop browser caching of assets +app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0 + +app.config['STOP_THREADS']= False + +# Disables caching of the templates +app.config['TEMPLATES_AUTO_RELOAD'] = True + + +# We use the whole watch object from the store/JSON so we can see if there's some related status in terms of a thread +# running or something similar. +@app.template_filter('format_last_checked_time') +def _jinja2_filter_datetime(watch_obj, format="%Y-%m-%d %H:%M:%S"): + # Worker thread tells us which UUID it is currently processing. + for t in running_update_threads: + if t.current_uuid == watch_obj['uuid']: + return "Checking now.." + + if watch_obj['last_checked'] == 0: + return 'Not yet' + + return timeago.format(int(watch_obj['last_checked']), time.time()) + + +# @app.context_processor +# def timeago(): +# def _timeago(lower_time, now): +# return timeago.format(lower_time, now) +# return dict(timeago=_timeago) + +@app.template_filter('format_timestamp_timeago') +def _jinja2_filter_datetimestamp(timestamp, format="%Y-%m-%d %H:%M:%S"): + return timeago.format(timestamp, time.time()) + # return timeago.format(timestamp, time.time()) + # return datetime.datetime.utcfromtimestamp(timestamp).strftime(format) + +def changedetection_app(config=None): + + # Hmm + app.config.update(dict(DEBUG=True)) + app.config.update(config or {}) + + datastore = store.ChangeDetectionStore(datastore_path=app.config['datastore_path']) + + # Setup cors headers to allow all domains + # https://flask-cors.readthedocs.io/en/latest/ +# CORS(app) + + #https://github.com/pallets/flask/blob/93dd1709d05a1cf0e886df6223377bdab3b077fb/examples/tutorial/flaskr/__init__.py#L39 + # You can divide up the stuff like this + + @app.route("/", methods=['GET']) + def main_page(): + global messages + + limit_tag = request.args.get('tag') + + # Sort by last_changed and add the uuid which is usually the key.. + sorted_watches = [] + for uuid, watch in datastore.data['watching'].items(): + + + if limit_tag != None: + # Support for comma separated list of tags. + for tag_in_watch in watch['tag'].split(','): + tag_in_watch = tag_in_watch.strip() + if tag_in_watch == limit_tag: + watch['uuid'] = uuid + sorted_watches.append(watch) + + else: + watch['uuid'] = uuid + sorted_watches.append(watch) + + + sorted_watches.sort(key=lambda x: x['last_changed'], reverse=True) + + existing_tags = datastore.get_all_tags() + output = render_template("watch-overview.html", + watches=sorted_watches, + messages=messages, + tags=existing_tags, + active_tag=limit_tag) + + # Show messages but once. 
+ messages = [] + return output + + @app.route("/scrub", methods=['GET', 'POST']) + def scrub_page(): + from pathlib import Path + + global messages + + if request.method == 'POST': + confirmtext = request.form.get('confirmtext') + + if confirmtext == 'scrub': + + for txt_file_path in Path(app.config['datastore_path']).rglob('*.txt'): + os.unlink(txt_file_path) + + for uuid, watch in datastore.data['watching'].items(): + watch['last_checked'] = 0 + watch['last_changed'] = 0 + watch['previous_md5'] = None + watch['history'] = {} + + datastore.needs_write = True + messages.append({'class': 'ok', 'message': 'Cleaned all version history.'}) + else: + messages.append({'class': 'error', 'message': 'Wrong confirm text.'}) + + return redirect(url_for('main_page')) + + return render_template("scrub.html") + + + @app.route("/edit", methods=['GET', 'POST']) + def edit_page(): + global messages + import validators + + if request.method == 'POST': + uuid = request.args.get('uuid') + + url = request.form.get('url').strip() + tag = request.form.get('tag').strip() + + form_headers = request.form.get('headers').strip().split("\n") + extra_headers = {} + if form_headers: + for header in form_headers: + if len(header): + parts = header.split(':', 1) + extra_headers.update({parts[0].strip(): parts[1].strip()}) + + validators.url(url) # @todo switch to prop/attr/observer + datastore.data['watching'][uuid].update({'url': url, + 'tag': tag, + 'headers': extra_headers}) + datastore.needs_write = True + + messages.append({'class': 'ok', 'message': 'Updated watch.'}) + + return redirect(url_for('main_page')) + + else: + + uuid = request.args.get('uuid') + output = render_template("edit.html", uuid=uuid, watch=datastore.data['watching'][uuid], messages=messages) + + return output + + + @app.route("/settings", methods=['GET', "POST"]) + def settings_page(): + global messages + if request.method == 'POST': + try: + minutes = int(request.values.get('minutes').strip()) + except ValueError: + messages.append({'class': 'error', 'message': "Invalid value given, use an integer."}) + + else: + if minutes >= 5 and minutes <= 600: + datastore.data['settings']['requests']['minutes_between_check'] = minutes + datastore.needs_write = True + + messages.append({'class': 'ok', 'message': "Updated"}) + else: + messages.append({'class': 'error', 'message': "Must be equal to or greater than 5 and less than 600 minutes"}) + + output = render_template("settings.html", messages=messages, minutes=datastore.data['settings']['requests']['minutes_between_check']) + messages =[] + + return output + + @app.route("/import", methods=['GET', "POST"]) + def import_page(): + import validators + global messages + remaining_urls=[] + + good = 0 + + if request.method == 'POST': + urls = request.values.get('urls').split("\n") + for url in urls: + url = url.strip() + if len(url) and validators.url(url): + datastore.add_watch(url=url.strip(), tag="") + good += 1 + else: + if len(url): + remaining_urls.append(url) + + messages.append({'class': 'ok', 'message': "{} Imported, {} Skipped.".format(good, len(remaining_urls))}) + + output = render_template("import.html", + messages=messages, + remaining="\n".join(remaining_urls) + ) + messages = [] + return output + + + @app.route("/diff/", methods=['GET']) + def diff_history_page(uuid): + global messages + + extra_stylesheets=['/static/css/diff.css'] + + watch = datastore.data['watching'][uuid] + + dates = list(watch['history'].keys()) + # Convert to int, sort and back to str again + dates = [int(i) for i in 
dates] + dates.sort(reverse=True) + dates = [str(i) for i in dates] + + # Save the current newest history as the most recently viewed + datastore.set_last_viewed(uuid, dates[0]) + + newest_file = watch['history'][dates[0]] + with open(newest_file, 'r') as f: + newest_version_file_contents = f.read() + + previous_version = request.args.get('previous_version') + + try: + previous_file = watch['history'][previous_version] + except KeyError: + # Not present, use a default value, the second one in the sorted list. + previous_file = watch['history'][dates[1]] + + with open(previous_file, 'r') as f: + previous_version_file_contents = f.read() + + output = render_template("diff.html", watch_a=watch, + messages=messages, + newest=newest_version_file_contents, + previous=previous_version_file_contents, + extra_stylesheets=extra_stylesheets, + versions=dates[1:], + newest_version_timestamp=dates[0], + current_previous_version=str(previous_version), + current_diff_url=watch['url']) + + return output + + @app.route("/favicon.ico", methods=['GET']) + def favicon(): + return send_from_directory("/app/static/images", filename="favicon.ico") + + + # We're good but backups are even better! + @app.route("/backup", methods=['GET']) + def get_backup(): + import zipfile + from pathlib import Path + import zlib + + # create a ZipFile object + backupname = "changedetection-backup-{}.zip".format(int(time.time())) + + # We only care about UUIDS from the current index file + uuids = list(datastore.data['watching'].keys()) + + with zipfile.ZipFile(os.path.join(app.config['datastore_path'], backupname), 'w', compression=zipfile.ZIP_DEFLATED, + compresslevel=6) as zipObj: + + # Be sure we're written fresh + datastore.sync_to_json() + + # Add the index + zipObj.write(os.path.join(app.config['datastore_path'], "url-watches.json")) + # Add any snapshot data we find + for txt_file_path in Path(app.config['datastore_path']).rglob('*.txt'): + parent_p = txt_file_path.parent + if parent_p.name in uuids: + zipObj.write(txt_file_path) + + return send_file(os.path.join(app.config['datastore_path'], backupname), + as_attachment=True, + mimetype="application/zip", + attachment_filename=backupname) + + + + # A few self sanity checks, mostly for developer/bug check + @app.route("/self-check", methods=['GET']) + def selfcheck(): + output = "All fine" + # In earlier versions before a single threaded write of the JSON store, sometimes histories could get mixed. + # Could also maybe affect people who manually fiddle with their JSON store? + for uuid, watch in datastore.data['watching'].items(): + for timestamp, path in watch['history'].items(): + # Each history snapshot should include a full path, which contains the {uuid} + if not uuid in path: + output = "Something weird in {}, suspected incorrect snapshot path.".format(uuid) + + return output + + @app.route("/static//", methods=['GET']) + def static_content(group, filename): + # These files should be in our subdirectory + full_path = os.path.realpath(__file__) + p = os.path.dirname(full_path) + + try: + return send_from_directory("{}/static/{}".format(p, group), filename=filename) + except FileNotFoundError: + abort(404) + + + @app.route("/api/add", methods=['POST']) + def api_watch_add(): + global messages + + # @todo add_watch should throw a custom Exception for validation etc + new_uuid = datastore.add_watch(url=request.form.get('url').strip(), tag=request.form.get('tag').strip()) + # Straight into the queue. 
+ update_q.put(new_uuid) + + messages.append({'class': 'ok', 'message': 'Watch added.'}) + return redirect(url_for('main_page')) + + + @app.route("/api/delete", methods=['GET']) + def api_delete(): + global messages + uuid = request.args.get('uuid') + datastore.delete(uuid) + messages.append({'class': 'ok', 'message': 'Deleted.'}) + + return redirect(url_for('main_page')) + + + @app.route("/api/checknow", methods=['GET']) + def api_watch_checknow(): + global messages + + tag = request.args.get('tag') + uuid = request.args.get('uuid') + i=0 + + if uuid: + update_q.put(uuid) + i = 1 + + elif tag != None: + for watch_uuid, watch in datastore.data['watching'].items(): + if (tag != None and tag in watch['tag']): + i += 1 + update_q.put(watch_uuid) + else: + # No tag, no uuid, add everything. + for watch_uuid, watch in datastore.data['watching'].items(): + i += 1 + update_q.put(watch_uuid) + + messages.append({'class': 'ok', 'message': "{} watches are rechecking.".format(i)}) + return redirect(url_for('main_page', tag=tag)) + + + + # @todo handle ctrl break + ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start() + save_data_thread = threading.Thread(target=save_datastore).start() + + return app + + +# Requests for checking on the site use a pool of thread Workers managed by a Queue. +class Worker(threading.Thread): + + current_uuid = None + + + def __init__(self, q, *args, **kwargs): + self.q = q + super().__init__(*args, **kwargs) + + def run(self): + from backend import fetch_site_status + + update_handler = fetch_site_status.perform_site_check(datastore=datastore) + + while True: + + try: + uuid = self.q.get(block=True, timeout=1) # Blocking + except queue.Empty: + # We have a chance to kill this thread that needs to monitor for new jobs.. + if app.config['STOP_THREADS']: + return + else: + self.current_uuid = uuid + + if uuid in list(datastore.data['watching'].keys()): + + try: + result = update_handler.run(uuid) + + except PermissionError as s: + print ("File permission error updating", uuid,str(s)) + else: + if result: + datastore.update_watch(uuid=uuid, update_obj=result) + + self.current_uuid = None # Done + self.q.task_done() + + + # Thread runner to check every minute, look for new watches to feed into the Queue. + def ticker_thread_check_time_launch_checks(): + + # Spin up Workers. + for _ in range(datastore.data['settings']['requests']['workers']): + new_worker = Worker(update_q) + running_update_threads.append(new_worker) + new_worker.start() + + # Every minute check for new UUIDs to follow up on + while True: + minutes = datastore.data['settings']['requests']['minutes_between_check'] + for uuid, watch in datastore.data['watching'].items(): + if watch['last_checked'] <= time.time() - (minutes * 60): + update_q.put(uuid) + + if app.config['STOP_THREADS']: + return + time.sleep(1) + + + # Thread runner, this helps with thread/write issues when there are many operations that want to update the JSON + # by just running periodically in one thread, according to python, dict updates are threadsafe. + def save_datastore(): + + global stop_threads + + while True: + if stop_threads: + return + if datastore.needs_write: + datastore.sync_to_json() + time.sleep(1) + diff --git a/backend/backend.py b/backend/backend.py deleted file mode 100644 index 0a007d71..00000000 --- a/backend/backend.py +++ /dev/null @@ -1,501 +0,0 @@ -#!/usr/bin/python3 - - -# @todo logging -# @todo sort by last_changed -# @todo extra options for url like , verify=False etc. 
-# @todo enable https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl as option? -# @todo maybe a button to reset all 'last-changed'.. so you can see it clearly when something happens since your last visit -# @todo option for interval day/6 hour/etc -# @todo on change detected, config for calling some API -# @todo make tables responsive! -# @todo fetch title into json -# https://distill.io/features -# proxy per check -#i -import json -import eventlet -import eventlet.wsgi - -import time -import os -import getopt -import sys -import datetime -import timeago - -import threading -import queue - - -from flask import Flask, render_template, request, send_file, send_from_directory, safe_join, abort, redirect, url_for - - -# Local -import store -running_update_threads = [] -ticker_thread = None - -datastore = store.ChangeDetectionStore() -messages = [] -extra_stylesheets = [] - -update_q = queue.Queue() - - -app = Flask(__name__, static_url_path='/static') -app.config['STATIC_RESOURCES'] = "/app/static" -app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0 - -# app.config['SECRET_KEY'] = 'secret!' - -# Disables caching of the templates -app.config['TEMPLATES_AUTO_RELOAD'] = True - - -# We use the whole watch object from the store/JSON so we can see if there's some related status in terms of a thread -# running or something similar. -@app.template_filter('format_last_checked_time') -def _jinja2_filter_datetime(watch_obj, format="%Y-%m-%d %H:%M:%S"): - # Worker thread tells us which UUID it is currently processing. - for t in running_update_threads: - if t.current_uuid == watch_obj['uuid']: - return "Checking now.." - - if watch_obj['last_checked'] == 0: - return 'Not yet' - - return timeago.format(int(watch_obj['last_checked']), time.time()) - - -# @app.context_processor -# def timeago(): -# def _timeago(lower_time, now): -# return timeago.format(lower_time, now) -# return dict(timeago=_timeago) - -@app.template_filter('format_timestamp_timeago') -def _jinja2_filter_datetimestamp(timestamp, format="%Y-%m-%d %H:%M:%S"): - return timeago.format(timestamp, time.time()) - # return timeago.format(timestamp, time.time()) - # return datetime.datetime.utcfromtimestamp(timestamp).strftime(format) - - -@app.route("/", methods=['GET']) -def main_page(): - global messages - - limit_tag = request.args.get('tag') - - # Sort by last_changed and add the uuid which is usually the key.. - sorted_watches = [] - for uuid, watch in datastore.data['watching'].items(): - - - if limit_tag != None: - # Support for comma separated list of tags. - for tag_in_watch in watch['tag'].split(','): - tag_in_watch = tag_in_watch.strip() - if tag_in_watch == limit_tag: - watch['uuid'] = uuid - sorted_watches.append(watch) - - else: - watch['uuid'] = uuid - sorted_watches.append(watch) - - - sorted_watches.sort(key=lambda x: x['last_changed'], reverse=True) - - existing_tags = datastore.get_all_tags() - output = render_template("watch-overview.html", - watches=sorted_watches, - messages=messages, - tags=existing_tags, - active_tag=limit_tag) - - # Show messages but once. 
- messages = [] - return output - -@app.route("/scrub", methods=['GET', 'POST']) -def scrub_page(): - from pathlib import Path - - global messages - - if request.method == 'POST': - confirmtext = request.form.get('confirmtext') - - if confirmtext == 'scrub': - - for txt_file_path in Path('/datastore').rglob('*.txt'): - os.unlink(txt_file_path) - - for uuid, watch in datastore.data['watching'].items(): - watch['last_checked'] = 0 - watch['last_changed'] = 0 - watch['previous_md5'] = None - watch['history'] = {} - - datastore.needs_write = True - messages.append({'class': 'ok', 'message': 'Cleaned all version history.'}) - else: - messages.append({'class': 'error', 'message': 'Wrong confirm text.'}) - - return redirect(url_for('main_page')) - - return render_template("scrub.html") - - -@app.route("/edit", methods=['GET', 'POST']) -def edit_page(): - global messages - import validators - - if request.method == 'POST': - uuid = request.args.get('uuid') - - url = request.form.get('url').strip() - tag = request.form.get('tag').strip() - - form_headers = request.form.get('headers').strip().split("\n") - extra_headers = {} - if form_headers: - for header in form_headers: - if len(header): - parts = header.split(':', 1) - extra_headers.update({parts[0].strip(): parts[1].strip()}) - - validators.url(url) # @todo switch to prop/attr/observer - datastore.data['watching'][uuid].update({'url': url, - 'tag': tag, - 'headers': extra_headers}) - datastore.needs_write = True - - messages.append({'class': 'ok', 'message': 'Updated watch.'}) - - return redirect(url_for('main_page')) - - else: - - uuid = request.args.get('uuid') - output = render_template("edit.html", uuid=uuid, watch=datastore.data['watching'][uuid], messages=messages) - - return output - - -@app.route("/settings", methods=['GET', "POST"]) -def settings_page(): - global messages - if request.method == 'POST': - try: - minutes = int(request.values.get('minutes').strip()) - except ValueError: - messages.append({'class': 'error', 'message': "Invalid value given, use an integer."}) - - else: - if minutes >= 5 and minutes <= 600: - datastore.data['settings']['requests']['minutes_between_check'] = minutes - datastore.needs_write = True - - messages.append({'class': 'ok', 'message': "Updated"}) - else: - messages.append({'class': 'error', 'message': "Must be equal to or greater than 5 and less than 600 minutes"}) - - output = render_template("settings.html", messages=messages, minutes=datastore.data['settings']['requests']['minutes_between_check']) - messages =[] - - return output - -@app.route("/import", methods=['GET', "POST"]) -def import_page(): - import validators - global messages - remaining_urls=[] - - good = 0 - - if request.method == 'POST': - urls = request.values.get('urls').split("\n") - for url in urls: - url = url.strip() - if len(url) and validators.url(url): - datastore.add_watch(url=url.strip(), tag="") - good += 1 - else: - if len(url): - remaining_urls.append(url) - - messages.append({'class': 'ok', 'message': "{} Imported, {} Skipped.".format(good, len(remaining_urls))}) - - output = render_template("import.html", - messages=messages, - remaining="\n".join(remaining_urls) - ) - messages = [] - return output - - -@app.route("/diff/", methods=['GET']) -def diff_history_page(uuid): - global messages - - extra_stylesheets=['/static/css/diff.css'] - - watch = datastore.data['watching'][uuid] - - dates = list(watch['history'].keys()) - # Convert to int, sort and back to str again - dates = [int(i) for i in dates] - 
dates.sort(reverse=True) - dates = [str(i) for i in dates] - - # Save the current newest history as the most recently viewed - datastore.set_last_viewed(uuid, dates[0]) - - newest_file = watch['history'][dates[0]] - with open(newest_file, 'r') as f: - newest_version_file_contents = f.read() - - previous_version = request.args.get('previous_version') - - try: - previous_file = watch['history'][previous_version] - except KeyError: - # Not present, use a default value, the second one in the sorted list. - previous_file = watch['history'][dates[1]] - - with open(previous_file, 'r') as f: - previous_version_file_contents = f.read() - - output = render_template("diff.html", watch_a=watch, - messages=messages, - newest=newest_version_file_contents, - previous=previous_version_file_contents, - extra_stylesheets=extra_stylesheets, - versions=dates[1:], - newest_version_timestamp=dates[0], - current_previous_version=str(previous_version), - current_diff_url=watch['url']) - - return output - -@app.route("/favicon.ico", methods=['GET']) -def favicon(): - return send_from_directory("/app/static/images", filename="favicon.ico") - - -# We're good but backups are even better! -@app.route("/backup", methods=['GET']) -def get_backup(): - import zipfile - from pathlib import Path - import zlib - - # create a ZipFile object - backupname = "changedetection-backup-{}.zip".format(int(time.time())) - - # We only care about UUIDS from the current index file - uuids = list(datastore.data['watching'].keys()) - - with zipfile.ZipFile(os.path.join("/datastore", backupname), 'w', compression=zipfile.ZIP_DEFLATED, - compresslevel=6) as zipObj: - - # Be sure we're written fresh - datastore.sync_to_json() - - # Add the index - zipObj.write(os.path.join("/datastore", "url-watches.json")) - # Add any snapshot data we find - for txt_file_path in Path('/datastore').rglob('*.txt'): - parent_p = txt_file_path.parent - if parent_p.name in uuids: - zipObj.write(txt_file_path) - - return send_file(os.path.join("/datastore", backupname), - as_attachment=True, - mimetype="application/zip", - attachment_filename=backupname) - - - -# A few self sanity checks, mostly for developer/bug check -@app.route("/self-check", methods=['GET']) -def selfcheck(): - output = "All fine" - # In earlier versions before a single threaded write of the JSON store, sometimes histories could get mixed. - # Could also maybe affect people who manually fiddle with their JSON store? - for uuid, watch in datastore.data['watching'].items(): - for timestamp, path in watch['history'].items(): - # Each history snapshot should include a full path, which contains the {uuid} - if not uuid in path: - output = "Something weird in {}, suspected incorrect snapshot path.".format(uuid) - - return output - -@app.route("/static//", methods=['GET']) -def static_content(group, filename): - try: - return send_from_directory("/app/static/{}".format(group), filename=filename) - except FileNotFoundError: - abort(404) - - -@app.route("/api/add", methods=['POST']) -def api_watch_add(): - global messages - - # @todo add_watch should throw a custom Exception for validation etc - new_uuid = datastore.add_watch(url=request.form.get('url').strip(), tag=request.form.get('tag').strip()) - # Straight into the queue. 
- update_q.put(new_uuid) - - messages.append({'class': 'ok', 'message': 'Watch added.'}) - return redirect(url_for('main_page')) - - -@app.route("/api/delete", methods=['GET']) -def api_delete(): - global messages - uuid = request.args.get('uuid') - datastore.delete(uuid) - messages.append({'class': 'ok', 'message': 'Deleted.'}) - - return redirect(url_for('main_page')) - - -@app.route("/api/checknow", methods=['GET']) -def api_watch_checknow(): - global messages - - tag = request.args.get('tag') - uuid = request.args.get('uuid') - i=0 - - if uuid: - update_q.put(uuid) - i = 1 - - elif tag != None: - for watch_uuid, watch in datastore.data['watching'].items(): - if (tag != None and tag in watch['tag']): - i += 1 - update_q.put(watch_uuid) - else: - # No tag, no uuid, add everything. - for watch_uuid, watch in datastore.data['watching'].items(): - i += 1 - update_q.put(watch_uuid) - - messages.append({'class': 'ok', 'message': "{} watches are rechecking.".format(i)}) - return redirect(url_for('main_page', tag=tag)) - - - -# Requests for checking on the site use a pool of thread Workers managed by a Queue. -class Worker(threading.Thread): - - current_uuid = None - - - def __init__(self, q, *args, **kwargs): - self.q = q - super().__init__(*args, **kwargs) - - def run(self): - import fetch_site_status - - from copy import deepcopy - - update_handler = fetch_site_status.perform_site_check(datastore=datastore) - - try: - while True: - uuid = self.q.get() # Blocking - self.current_uuid = uuid - - if uuid in list(datastore.data['watching'].keys()): - - result = update_handler.run(uuid) - - datastore.update_watch(uuid=uuid, update_obj=result) - - - self.current_uuid = None # Done - self.q.task_done() - - except KeyboardInterrupt: - return - -# Thread runner to check every minute, look for new watches to feed into the Queue. -def ticker_thread_check_time_launch_checks(): - - # Spin up Workers. - for _ in range(datastore.data['settings']['requests']['workers']): - new_worker = Worker(update_q) - running_update_threads.append(new_worker) - new_worker.start() - - # Every minute check for new UUIDs to follow up on - while True: - minutes = datastore.data['settings']['requests']['minutes_between_check'] - for uuid, watch in datastore.data['watching'].items(): - if watch['last_checked'] <= time.time() - (minutes * 60): - update_q.put(uuid) - - time.sleep(60) - - -# Thread runner, this helps with thread/write issues when there are many operations that want to update the JSON -# by just running periodically in one thread, according to python, dict updates are threadsafe. -def save_datastore(): - try: - while True: - if datastore.needs_write: - datastore.sync_to_json() - time.sleep(1) - - except KeyboardInterrupt: - return - -def main(argv): - ssl_mode = False - port = 5000 - - try: - opts, args = getopt.getopt(argv, "sp:", "purge") - except getopt.GetoptError: - print('backend.py -s SSL enable -p [port]') - sys.exit(2) - - for opt, arg in opts: - if opt == '--purge': - # Remove history, the actual files you need to delete manually. 
- for uuid, watch in datastore.data['watching'].items(): - watch.update({'history': {}, 'last_checked': 0, 'last_changed': 0, 'previous_md5': None}) - - if opt == '-s': - ssl_mode = True - - if opt == '-p': - port = arg - - # @todo handle ctrl break - ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start() - - save_data_thread = threading.Thread(target=save_datastore).start() - - # @todo finalise SSL config, but this should get you in the right direction if you need it. - if ssl_mode: - eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen(('', port)), - certfile='cert.pem', - keyfile='privkey.pem', - server_side=True), app) - - else: - eventlet.wsgi.server(eventlet.listen(('', port)), app) - - -if __name__ == '__main__': - main(sys.argv[1:]) diff --git a/backend/fetch_site_status.py b/backend/fetch_site_status.py index a5770ffb..83647c4c 100644 --- a/backend/fetch_site_status.py +++ b/backend/fetch_site_status.py @@ -20,11 +20,8 @@ class perform_site_check(): return def ensure_output_path(self): + os.mkdir(self.output_path) - try: - os.stat(self.output_path) - except: - os.mkdir(self.output_path) def save_response_stripped_output(self, output, fname): diff --git a/backend/nocache.py b/backend/nocache.py deleted file mode 100644 index 9d0b2ae2..00000000 --- a/backend/nocache.py +++ /dev/null @@ -1,14 +0,0 @@ - -from flask import make_response -from functools import wraps, update_wrapper -from datetime import datetime - -def nocache(view): - @wraps(view) - def no_cache(*args, **kwargs): - response = make_response(view(*args, **kwargs)) - response.headers['hmm'] = datetime.now() - - return response - - return update_wrapper(no_cache, view) diff --git a/backend/store.py b/backend/store.py index f68d60e8..cc3c0976 100644 --- a/backend/store.py +++ b/backend/store.py @@ -13,8 +13,9 @@ from copy import deepcopy class ChangeDetectionStore: lock = Lock() - def __init__(self): + def __init__(self, datastore_path="/datastore"): self.needs_write = False + self.datastore_path = datastore_path self.__data = { 'note': "Hello! 
If you change this file manually, please be sure to restart your changedetection.io instance!", diff --git a/backend/test_backend.py b/backend/test_backend.py new file mode 100644 index 00000000..3909d0f6 --- /dev/null +++ b/backend/test_backend.py @@ -0,0 +1,30 @@ +#!/usr/bin/python3 + +import pytest +import backend + +# https://github.com/pallets/flask/blob/1.1.2/examples/tutorial/tests/test_auth.py + +# Much better boilerplate than the docs +# https://www.python-boilerplate.com/py3+flask+pytest/ + +@pytest.fixture +def app(request): + app = backend.changedetection_app({'datastore_path':'./datastorexxx'}) + app.debug = True + + def teardown(): + app.config['STOP_THREADS']=True + print("teardown") + + request.addfinalizer(teardown) + + return app.test_client() + + +def test_hello_world(app): + res = app.get("/") + # print(dir(res), res.status_code) + assert res.status_code == 200 + assert b"IMPORT" in res.data + From 93ee65fe533a002e956b246aced02cc16cea3147 Mon Sep 17 00:00:00 2001 From: Leigh Morresi <275001+dgtlmoon@users.noreply.github.com> Date: Fri, 12 Feb 2021 19:43:05 +0100 Subject: [PATCH 02/18] Tidy up a few broken datastore paths --- Dockerfile | 2 +- backend.py | 27 +++++++++------- backend/__init__.py | 61 ++++++++++++++++++------------------ backend/fetch_site_status.py | 7 +++-- backend/store.py | 16 ++++------ backend/test_backend.py | 18 +++++++++-- 6 files changed, 75 insertions(+), 56 deletions(-) diff --git a/Dockerfile b/Dockerfile index e44eace7..109305ee 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,7 +18,7 @@ RUN echo "commit: $SOURCE_COMMIT branch: $SOURCE_BRANCH" >/source.txt RUN [ ! -d "/datastore" ] && mkdir /datastore -CMD [ "python", "./backend.py" ] +CMD [ "python", "./backend.py" , "-d", "/datastore"] diff --git a/backend.py b/backend.py index ef002d9d..fb3dfd80 100644 --- a/backend.py +++ b/backend.py @@ -9,6 +9,9 @@ import eventlet import eventlet.wsgi import backend +from backend import store + + def main(argv): ssl_mode = False port = 5000 @@ -17,14 +20,14 @@ def main(argv): try: opts, args = getopt.getopt(argv, "sd:p:", "purge") except getopt.GetoptError: - print('backend.py -s SSL enable -p [port]') + print('backend.py -s SSL enable -p [port] -d [datastore path]') sys.exit(2) for opt, arg in opts: -# if opt == '--purge': - # Remove history, the actual files you need to delete manually. -# for uuid, watch in datastore.data['watching'].items(): -# watch.update({'history': {}, 'last_checked': 0, 'last_changed': 0, 'previous_md5': None}) + # if opt == '--purge': + # Remove history, the actual files you need to delete manually. + # for uuid, watch in datastore.data['watching'].items(): + # watch.update({'history': {}, 'last_checked': 0, 'last_changed': 0, 'previous_md5': None}) if opt == '-s': ssl_mode = True @@ -36,19 +39,21 @@ def main(argv): datastore_path = arg - # @todo finalise SSL config, but this should get you in the right direction if you need it. + # Kinda weird to tell them both where `datastore_path` is right.. + app_config = {'datastore_path': datastore_path} + datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path']) + app = backend.changedetection_app(app_config, datastore) - app = backend.changedetection_app({'datastore_path':datastore_path}) if ssl_mode: + # @todo finalise SSL config, but this should get you in the right direction if you need it. 
eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen(('', port)), certfile='cert.pem', keyfile='privkey.pem', server_side=True), app) else: - eventlet.wsgi.server(eventlet.listen(('', port)), backend.changedetection_app()) + eventlet.wsgi.server(eventlet.listen(('', port)), app) -if __name__ == '__main__': - main(sys.argv) -#print (__name__) \ No newline at end of file +if __name__ == '__main__': + main(sys.argv[1:]) diff --git a/backend/__init__.py b/backend/__init__.py index 4a60aff4..2d8ce9c8 100644 --- a/backend/__init__.py +++ b/backend/__init__.py @@ -24,10 +24,9 @@ import queue from flask import Flask, render_template, request, send_file, send_from_directory, safe_join, abort, redirect, url_for +datastore=None # Local - -from backend import store running_update_threads = [] ticker_thread = None @@ -75,13 +74,14 @@ def _jinja2_filter_datetimestamp(timestamp, format="%Y-%m-%d %H:%M:%S"): # return timeago.format(timestamp, time.time()) # return datetime.datetime.utcfromtimestamp(timestamp).strftime(format) -def changedetection_app(config=None): +def changedetection_app(config=None, datastore_o=None): + global datastore + datastore = datastore_o # Hmm app.config.update(dict(DEBUG=True)) app.config.update(config or {}) - datastore = store.ChangeDetectionStore(datastore_path=app.config['datastore_path']) # Setup cors headers to allow all domains # https://flask-cors.readthedocs.io/en/latest/ @@ -454,37 +454,38 @@ class Worker(threading.Thread): self.q.task_done() - # Thread runner to check every minute, look for new watches to feed into the Queue. - def ticker_thread_check_time_launch_checks(): +# Thread runner to check every minute, look for new watches to feed into the Queue. +def ticker_thread_check_time_launch_checks(): - # Spin up Workers. - for _ in range(datastore.data['settings']['requests']['workers']): - new_worker = Worker(update_q) - running_update_threads.append(new_worker) - new_worker.start() + # Spin up Workers. + for _ in range(datastore.data['settings']['requests']['workers']): + print ("...") + new_worker = Worker(update_q) + running_update_threads.append(new_worker) + new_worker.start() - # Every minute check for new UUIDs to follow up on - while True: - minutes = datastore.data['settings']['requests']['minutes_between_check'] - for uuid, watch in datastore.data['watching'].items(): - if watch['last_checked'] <= time.time() - (minutes * 60): - update_q.put(uuid) + # Every minute check for new UUIDs to follow up on + while True: + minutes = datastore.data['settings']['requests']['minutes_between_check'] + for uuid, watch in datastore.data['watching'].items(): + if watch['last_checked'] <= time.time() - (minutes * 60): + update_q.put(uuid) - if app.config['STOP_THREADS']: - return - time.sleep(1) + if app.config['STOP_THREADS']: + return + time.sleep(1) - # Thread runner, this helps with thread/write issues when there are many operations that want to update the JSON - # by just running periodically in one thread, according to python, dict updates are threadsafe. - def save_datastore(): +# Thread runner, this helps with thread/write issues when there are many operations that want to update the JSON +# by just running periodically in one thread, according to python, dict updates are threadsafe. 
+def save_datastore(): - global stop_threads + global stop_threads - while True: - if stop_threads: - return - if datastore.needs_write: - datastore.sync_to_json() - time.sleep(1) + while True: + if app.config['STOP_THREADS']: + return + if datastore.needs_write: + datastore.sync_to_json() + time.sleep(1) diff --git a/backend/fetch_site_status.py b/backend/fetch_site_status.py index 83647c4c..2ed85c05 100644 --- a/backend/fetch_site_status.py +++ b/backend/fetch_site_status.py @@ -20,7 +20,10 @@ class perform_site_check(): return def ensure_output_path(self): - os.mkdir(self.output_path) + try: + os.mkdir(self.output_path) + except FileExistsError: + print (self.output_path, "already exists.") def save_response_stripped_output(self, output, fname): @@ -40,7 +43,7 @@ class perform_site_check(): "last_checked": timestamp } - self.output_path = "/datastore/{}".format(uuid) + self.output_path = "{}/{}".format(self.datastore.datastore_path,uuid) self.ensure_output_path() extra_headers = self.datastore.get_val(uuid, 'headers') diff --git a/backend/store.py b/backend/store.py index cc3c0976..36f9eebd 100644 --- a/backend/store.py +++ b/backend/store.py @@ -7,6 +7,7 @@ from threading import Lock, Thread from copy import deepcopy + # Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods? # Open a github issue if you know something :) # https://stackoverflow.com/questions/6190468/how-to-trigger-function-on-value-change @@ -42,7 +43,7 @@ class ChangeDetectionStore: 'tag': None, 'last_checked': 0, 'last_changed': 0, - 'last_viewed': 0, # history key value of the last viewed via the [diff] link + 'last_viewed': 0, # history key value of the last viewed via the [diff] link 'newest_history_key': "", 'title': None, 'previous_md5': "", @@ -58,7 +59,7 @@ class ChangeDetectionStore: self.__data['build_sha'] = f.read() try: - with open('/datastore/url-watches.json') as json_file: + with open("{}/url-watches.json".format(self.datastore_path)) as json_file: from_disk = json.load(json_file) # @todo isnt there a way todo this dict.update recursively? @@ -85,7 +86,7 @@ class ChangeDetectionStore: # First time ran, doesnt exist. 
except (FileNotFoundError, json.decoder.JSONDecodeError): - print("Creating JSON store") + print("Creating JSON store at", self.datastore_path) self.add_watch(url='http://www.quotationspage.com/random.php', tag='test') self.add_watch(url='https://news.ycombinator.com/', tag='Tech news') self.add_watch(url='https://www.gov.uk/coronavirus', tag='Covid') @@ -106,9 +107,6 @@ class ChangeDetectionStore: return 0 - - - def set_last_viewed(self, uuid, timestamp): self.data['watching'][uuid].update({'last_viewed': str(timestamp)}) self.needs_write = True @@ -122,7 +120,7 @@ class ChangeDetectionStore: if isinstance(d, dict): if update_obj is not None and dict_key in update_obj: self.__data['watching'][uuid][dict_key].update(update_obj[dict_key]) - del(update_obj[dict_key]) + del (update_obj[dict_key]) self.__data['watching'][uuid].update(update_obj) self.__data['watching'][uuid]['newest_history_key'] = self.get_newest_history_key(uuid) @@ -167,7 +165,6 @@ class ChangeDetectionStore: def add_watch(self, url, tag): with self.lock: - # @todo use a common generic version of this new_uuid = str(uuid_builder.uuid4()) _blank = deepcopy(self.generic_definition) @@ -185,8 +182,7 @@ class ChangeDetectionStore: def sync_to_json(self): - - with open('/datastore/url-watches.json', 'w') as json_file: + with open("{}/url-watches.json".format(self.datastore_path), 'w') as json_file: json.dump(self.__data, json_file, indent=4) print("Re-saved index") diff --git a/backend/test_backend.py b/backend/test_backend.py index 3909d0f6..7ac1936f 100644 --- a/backend/test_backend.py +++ b/backend/test_backend.py @@ -2,7 +2,8 @@ import pytest import backend - +from backend import store +import os # https://github.com/pallets/flask/blob/1.1.2/examples/tutorial/tests/test_auth.py # Much better boilerplate than the docs @@ -10,7 +11,20 @@ import backend @pytest.fixture def app(request): - app = backend.changedetection_app({'datastore_path':'./datastorexxx'}) + + + datastore_path ="./test-datastore" + try: + os.mkdir(datastore_path) + except FileExistsError: + pass + + # Kinda weird to tell them both where `datastore_path` is right.. + app_config = {'datastore_path': datastore_path} + datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path']) + app = backend.changedetection_app(app_config, datastore) + + app.debug = True def teardown(): From 87f4347fe56c6c893f5e0cc01421fa640f810c8a Mon Sep 17 00:00:00 2001 From: Leigh Morresi <275001+dgtlmoon@users.noreply.github.com> Date: Tue, 16 Feb 2021 21:35:28 +0100 Subject: [PATCH 03/18] hack of pytest implementation - doesnt work yet --- backend.py | 8 ++++- backend/__init__.py | 65 +++++++++++++++-------------------- backend/fetch_site_status.py | 32 ++--------------- backend/store.py | 54 +++++++++++++++++++++++++---- backend/test_backend.py | 44 ------------------------ backend/tests/test_backend.py | 39 +++++++++++++++++++++ 6 files changed, 123 insertions(+), 119 deletions(-) delete mode 100644 backend/test_backend.py create mode 100644 backend/tests/test_backend.py diff --git a/backend.py b/backend.py index fb3dfd80..83422615 100644 --- a/backend.py +++ b/backend.py @@ -39,8 +39,14 @@ def main(argv): datastore_path = arg - # Kinda weird to tell them both where `datastore_path` is right.. + + # threads can read from disk every x seconds right? 
+ # front end can just save + # We just need to know which threads are looking at which UUIDs + + # isnt there some @thingy to attach to each route to tell it, that this route needs a datastore app_config = {'datastore_path': datastore_path} + datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path']) app = backend.changedetection_app(app_config, datastore) diff --git a/backend/__init__.py b/backend/__init__.py index 2d8ce9c8..1fa98bb0 100644 --- a/backend/__init__.py +++ b/backend/__init__.py @@ -238,11 +238,14 @@ def changedetection_app(config=None, datastore_o=None): messages.append({'class': 'ok', 'message': "{} Imported, {} Skipped.".format(good, len(remaining_urls))}) - output = render_template("import.html", - messages=messages, - remaining="\n".join(remaining_urls) - ) - messages = [] + if len(remaining_urls) == 0: + return redirect(url_for('main_page')) + else: + output = render_template("import.html", + messages=messages, + remaining="\n".join(remaining_urls) + ) + messages = [] return output @@ -328,21 +331,6 @@ def changedetection_app(config=None, datastore_o=None): attachment_filename=backupname) - - # A few self sanity checks, mostly for developer/bug check - @app.route("/self-check", methods=['GET']) - def selfcheck(): - output = "All fine" - # In earlier versions before a single threaded write of the JSON store, sometimes histories could get mixed. - # Could also maybe affect people who manually fiddle with their JSON store? - for uuid, watch in datastore.data['watching'].items(): - for timestamp, path in watch['history'].items(): - # Each history snapshot should include a full path, which contains the {uuid} - if not uuid in path: - output = "Something weird in {}, suspected incorrect snapshot path.".format(uuid) - - return output - @app.route("/static//", methods=['GET']) def static_content(group, filename): # These files should be in our subdirectory @@ -380,6 +368,7 @@ def changedetection_app(config=None, datastore_o=None): @app.route("/api/checknow", methods=['GET']) def api_watch_checknow(): + global messages tag = request.args.get('tag') @@ -404,11 +393,14 @@ def changedetection_app(config=None, datastore_o=None): messages.append({'class': 'ok', 'message': "{} watches are rechecking.".format(i)}) return redirect(url_for('main_page', tag=tag)) - + # for pytest flask + @app.route("/timestamp", methods=['GET']) + def api_test_rand_int(): + return str(time.time()) # @todo handle ctrl break ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start() - save_data_thread = threading.Thread(target=save_datastore).start() + return app @@ -434,6 +426,8 @@ class Worker(threading.Thread): uuid = self.q.get(block=True, timeout=1) # Blocking except queue.Empty: # We have a chance to kill this thread that needs to monitor for new jobs.. 
+ # Delays here would be caused by a current response object pending + # @todo switch to threaded response handler if app.config['STOP_THREADS']: return else: @@ -442,14 +436,23 @@ class Worker(threading.Thread): if uuid in list(datastore.data['watching'].keys()): try: - result = update_handler.run(uuid) + result, contents = update_handler.run(uuid) except PermissionError as s: - print ("File permission error updating", uuid,str(s)) + app.logger.error("File permission error updating", uuid, str(s)) else: if result: datastore.update_watch(uuid=uuid, update_obj=result) + if contents: + # A change was detected + datastore.save_history_text(uuid=uuid, contents=contents, result_obj=result) + + else: + # No change + x = 1 + + self.current_uuid = None # Done self.q.task_done() @@ -459,7 +462,6 @@ def ticker_thread_check_time_launch_checks(): # Spin up Workers. for _ in range(datastore.data['settings']['requests']['workers']): - print ("...") new_worker = Worker(update_q) running_update_threads.append(new_worker) new_worker.start() @@ -473,19 +475,6 @@ def ticker_thread_check_time_launch_checks(): if app.config['STOP_THREADS']: return - time.sleep(1) - - -# Thread runner, this helps with thread/write issues when there are many operations that want to update the JSON -# by just running periodically in one thread, according to python, dict updates are threadsafe. -def save_datastore(): - global stop_threads - - while True: - if app.config['STOP_THREADS']: - return - if datastore.needs_write: - datastore.sync_to_json() time.sleep(1) diff --git a/backend/fetch_site_status.py b/backend/fetch_site_status.py index 2ed85c05..2ab531ee 100644 --- a/backend/fetch_site_status.py +++ b/backend/fetch_site_status.py @@ -15,37 +15,17 @@ class perform_site_check(): super().__init__(*args, **kwargs) self.datastore = datastore - def save_firefox_screenshot(self, uuid, output): - # @todo call selenium or whatever - return - - def ensure_output_path(self): - try: - os.mkdir(self.output_path) - except FileExistsError: - print (self.output_path, "already exists.") - - - def save_response_stripped_output(self, output, fname): - - with open(fname, 'w') as f: - f.write(output) - f.close() - - return fname def run(self, uuid): timestamp = int(time.time()) # used for storage etc too + stripped_text_from_html = False update_obj = {'previous_md5': self.datastore.data['watching'][uuid]['previous_md5'], 'history': {}, "last_checked": timestamp } - self.output_path = "{}/{}".format(self.datastore.datastore_path,uuid) - self.ensure_output_path() - extra_headers = self.datastore.get_val(uuid, 'headers') # Tweak the base config with the per-watch ones @@ -111,13 +91,5 @@ class perform_site_check(): update_obj["last_changed"] = timestamp update_obj["previous_md5"] = fetched_md5 - fname = "{}/{}.stripped.txt".format(self.output_path, fetched_md5) - with open(fname, 'w') as f: - f.write(stripped_text_from_html) - f.close() - - # Update history with the stripped text for future reference, this will also mean we save the first - # Should always be keyed by string(timestamp) - update_obj.update({"history": {str(timestamp): fname}}) - return update_obj + return update_obj, stripped_text_from_html diff --git a/backend/store.py b/backend/store.py index 36f9eebd..715892d2 100644 --- a/backend/store.py +++ b/backend/store.py @@ -7,6 +7,9 @@ from threading import Lock, Thread from copy import deepcopy +import logging +import time +import threading # Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods? 
# Open a github issue if you know something :) @@ -17,7 +20,8 @@ class ChangeDetectionStore: def __init__(self, datastore_path="/datastore"): self.needs_write = False self.datastore_path = datastore_path - + self.json_store_path = "{}/url-watches.json".format(self.datastore_path) + self.stop_thread = False self.__data = { 'note': "Hello! If you change this file manually, please be sure to restart your changedetection.io instance!", 'watching': {}, @@ -59,7 +63,7 @@ class ChangeDetectionStore: self.__data['build_sha'] = f.read() try: - with open("{}/url-watches.json".format(self.datastore_path)) as json_file: + with open(self.json_store_path) as json_file: from_disk = json.load(json_file) # @todo isnt there a way todo this dict.update recursively? @@ -92,6 +96,9 @@ class ChangeDetectionStore: self.add_watch(url='https://www.gov.uk/coronavirus', tag='Covid') self.add_watch(url='https://changedetection.io', tag='Tech news') + # Finally start the thread that will manage periodic data saves to JSON + save_data_thread = threading.Thread(target=self.save_datastore).start() + # Returns the newest key, but if theres only 1 record, then it's counted as not being new, so return 0. def get_newest_history_key(self, uuid): if len(self.__data['watching'][uuid]['history']) == 1: @@ -176,16 +183,51 @@ class ChangeDetectionStore: self.data['watching'][new_uuid] = _blank - self.needs_write = True + # Get the directory ready + output_path = "{}/{}".format(self.datastore_path, new_uuid) + try: + os.mkdir(output_path) + except FileExistsError: + print(output_path, "already exists.") + self.sync_to_json() return new_uuid - def sync_to_json(self): + # Save some text file to the appropriate path and bump the history + # result_obj from fetch_site_status.run() + def save_history_text(self, uuid, result_obj, contents): + + output_path = "{}/{}".format(self.datastore_path, uuid) + fname = "{}/{}.stripped.txt".format(output_path, result_obj['previous_md5']) + with open(fname, 'w') as f: + f.write(contents) + f.close() - with open("{}/url-watches.json".format(self.datastore_path), 'w') as json_file: + # Update history with the stripped text for future reference, this will also mean we save the first + # Should always be keyed by string(timestamp) + self.update_watch(uuid, {"history": {str(result_obj["last_checked"]): fname}}) + + return fname + + def sync_to_json(self): + print ("Saving..") + with open(self.json_store_path, 'w') as json_file: json.dump(self.__data, json_file, indent=4) - print("Re-saved index") + logging.info("Re-saved index") self.needs_write = False + # Thread runner, this helps with thread/write issues when there are many operations that want to update the JSON + # by just running periodically in one thread, according to python, dict updates are threadsafe. 
+ def save_datastore(self): + + while True: + if self.stop_thread: + print ("Shutting down datastore thread") + return + if self.needs_write: + self.sync_to_json() + time.sleep(1) + + # body of the constructor diff --git a/backend/test_backend.py b/backend/test_backend.py deleted file mode 100644 index 7ac1936f..00000000 --- a/backend/test_backend.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/python3 - -import pytest -import backend -from backend import store -import os -# https://github.com/pallets/flask/blob/1.1.2/examples/tutorial/tests/test_auth.py - -# Much better boilerplate than the docs -# https://www.python-boilerplate.com/py3+flask+pytest/ - -@pytest.fixture -def app(request): - - - datastore_path ="./test-datastore" - try: - os.mkdir(datastore_path) - except FileExistsError: - pass - - # Kinda weird to tell them both where `datastore_path` is right.. - app_config = {'datastore_path': datastore_path} - datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path']) - app = backend.changedetection_app(app_config, datastore) - - - app.debug = True - - def teardown(): - app.config['STOP_THREADS']=True - print("teardown") - - request.addfinalizer(teardown) - - return app.test_client() - - -def test_hello_world(app): - res = app.get("/") - # print(dir(res), res.status_code) - assert res.status_code == 200 - assert b"IMPORT" in res.data - diff --git a/backend/tests/test_backend.py b/backend/tests/test_backend.py new file mode 100644 index 00000000..b27c1743 --- /dev/null +++ b/backend/tests/test_backend.py @@ -0,0 +1,39 @@ +#!/usr/bin/python3 + +import pytest +import backend +from backend import store +import os +import time +import requests +# https://github.com/pallets/flask/blob/1.1.2/examples/tutorial/tests/test_auth.py + +# Much better boilerplate than the docs +# https://www.python-boilerplate.com/py3+flask+pytest/ + + +def test_import(session): + res = session.get("/") + assert b"IMPORT" in res.data + assert res.status_code == 200 + + test_url_list = ["https://slashdot.org"] + res = session.post('/import', data={'urls': "\n".join(test_url_list)}, follow_redirects=True) + s = "{} Imported".format(len(test_url_list)) + + #p= url_for('test_endpoint', _external=True + + assert bytes(s.encode('utf-8')) in res.data + + for url in test_url_list: + assert bytes(url.encode('utf-8')) in res.data + + #response = requests.get('http://localhost:5000/random_string') + #assert response.status_code == 200 + #assert response.json() == [{'id': 1}] + + +def test_import_a(session): + res = session.get("/") + assert b"IMPORT" in res.data + assert res.status_code == 200 From b361a61d18de7104cf493a734044474edf36661f Mon Sep 17 00:00:00 2001 From: Leigh Morresi <275001+dgtlmoon@users.noreply.github.com> Date: Tue, 16 Feb 2021 21:36:41 +0100 Subject: [PATCH 04/18] Addingmissing files --- backend/tests/__init__.py | 2 ++ backend/tests/conftest.py | 46 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 backend/tests/__init__.py create mode 100644 backend/tests/conftest.py diff --git a/backend/tests/__init__.py b/backend/tests/__init__.py new file mode 100644 index 00000000..085b3d78 --- /dev/null +++ b/backend/tests/__init__.py @@ -0,0 +1,2 @@ +"""Tests for the app.""" + diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py new file mode 100644 index 00000000..cf7165d3 --- /dev/null +++ b/backend/tests/conftest.py @@ -0,0 +1,46 @@ +#!/usr/bin/python3 + +import pytest +from webtest import TestApp +from backend import 
changedetection_app +from backend import store +import os + + +# https://github.com/pallets/flask/blob/1.1.2/examples/tutorial/tests/test_auth.py + +# Much better boilerplate than the docs +# https://www.python-boilerplate.com/py3+flask+pytest/ + + + +@pytest.fixture(scope='session') +def app(request): + """Create application for the tests.""" + + datastore_path = "./test-datastore" + app_config = {'datastore_path': datastore_path} + datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path']) + _app = changedetection_app(app_config, datastore) + + + # Establish an application context before running the tests. + ctx = _app.app_context() + ctx.push() + + def teardown(): + ctx.pop() + + request.addfinalizer(teardown) + return _app + +@pytest.fixture(scope='function') +def session(request): + """Creates a new database session for a test.""" + + + def teardown(): + print ("teardown") + + request.addfinalizer(teardown) + return session From c8607ae8bb5c0eb4d16cf8b5f28cf453ddb579ff Mon Sep 17 00:00:00 2001 From: Leigh Morresi <275001+dgtlmoon@users.noreply.github.com> Date: Tue, 16 Feb 2021 21:42:26 +0100 Subject: [PATCH 05/18] Use session/client fixture --- backend/tests/conftest.py | 12 ++++-------- backend/tests/test_backend.py | 10 +++++----- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index cf7165d3..b61cc133 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -13,7 +13,6 @@ import os # https://www.python-boilerplate.com/py3+flask+pytest/ - @pytest.fixture(scope='session') def app(request): """Create application for the tests.""" @@ -23,7 +22,6 @@ def app(request): datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path']) _app = changedetection_app(app_config, datastore) - # Establish an application context before running the tests. 
ctx = _app.app_context() ctx.push() @@ -34,13 +32,11 @@ def app(request): request.addfinalizer(teardown) return _app +@pytest.fixture(scope='session') +def client(app): + return app.test_client() + @pytest.fixture(scope='function') def session(request): """Creates a new database session for a test.""" - - - def teardown(): - print ("teardown") - - request.addfinalizer(teardown) return session diff --git a/backend/tests/test_backend.py b/backend/tests/test_backend.py index b27c1743..55dab50c 100644 --- a/backend/tests/test_backend.py +++ b/backend/tests/test_backend.py @@ -12,13 +12,13 @@ import requests # https://www.python-boilerplate.com/py3+flask+pytest/ -def test_import(session): - res = session.get("/") +def test_import(client): + res = client.get("/") assert b"IMPORT" in res.data assert res.status_code == 200 test_url_list = ["https://slashdot.org"] - res = session.post('/import', data={'urls': "\n".join(test_url_list)}, follow_redirects=True) + res = client.post('/import', data={'urls': "\n".join(test_url_list)}, follow_redirects=True) s = "{} Imported".format(len(test_url_list)) #p= url_for('test_endpoint', _external=True @@ -33,7 +33,7 @@ def test_import(session): #assert response.json() == [{'id': 1}] -def test_import_a(session): - res = session.get("/") +def test_import_a(client): + res = client.get("/") assert b"IMPORT" in res.data assert res.status_code == 200 From d4db082c01dc14d87739fe3b8e7b3708d221104c Mon Sep 17 00:00:00 2001 From: Leigh Morresi <275001+dgtlmoon@users.noreply.github.com> Date: Tue, 16 Feb 2021 21:44:44 +0100 Subject: [PATCH 06/18] remove unused imports --- backend/tests/conftest.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index b61cc133..213173ac 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -1,10 +1,8 @@ #!/usr/bin/python3 import pytest -from webtest import TestApp from backend import changedetection_app from backend import store -import os # https://github.com/pallets/flask/blob/1.1.2/examples/tutorial/tests/test_auth.py From 4770ebb2eacfc1ff9725745ce52b35cdb76760d5 Mon Sep 17 00:00:00 2001 From: Leigh Morresi <275001+dgtlmoon@users.noreply.github.com> Date: Tue, 16 Feb 2021 21:48:38 +0100 Subject: [PATCH 07/18] Tweaking client --- backend/tests/conftest.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index 213173ac..03ccd21c 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -32,7 +32,9 @@ def app(request): @pytest.fixture(scope='session') def client(app): - return app.test_client() + with app.test_client() as client: + yield client + @pytest.fixture(scope='function') def session(request): From b46a7fc4b1dff071b99f1961cc89646f8c124438 Mon Sep 17 00:00:00 2001 From: Leigh Morresi <275001+dgtlmoon@users.noreply.github.com> Date: Sun, 21 Feb 2021 13:40:48 +0100 Subject: [PATCH 08/18] Port should be an integer --- backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend.py b/backend.py index 83422615..4e6058d2 100644 --- a/backend.py +++ b/backend.py @@ -33,7 +33,7 @@ def main(argv): ssl_mode = True if opt == '-p': - port = arg + port = int(arg) if opt == '-d': datastore_path = arg From 1718e2e86f30f738d3ccdb5b4c3fe2296038067f Mon Sep 17 00:00:00 2001 From: Leigh Morresi <275001+dgtlmoon@users.noreply.github.com> Date: Sun, 21 Feb 2021 13:41:00 +0100 Subject: [PATCH 09/18] Finalse pytest methods --- backend/README-pytest.md | 1 + 
backend/__init__.py | 58 ++++++++++++------ backend/fetch_site_status.py | 11 ++-- backend/store.py | 10 +-- backend/tests/conftest.py | 32 ++++++---- backend/tests/test_backend.py | 111 ++++++++++++++++++++++++++-------- requirements.txt | 3 + 7 files changed, 157 insertions(+), 69 deletions(-) create mode 100644 backend/README-pytest.md diff --git a/backend/README-pytest.md b/backend/README-pytest.md new file mode 100644 index 00000000..1d32ac0f --- /dev/null +++ b/backend/README-pytest.md @@ -0,0 +1 @@ +Note: run `pytest` from this directory. diff --git a/backend/__init__.py b/backend/__init__.py index 1fa98bb0..e597c766 100644 --- a/backend/__init__.py +++ b/backend/__init__.py @@ -91,7 +91,7 @@ def changedetection_app(config=None, datastore_o=None): # You can divide up the stuff like this @app.route("/", methods=['GET']) - def main_page(): + def index(): global messages limit_tag = request.args.get('tag') @@ -152,7 +152,7 @@ def changedetection_app(config=None, datastore_o=None): else: messages.append({'class': 'error', 'message': 'Wrong confirm text.'}) - return redirect(url_for('main_page')) + return redirect(url_for('index')) return render_template("scrub.html") @@ -184,7 +184,7 @@ def changedetection_app(config=None, datastore_o=None): messages.append({'class': 'ok', 'message': 'Updated watch.'}) - return redirect(url_for('main_page')) + return redirect(url_for('index')) else: @@ -230,7 +230,9 @@ def changedetection_app(config=None, datastore_o=None): for url in urls: url = url.strip() if len(url) and validators.url(url): - datastore.add_watch(url=url.strip(), tag="") + new_uuid = datastore.add_watch(url=url.strip(), tag="") + # Straight into the queue. + update_q.put(new_uuid) good += 1 else: if len(url): @@ -239,7 +241,7 @@ def changedetection_app(config=None, datastore_o=None): messages.append({'class': 'ok', 'message': "{} Imported, {} Skipped.".format(good, len(remaining_urls))}) if len(remaining_urls) == 0: - return redirect(url_for('main_page')) + return redirect(url_for('index')) else: output = render_template("import.html", messages=messages, @@ -353,7 +355,7 @@ def changedetection_app(config=None, datastore_o=None): update_q.put(new_uuid) messages.append({'class': 'ok', 'message': 'Watch added.'}) - return redirect(url_for('main_page')) + return redirect(url_for('index')) @app.route("/api/delete", methods=['GET']) @@ -363,7 +365,7 @@ def changedetection_app(config=None, datastore_o=None): datastore.delete(uuid) messages.append({'class': 'ok', 'message': 'Deleted.'}) - return redirect(url_for('main_page')) + return redirect(url_for('index')) @app.route("/api/checknow", methods=['GET']) @@ -375,28 +377,34 @@ def changedetection_app(config=None, datastore_o=None): uuid = request.args.get('uuid') i=0 + running_uuids=[] + for t in running_update_threads: + running_uuids.append(t.current_uuid) + + # @todo check thread is running and skip + if uuid: - update_q.put(uuid) + if not uuid in running_uuids: + update_q.put(uuid) i = 1 elif tag != None: + # Items that have this current tag for watch_uuid, watch in datastore.data['watching'].items(): if (tag != None and tag in watch['tag']): i += 1 - update_q.put(watch_uuid) + if not watch_uuid in running_uuids: + update_q.put(watch_uuid) else: # No tag, no uuid, add everything. 
for watch_uuid, watch in datastore.data['watching'].items(): i += 1 - update_q.put(watch_uuid) + if not watch_uuid in running_uuids: + update_q.put(watch_uuid) messages.append({'class': 'ok', 'message': "{} watches are rechecking.".format(i)}) - return redirect(url_for('main_page', tag=tag)) + return redirect(url_for('index', tag=tag)) - # for pytest flask - @app.route("/timestamp", methods=['GET']) - def api_test_rand_int(): - return str(time.time()) # @todo handle ctrl break ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start() @@ -423,7 +431,7 @@ class Worker(threading.Thread): while True: try: - uuid = self.q.get(block=True, timeout=1) # Blocking + uuid = self.q.get(block=True, timeout=1) except queue.Empty: # We have a chance to kill this thread that needs to monitor for new jobs.. # Delays here would be caused by a current response object pending @@ -442,6 +450,8 @@ class Worker(threading.Thread): app.logger.error("File permission error updating", uuid, str(s)) else: if result: + + result["previous_md5"] = result["current_md5"] datastore.update_watch(uuid=uuid, update_obj=result) if contents: @@ -468,13 +478,23 @@ def ticker_thread_check_time_launch_checks(): # Every minute check for new UUIDs to follow up on while True: + + if app.config['STOP_THREADS']: + return + + running_uuids=[] + for t in running_update_threads: + running_uuids.append(t.current_uuid) + + # Look at the dataset, find a stale watch to process minutes = datastore.data['settings']['requests']['minutes_between_check'] for uuid, watch in datastore.data['watching'].items(): if watch['last_checked'] <= time.time() - (minutes * 60): - update_q.put(uuid) - if app.config['STOP_THREADS']: - return + # @todo maybe update_q.queue is enough? + if not uuid in running_uuids and uuid not in update_q.queue: + update_q.put(uuid) + # Should be low so we can break this out in testing time.sleep(1) diff --git a/backend/fetch_site_status.py b/backend/fetch_site_status.py index 2ab531ee..225ed812 100644 --- a/backend/fetch_site_status.py +++ b/backend/fetch_site_status.py @@ -5,8 +5,6 @@ import os import re from inscriptis import get_text -from copy import deepcopy - # Some common stuff here that can be moved to a base class class perform_site_check(): @@ -17,7 +15,6 @@ class perform_site_check(): def run(self, uuid): - timestamp = int(time.time()) # used for storage etc too stripped_text_from_html = False @@ -45,7 +42,9 @@ class perform_site_check(): timeout = 15 try: - r = requests.get(self.datastore.get_val(uuid, 'url'), + url = self.datastore.get_val(uuid, 'url') + + r = requests.get(url, headers=request_headers, timeout=timeout, verify=False) @@ -53,7 +52,6 @@ class perform_site_check(): stripped_text_from_html = get_text(r.text) - # Usually from networkIO/requests level except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout) as e: update_obj["last_error"] = str(e) @@ -90,6 +88,7 @@ class perform_site_check(): if self.datastore.get_val(uuid, 'previous_md5'): update_obj["last_changed"] = timestamp - update_obj["previous_md5"] = fetched_md5 + + update_obj["current_md5"] = fetched_md5 return update_obj, stripped_text_from_html diff --git a/backend/store.py b/backend/store.py index 715892d2..ca0f95f4 100644 --- a/backend/store.py +++ b/backend/store.py @@ -91,10 +91,10 @@ class ChangeDetectionStore: # First time ran, doesnt exist. 
except (FileNotFoundError, json.decoder.JSONDecodeError): print("Creating JSON store at", self.datastore_path) - self.add_watch(url='http://www.quotationspage.com/random.php', tag='test') - self.add_watch(url='https://news.ycombinator.com/', tag='Tech news') - self.add_watch(url='https://www.gov.uk/coronavirus', tag='Covid') - self.add_watch(url='https://changedetection.io', tag='Tech news') + #self.add_watch(url='http://www.quotationspage.com/random.php', tag='test') + #self.add_watch(url='https://news.ycombinator.com/', tag='Tech news') + #self.add_watch(url='https://www.gov.uk/coronavirus', tag='Covid') + #self.add_watch(url='https://changedetection.io', tag='Tech news') # Finally start the thread that will manage periodic data saves to JSON save_data_thread = threading.Thread(target=self.save_datastore).start() @@ -198,7 +198,7 @@ class ChangeDetectionStore: def save_history_text(self, uuid, result_obj, contents): output_path = "{}/{}".format(self.datastore_path, uuid) - fname = "{}/{}.stripped.txt".format(output_path, result_obj['previous_md5']) + fname = "{}/{}-{}.stripped.txt".format(output_path, result_obj['current_md5'], str(time.time())) with open(fname, 'w') as f: f.write(contents) f.close() diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index 03ccd21c..a0870911 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -10,33 +10,39 @@ from backend import store # Much better boilerplate than the docs # https://www.python-boilerplate.com/py3+flask+pytest/ +global app @pytest.fixture(scope='session') def app(request): """Create application for the tests.""" datastore_path = "./test-datastore" + + import os + try: + os.unlink("{}/url-watches.json".format(datastore_path)) + except FileNotFoundError: + pass + + app_config = {'datastore_path': datastore_path} datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path']) - _app = changedetection_app(app_config, datastore) + app = changedetection_app(app_config, datastore) # Establish an application context before running the tests. - ctx = _app.app_context() - ctx.push() + #ctx = _app.app_context() + #ctx.push() def teardown(): - ctx.pop() + datastore.stop_thread = True + app.config['STOP_THREADS']= True request.addfinalizer(teardown) - return _app + return app -@pytest.fixture(scope='session') -def client(app): - with app.test_client() as client: - yield client +#@pytest.fixture(scope='session') +#def client(app): +# with app.test_client() as client: +# yield client -@pytest.fixture(scope='function') -def session(request): - """Creates a new database session for a test.""" - return session diff --git a/backend/tests/test_backend.py b/backend/tests/test_backend.py index 55dab50c..2e1027ef 100644 --- a/backend/tests/test_backend.py +++ b/backend/tests/test_backend.py @@ -1,39 +1,98 @@ #!/usr/bin/python3 -import pytest -import backend -from backend import store -import os import time -import requests -# https://github.com/pallets/flask/blob/1.1.2/examples/tutorial/tests/test_auth.py +import pytest +from flask import url_for +from urllib.request import urlopen + +def set_original_response(): + + test_return_data = """ + + Some initial text
+     <p>Which is across multiple lines</p>
+     </br>
+     So let's see what happens.  </br>
+     </body>
+     </html>
+
+    """
+
+    with open("test-datastore/output.txt", "w") as f:
+        f.write(test_return_data)
+
+
+def set_modified_response():
+    test_return_data = """<html>
+       <body>
+     Some initial text</br>
+     <p>which has this one new line</p>
+     </br>
+     So let's see what happens.  </br>
+     </body>
+     </html>
+ + + + """ + + with open("test-datastore/output.txt", "w") as f: + f.write(test_return_data) + + +def test_add_endpoint_to_live_server(client, live_server): + sleep_time_for_fetch_thread = 3 + + @live_server.app.route('/test-endpoint') + + def test_endpoint(): + # Tried using a global var here but didn't seem to work, so reading from a file instead. + with open("test-datastore/output.txt", "r") as f: + return f.read() + + set_original_response() + + live_server.start() + + # Add our URL to the import page + res = client.post( + url_for("import_page"), + data={"urls": url_for('test_endpoint', _external=True)}, + follow_redirects=True + ) + assert b"1 Imported" in res.data + + client.get(url_for("api_watch_checknow"), follow_redirects=True) + + # Give the thread time to pick it up + time.sleep(sleep_time_for_fetch_thread) + + # It should report nothing found (no new 'unviewed' class) + res = client.get(url_for("index")) + assert b'unviewed' not in res.data + assert b'test-endpoint' in res.data + + # Give the thread time to pick it up + time.sleep(sleep_time_for_fetch_thread) + res = client.get(url_for("index")) -# Much better boilerplate than the docs -# https://www.python-boilerplate.com/py3+flask+pytest/ + assert b'unviewed' not in res.data +##################### -def test_import(client): - res = client.get("/") - assert b"IMPORT" in res.data - assert res.status_code == 200 - test_url_list = ["https://slashdot.org"] - res = client.post('/import', data={'urls': "\n".join(test_url_list)}, follow_redirects=True) - s = "{} Imported".format(len(test_url_list)) + # Make a change + set_modified_response() - #p= url_for('test_endpoint', _external=True + res = urlopen(url_for('test_endpoint', _external=True)) + assert b'which has this one new line' in res.read() - assert bytes(s.encode('utf-8')) in res.data - for url in test_url_list: - assert bytes(url.encode('utf-8')) in res.data + # Force recheck + res = client.get(url_for("api_watch_checknow"), follow_redirects=True) + assert b'1 watches are rechecking.' 
in res.data - #response = requests.get('http://localhost:5000/random_string') - #assert response.status_code == 200 - #assert response.json() == [{'id': 1}] + time.sleep(sleep_time_for_fetch_thread) + # Now something should be ready, indicated by having a 'unviewed' class + res = client.get(url_for("index")) + assert b'unviewed' in res.data -def test_import_a(client): - res = client.get("/") - assert b"IMPORT" in res.data - assert res.status_code == 200 diff --git a/requirements.txt b/requirements.txt index f0121d9f..77412b12 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,9 @@ six==1.10.0 yarl flask +pytest +pytest-flask # for live_server + eventlet requests validators From b0c5dbd88eea2bccfc6ba826865b957bcf300513 Mon Sep 17 00:00:00 2001 From: Leigh Morresi <275001+dgtlmoon@users.noreply.github.com> Date: Sun, 21 Feb 2021 13:46:16 +0100 Subject: [PATCH 10/18] Just use the current/previous md5 --- backend/__init__.py | 3 +-- backend/fetch_site_status.py | 2 +- backend/store.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/backend/__init__.py b/backend/__init__.py index e597c766..cf090abb 100644 --- a/backend/__init__.py +++ b/backend/__init__.py @@ -450,8 +450,7 @@ class Worker(threading.Thread): app.logger.error("File permission error updating", uuid, str(s)) else: if result: - - result["previous_md5"] = result["current_md5"] + datastore.update_watch(uuid=uuid, update_obj=result) if contents: diff --git a/backend/fetch_site_status.py b/backend/fetch_site_status.py index 225ed812..dad9c2de 100644 --- a/backend/fetch_site_status.py +++ b/backend/fetch_site_status.py @@ -89,6 +89,6 @@ class perform_site_check(): update_obj["last_changed"] = timestamp - update_obj["current_md5"] = fetched_md5 + update_obj["previous_md5"] = fetched_md5 return update_obj, stripped_text_from_html diff --git a/backend/store.py b/backend/store.py index ca0f95f4..49f76681 100644 --- a/backend/store.py +++ b/backend/store.py @@ -198,7 +198,7 @@ class ChangeDetectionStore: def save_history_text(self, uuid, result_obj, contents): output_path = "{}/{}".format(self.datastore_path, uuid) - fname = "{}/{}-{}.stripped.txt".format(output_path, result_obj['current_md5'], str(time.time())) + fname = "{}/{}-{}.stripped.txt".format(output_path, result_obj['previous_md5'], str(time.time())) with open(fname, 'w') as f: f.write(contents) f.close() From fd5475ba38196414d4b3dbee19d75a103fd4efda Mon Sep 17 00:00:00 2001 From: Leigh Morresi <275001+dgtlmoon@users.noreply.github.com> Date: Sun, 21 Feb 2021 14:05:52 +0100 Subject: [PATCH 11/18] Minor cleanup --- backend/__init__.py | 2 +- backend/pytest.ini | 2 ++ backend/tests/test_backend.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 backend/pytest.ini diff --git a/backend/__init__.py b/backend/__init__.py index cf090abb..c513d1b7 100644 --- a/backend/__init__.py +++ b/backend/__init__.py @@ -450,7 +450,7 @@ class Worker(threading.Thread): app.logger.error("File permission error updating", uuid, str(s)) else: if result: - + datastore.update_watch(uuid=uuid, update_obj=result) if contents: diff --git a/backend/pytest.ini b/backend/pytest.ini new file mode 100644 index 00000000..8b9ccf85 --- /dev/null +++ b/backend/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +addopts = --no-start-live-server --live-server-port=5005 \ No newline at end of file diff --git a/backend/tests/test_backend.py b/backend/tests/test_backend.py index 2e1027ef..fcdf8519 100644 --- a/backend/tests/test_backend.py +++ b/backend/tests/test_backend.py @@ 
-38,7 +38,7 @@ def set_modified_response(): f.write(test_return_data) -def test_add_endpoint_to_live_server(client, live_server): +def test_check_basic_change_detection_functionality(client, live_server): sleep_time_for_fetch_thread = 3 @live_server.app.route('/test-endpoint') From 3c175bfc4a8683cb67d928d25a9b17f9a807889a Mon Sep 17 00:00:00 2001 From: Leigh Morresi <275001+dgtlmoon@users.noreply.github.com> Date: Sun, 21 Feb 2021 14:08:34 +0100 Subject: [PATCH 12/18] Create the test datastore --- backend/tests/conftest.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index a0870911..94c7cb07 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -12,6 +12,7 @@ from backend import store global app + @pytest.fixture(scope='session') def app(request): """Create application for the tests.""" @@ -19,6 +20,12 @@ def app(request): datastore_path = "./test-datastore" import os + try: + os.mkdir(datastore_path) + except FileExistsError: + pass + + try: os.unlink("{}/url-watches.json".format(datastore_path)) except FileNotFoundError: From 22bc8fabd1dfffb08ed52231548afe4598fbf3c6 Mon Sep 17 00:00:00 2001 From: Leigh Morresi <275001+dgtlmoon@users.noreply.github.com> Date: Sun, 21 Feb 2021 14:14:27 +0100 Subject: [PATCH 13/18] Add badge under pytest branch --- .github/workflows/python-app.yml | 2 +- README.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index e7c5fb8e..ae14bba8 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -1,7 +1,7 @@ # This workflow will install Python dependencies, run tests and lint with a single version of Python # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: changedetection.io Python application +name: changedetection.io on: diff --git a/README.md b/README.md index b541e35b..f8b0c77c 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ # changedetection.io +![changedetection.io](https://github.com/dgtlmoon/changedetection.io/actions/workflows/python-app.yml/badge.svg?branch=pytest) ## Self-hosted change monitoring of web pages. From 3e9a11067196a27842879baabece926b6236e355 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Sun, 21 Feb 2021 14:15:21 +0100 Subject: [PATCH 14/18] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f8b0c77c..5b1be005 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ Know when ... - Government department updates (changes are often only on their websites) - Local government news (changes are often only on their websites) -- New software releases +- New software releases, security advisories when you're not on their mailing list. 
- Festivals with changes - Realestate listing changes From 63eea2d6db9697deb5e66fd24bf9828e6e86e239 Mon Sep 17 00:00:00 2001 From: Leigh Morresi <275001+dgtlmoon@users.noreply.github.com> Date: Sun, 21 Feb 2021 14:21:14 +0100 Subject: [PATCH 15/18] Linting fixups --- backend/__init__.py | 55 +++++++++++++----------------------- backend/fetch_site_status.py | 4 --- backend/store.py | 22 ++++++++------- backend/tests/conftest.py | 2 +- 4 files changed, 32 insertions(+), 51 deletions(-) diff --git a/backend/__init__.py b/backend/__init__.py index c513d1b7..690dd51a 100644 --- a/backend/__init__.py +++ b/backend/__init__.py @@ -13,7 +13,7 @@ # https://distill.io/features # proxy per check # - flask_cors, itsdangerous,MarkupSafe -import json + import time import os import timeago @@ -21,10 +21,9 @@ import timeago import threading import queue +from flask import Flask, render_template, request, send_file, send_from_directory, abort, redirect, url_for -from flask import Flask, render_template, request, send_file, send_from_directory, safe_join, abort, redirect, url_for - -datastore=None +datastore = None # Local running_update_threads = [] @@ -35,13 +34,12 @@ extra_stylesheets = [] update_q = queue.Queue() - app = Flask(__name__, static_url_path="/var/www/change-detection/backen/static") # Stop browser caching of assets app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0 -app.config['STOP_THREADS']= False +app.config['STOP_THREADS'] = False # Disables caching of the templates app.config['TEMPLATES_AUTO_RELOAD'] = True @@ -74,20 +72,19 @@ def _jinja2_filter_datetimestamp(timestamp, format="%Y-%m-%d %H:%M:%S"): # return timeago.format(timestamp, time.time()) # return datetime.datetime.utcfromtimestamp(timestamp).strftime(format) -def changedetection_app(config=None, datastore_o=None): +def changedetection_app(config=None, datastore_o=None): global datastore datastore = datastore_o # Hmm app.config.update(dict(DEBUG=True)) app.config.update(config or {}) - # Setup cors headers to allow all domains # https://flask-cors.readthedocs.io/en/latest/ -# CORS(app) + # CORS(app) - #https://github.com/pallets/flask/blob/93dd1709d05a1cf0e886df6223377bdab3b077fb/examples/tutorial/flaskr/__init__.py#L39 + # https://github.com/pallets/flask/blob/93dd1709d05a1cf0e886df6223377bdab3b077fb/examples/tutorial/flaskr/__init__.py#L39 # You can divide up the stuff like this @app.route("/", methods=['GET']) @@ -100,7 +97,6 @@ def changedetection_app(config=None, datastore_o=None): sorted_watches = [] for uuid, watch in datastore.data['watching'].items(): - if limit_tag != None: # Support for comma separated list of tags. 
for tag_in_watch in watch['tag'].split(','): @@ -113,7 +109,6 @@ def changedetection_app(config=None, datastore_o=None): watch['uuid'] = uuid sorted_watches.append(watch) - sorted_watches.sort(key=lambda x: x['last_changed'], reverse=True) existing_tags = datastore.get_all_tags() @@ -156,7 +151,6 @@ def changedetection_app(config=None, datastore_o=None): return render_template("scrub.html") - @app.route("/edit", methods=['GET', 'POST']) def edit_page(): global messages @@ -193,7 +187,6 @@ def changedetection_app(config=None, datastore_o=None): return output - @app.route("/settings", methods=['GET', "POST"]) def settings_page(): global messages @@ -210,10 +203,12 @@ def changedetection_app(config=None, datastore_o=None): messages.append({'class': 'ok', 'message': "Updated"}) else: - messages.append({'class': 'error', 'message': "Must be equal to or greater than 5 and less than 600 minutes"}) + messages.append( + {'class': 'error', 'message': "Must be equal to or greater than 5 and less than 600 minutes"}) - output = render_template("settings.html", messages=messages, minutes=datastore.data['settings']['requests']['minutes_between_check']) - messages =[] + output = render_template("settings.html", messages=messages, + minutes=datastore.data['settings']['requests']['minutes_between_check']) + messages = [] return output @@ -221,7 +216,7 @@ def changedetection_app(config=None, datastore_o=None): def import_page(): import validators global messages - remaining_urls=[] + remaining_urls = [] good = 0 @@ -250,12 +245,11 @@ def changedetection_app(config=None, datastore_o=None): messages = [] return output - @app.route("/diff/", methods=['GET']) def diff_history_page(uuid): global messages - extra_stylesheets=['/static/css/diff.css'] + extra_stylesheets = ['/static/css/diff.css'] watch = datastore.data['watching'][uuid] @@ -299,7 +293,6 @@ def changedetection_app(config=None, datastore_o=None): def favicon(): return send_from_directory("/app/static/images", filename="favicon.ico") - # We're good but backups are even better! 
@app.route("/backup", methods=['GET']) def get_backup(): @@ -313,7 +306,8 @@ def changedetection_app(config=None, datastore_o=None): # We only care about UUIDS from the current index file uuids = list(datastore.data['watching'].keys()) - with zipfile.ZipFile(os.path.join(app.config['datastore_path'], backupname), 'w', compression=zipfile.ZIP_DEFLATED, + with zipfile.ZipFile(os.path.join(app.config['datastore_path'], backupname), 'w', + compression=zipfile.ZIP_DEFLATED, compresslevel=6) as zipObj: # Be sure we're written fresh @@ -332,7 +326,6 @@ def changedetection_app(config=None, datastore_o=None): mimetype="application/zip", attachment_filename=backupname) - @app.route("/static//", methods=['GET']) def static_content(group, filename): # These files should be in our subdirectory @@ -344,7 +337,6 @@ def changedetection_app(config=None, datastore_o=None): except FileNotFoundError: abort(404) - @app.route("/api/add", methods=['POST']) def api_watch_add(): global messages @@ -357,7 +349,6 @@ def changedetection_app(config=None, datastore_o=None): messages.append({'class': 'ok', 'message': 'Watch added.'}) return redirect(url_for('index')) - @app.route("/api/delete", methods=['GET']) def api_delete(): global messages @@ -367,7 +358,6 @@ def changedetection_app(config=None, datastore_o=None): return redirect(url_for('index')) - @app.route("/api/checknow", methods=['GET']) def api_watch_checknow(): @@ -375,9 +365,9 @@ def changedetection_app(config=None, datastore_o=None): tag = request.args.get('tag') uuid = request.args.get('uuid') - i=0 + i = 0 - running_uuids=[] + running_uuids = [] for t in running_update_threads: running_uuids.append(t.current_uuid) @@ -405,20 +395,16 @@ def changedetection_app(config=None, datastore_o=None): messages.append({'class': 'ok', 'message': "{} watches are rechecking.".format(i)}) return redirect(url_for('index', tag=tag)) - # @todo handle ctrl break ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start() - return app # Requests for checking on the site use a pool of thread Workers managed by a Queue. class Worker(threading.Thread): - current_uuid = None - def __init__(self, q, *args, **kwargs): self.q = q super().__init__(*args, **kwargs) @@ -461,14 +447,12 @@ class Worker(threading.Thread): # No change x = 1 - self.current_uuid = None # Done self.q.task_done() # Thread runner to check every minute, look for new watches to feed into the Queue. def ticker_thread_check_time_launch_checks(): - # Spin up Workers. 
for _ in range(datastore.data['settings']['requests']['workers']): new_worker = Worker(update_q) @@ -481,7 +465,7 @@ def ticker_thread_check_time_launch_checks(): if app.config['STOP_THREADS']: return - running_uuids=[] + running_uuids = [] for t in running_update_threads: running_uuids.append(t.current_uuid) @@ -496,4 +480,3 @@ def ticker_thread_check_time_launch_checks(): # Should be low so we can break this out in testing time.sleep(1) - diff --git a/backend/fetch_site_status.py b/backend/fetch_site_status.py index dad9c2de..f71ecd7a 100644 --- a/backend/fetch_site_status.py +++ b/backend/fetch_site_status.py @@ -1,8 +1,6 @@ import time import requests import hashlib -import os -import re from inscriptis import get_text @@ -13,7 +11,6 @@ class perform_site_check(): super().__init__(*args, **kwargs) self.datastore = datastore - def run(self, uuid): timestamp = int(time.time()) # used for storage etc too stripped_text_from_html = False @@ -88,7 +85,6 @@ class perform_site_check(): if self.datastore.get_val(uuid, 'previous_md5'): update_obj["last_changed"] = timestamp - update_obj["previous_md5"] = fetched_md5 return update_obj, stripped_text_from_html diff --git a/backend/store.py b/backend/store.py index 49f76681..4acaa175 100644 --- a/backend/store.py +++ b/backend/store.py @@ -11,13 +11,14 @@ import logging import time import threading + # Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods? # Open a github issue if you know something :) # https://stackoverflow.com/questions/6190468/how-to-trigger-function-on-value-change class ChangeDetectionStore: lock = Lock() - def __init__(self, datastore_path="/datastore"): + def __init__(self, datastore_path="/datastore", include_default_watches=True): self.needs_write = False self.datastore_path = datastore_path self.json_store_path = "{}/url-watches.json".format(self.datastore_path) @@ -90,11 +91,13 @@ class ChangeDetectionStore: # First time ran, doesnt exist. except (FileNotFoundError, json.decoder.JSONDecodeError): - print("Creating JSON store at", self.datastore_path) - #self.add_watch(url='http://www.quotationspage.com/random.php', tag='test') - #self.add_watch(url='https://news.ycombinator.com/', tag='Tech news') - #self.add_watch(url='https://www.gov.uk/coronavirus', tag='Covid') - #self.add_watch(url='https://changedetection.io', tag='Tech news') + if include_default_watches: + print("Creating JSON store at", self.datastore_path) + + self.add_watch(url='http://www.quotationspage.com/random.php', tag='test') + self.add_watch(url='https://news.ycombinator.com/', tag='Tech news') + self.add_watch(url='https://www.gov.uk/coronavirus', tag='Covid') + self.add_watch(url='https://changedetection.io', tag='Tech news') # Finally start the thread that will manage periodic data saves to JSON save_data_thread = threading.Thread(target=self.save_datastore).start() @@ -146,7 +149,7 @@ class ChangeDetectionStore: # Support for comma separated list of tags. 
for tag in watch['tag'].split(','): tag = tag.strip() - if not tag in tags: + if tag not in tags: tags.append(tag) tags.sort() @@ -210,7 +213,7 @@ class ChangeDetectionStore: return fname def sync_to_json(self): - print ("Saving..") + print("Saving..") with open(self.json_store_path, 'w') as json_file: json.dump(self.__data, json_file, indent=4) logging.info("Re-saved index") @@ -223,11 +226,10 @@ class ChangeDetectionStore: while True: if self.stop_thread: - print ("Shutting down datastore thread") + print("Shutting down datastore thread") return if self.needs_write: self.sync_to_json() time.sleep(1) - # body of the constructor diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index 94c7cb07..7dfa2ebe 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -33,7 +33,7 @@ def app(request): app_config = {'datastore_path': datastore_path} - datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path']) + datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], include_default_watches=False) app = changedetection_app(app_config, datastore) # Establish an application context before running the tests. From e200cd32892ead596c74226519382aa950f20a6f Mon Sep 17 00:00:00 2001 From: Leigh Morresi <275001+dgtlmoon@users.noreply.github.com> Date: Sun, 21 Feb 2021 14:26:19 +0100 Subject: [PATCH 16/18] Fixing a few more easy lint wins --- backend/__init__.py | 9 +++------ backend/dev-docker/sleep.py | 4 +--- backend/fetch_site_status.py | 1 - backend/store.py | 3 +-- backend/tests/conftest.py | 18 +++--------------- backend/tests/test_backend.py | 9 ++------- 6 files changed, 10 insertions(+), 34 deletions(-) diff --git a/backend/__init__.py b/backend/__init__.py index 690dd51a..028d49b7 100644 --- a/backend/__init__.py +++ b/backend/__init__.py @@ -374,7 +374,7 @@ def changedetection_app(config=None, datastore_o=None): # @todo check thread is running and skip if uuid: - if not uuid in running_uuids: + if uuid not in running_uuids: update_q.put(uuid) i = 1 @@ -383,13 +383,13 @@ def changedetection_app(config=None, datastore_o=None): for watch_uuid, watch in datastore.data['watching'].items(): if (tag != None and tag in watch['tag']): i += 1 - if not watch_uuid in running_uuids: + if watch_uuid not in running_uuids: update_q.put(watch_uuid) else: # No tag, no uuid, add everything. 
for watch_uuid, watch in datastore.data['watching'].items(): i += 1 - if not watch_uuid in running_uuids: + if watch_uuid not in running_uuids: update_q.put(watch_uuid) messages.append({'class': 'ok', 'message': "{} watches are rechecking.".format(i)}) @@ -443,9 +443,6 @@ class Worker(threading.Thread): # A change was detected datastore.save_history_text(uuid=uuid, contents=contents, result_obj=result) - else: - # No change - x = 1 self.current_uuid = None # Done self.q.task_done() diff --git a/backend/dev-docker/sleep.py b/backend/dev-docker/sleep.py index e2108f0f..27c632b5 100644 --- a/backend/dev-docker/sleep.py +++ b/backend/dev-docker/sleep.py @@ -1,9 +1,7 @@ import time -import sys print ("Sleep loop, you should run your script from the console") while True: # Wait for 5 seconds - - time.sleep(2) \ No newline at end of file + time.sleep(2) diff --git a/backend/fetch_site_status.py b/backend/fetch_site_status.py index f71ecd7a..f03cdb00 100644 --- a/backend/fetch_site_status.py +++ b/backend/fetch_site_status.py @@ -48,7 +48,6 @@ class perform_site_check(): stripped_text_from_html = get_text(r.text) - # Usually from networkIO/requests level except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout) as e: update_obj["last_error"] = str(e) diff --git a/backend/store.py b/backend/store.py index 4acaa175..1600dd1f 100644 --- a/backend/store.py +++ b/backend/store.py @@ -1,9 +1,8 @@ import json import uuid as uuid_builder -import validators import os.path from os import path -from threading import Lock, Thread +from threading import Lock from copy import deepcopy diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index 7dfa2ebe..2c2fa334 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -3,6 +3,7 @@ import pytest from backend import changedetection_app from backend import store +import os # https://github.com/pallets/flask/blob/1.1.2/examples/tutorial/tests/test_auth.py @@ -19,37 +20,24 @@ def app(request): datastore_path = "./test-datastore" - import os try: os.mkdir(datastore_path) except FileExistsError: pass - try: os.unlink("{}/url-watches.json".format(datastore_path)) except FileNotFoundError: pass - app_config = {'datastore_path': datastore_path} datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], include_default_watches=False) app = changedetection_app(app_config, datastore) - # Establish an application context before running the tests. - #ctx = _app.app_context() - #ctx.push() - def teardown(): datastore.stop_thread = True - app.config['STOP_THREADS']= True + app.config['STOP_THREADS'] = True request.addfinalizer(teardown) - return app - -#@pytest.fixture(scope='session') -#def client(app): -# with app.test_client() as client: -# yield client - + return app diff --git a/backend/tests/test_backend.py b/backend/tests/test_backend.py index fcdf8519..f1fdff19 100644 --- a/backend/tests/test_backend.py +++ b/backend/tests/test_backend.py @@ -1,12 +1,11 @@ #!/usr/bin/python3 import time -import pytest from flask import url_for from urllib.request import urlopen -def set_original_response(): +def set_original_response(): test_return_data = """ Some initial text
@@ -42,7 +41,6 @@ def test_check_basic_change_detection_functionality(client, live_server): sleep_time_for_fetch_thread = 3 @live_server.app.route('/test-endpoint') - def test_endpoint(): # Tried using a global var here but didn't seem to work, so reading from a file instead. with open("test-datastore/output.txt", "r") as f: @@ -76,8 +74,7 @@ def test_check_basic_change_detection_functionality(client, live_server): assert b'unviewed' not in res.data -##################### - + ##################### # Make a change set_modified_response() @@ -85,7 +82,6 @@ def test_check_basic_change_detection_functionality(client, live_server): res = urlopen(url_for('test_endpoint', _external=True)) assert b'which has this one new line' in res.read() - # Force recheck res = client.get(url_for("api_watch_checknow"), follow_redirects=True) assert b'1 watches are rechecking.' in res.data @@ -95,4 +91,3 @@ def test_check_basic_change_detection_functionality(client, live_server): # Now something should be ready, indicated by having a 'unviewed' class res = client.get(url_for("index")) assert b'unviewed' in res.data - From cbeafcbaa0370bcef069eb98cee1e29947b6d862 Mon Sep 17 00:00:00 2001 From: Leigh Morresi <275001+dgtlmoon@users.noreply.github.com> Date: Sun, 21 Feb 2021 14:26:58 +0100 Subject: [PATCH 17/18] Removing unused import --- backend/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/__init__.py b/backend/__init__.py index 028d49b7..38f8ba86 100644 --- a/backend/__init__.py +++ b/backend/__init__.py @@ -298,7 +298,7 @@ def changedetection_app(config=None, datastore_o=None): def get_backup(): import zipfile from pathlib import Path - import zlib + # create a ZipFile object backupname = "changedetection-backup-{}.zip".format(int(time.time())) From beebba487c016008bea4a8dd228e7a14bf68c592 Mon Sep 17 00:00:00 2001 From: Leigh Morresi <275001+dgtlmoon@users.noreply.github.com> Date: Sun, 21 Feb 2021 15:21:30 +0100 Subject: [PATCH 18/18] Use master branch for badge --- README.md | 2 +- backend/__init__.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 5b1be005..3b7db369 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # changedetection.io -![changedetection.io](https://github.com/dgtlmoon/changedetection.io/actions/workflows/python-app.yml/badge.svg?branch=pytest) +![changedetection.io](https://github.com/dgtlmoon/changedetection.io/actions/workflows/python-app.yml/badge.svg?branch=master) ## Self-hosted change monitoring of web pages. diff --git a/backend/__init__.py b/backend/__init__.py index 38f8ba86..28a833a3 100644 --- a/backend/__init__.py +++ b/backend/__init__.py @@ -298,7 +298,6 @@ def changedetection_app(config=None, datastore_o=None): def get_backup(): import zipfile from pathlib import Path - # create a ZipFile object backupname = "changedetection-backup-{}.zip".format(int(time.time()))