#!/usr/bin/python3 # @todo logging # @todo extra options for url like , verify=False etc. # @todo enable https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl as option? # @todo option for interval day/6 hour/etc # @todo on change detected, config for calling some API # @todo make tables responsive! # @todo fetch title into json # https://distill.io/features # proxy per check # - flask_cors, itsdangerous,MarkupSafe import time import os import timeago import flask_login from flask_login import login_required import threading from threading import Event import queue from flask import Flask, render_template, request, send_from_directory, abort, redirect, url_for from feedgen.feed import FeedGenerator from flask import make_response import datetime import pytz datastore = None # Local running_update_threads = [] ticker_thread = None messages = [] extra_stylesheets = [] update_q = queue.Queue() notification_q = queue.Queue() app = Flask(__name__, static_url_path="/var/www/change-detection/backend/static") # Stop browser caching of assets app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0 app.config.exit = Event() app.config['NEW_VERSION_AVAILABLE'] = False app.config['LOGIN_DISABLED'] = False # Disables caching of the templates app.config['TEMPLATES_AUTO_RELOAD'] = True # We use the whole watch object from the store/JSON so we can see if there's some related status in terms of a thread # running or something similar. @app.template_filter('format_last_checked_time') def _jinja2_filter_datetime(watch_obj, format="%Y-%m-%d %H:%M:%S"): # Worker thread tells us which UUID it is currently processing. for t in running_update_threads: if t.current_uuid == watch_obj['uuid']: return "Checking now.." if watch_obj['last_checked'] == 0: return 'Not yet' return timeago.format(int(watch_obj['last_checked']), time.time()) # @app.context_processor # def timeago(): # def _timeago(lower_time, now): # return timeago.format(lower_time, now) # return dict(timeago=_timeago) @app.template_filter('format_timestamp_timeago') def _jinja2_filter_datetimestamp(timestamp, format="%Y-%m-%d %H:%M:%S"): return timeago.format(timestamp, time.time()) # return timeago.format(timestamp, time.time()) # return datetime.datetime.utcfromtimestamp(timestamp).strftime(format) class User(flask_login.UserMixin): id=None def set_password(self, password): return True def get_user(self, email="defaultuser@changedetection.io"): return self def is_authenticated(self): return True def is_active(self): return True def is_anonymous(self): return False def get_id(self): return str(self.id) def check_password(self, password): import hashlib import base64 # Getting the values back out raw_salt_pass = base64.b64decode(datastore.data['settings']['application']['password']) salt_from_storage = raw_salt_pass[:32] # 32 is the length of the salt # Use the exact same setup you used to generate the key, but this time put in the password to check new_key = hashlib.pbkdf2_hmac( 'sha256', password.encode('utf-8'), # Convert the password to bytes salt_from_storage, 100000 ) new_key = salt_from_storage + new_key return new_key == raw_salt_pass pass def changedetection_app(conig=None, datastore_o=None): global datastore datastore = datastore_o app.config.update(dict(DEBUG=True)) #app.config.update(config or {}) login_manager = flask_login.LoginManager(app) login_manager.login_view = 'login' # Setup cors headers to allow all domains # https://flask-cors.readthedocs.io/en/latest/ # CORS(app) @login_manager.user_loader def user_loader(email): user = User() user.get_user(email) return user @login_manager.unauthorized_handler def unauthorized_handler(): # @todo validate its a URL of this host and use that return redirect(url_for('login', next=url_for('index'))) @app.route('/logout') def logout(): flask_login.logout_user() return redirect(url_for('index')) # https://github.com/pallets/flask/blob/93dd1709d05a1cf0e886df6223377bdab3b077fb/examples/tutorial/flaskr/__init__.py#L39 # You can divide up the stuff like this @app.route('/login', methods=['GET', 'POST']) def login(): global messages if request.method == 'GET': output = render_template("login.html", messages=messages) # Show messages but once. messages = [] return output user = User() user.id = "defaultuser@changedetection.io" password = request.form.get('password') if (user.check_password(password)): flask_login.login_user(user, remember=True) next = request.args.get('next') # if not is_safe_url(next): # return flask.abort(400) return redirect(next or url_for('index')) else: messages.append({'class': 'error', 'message': 'Incorrect password'}) return redirect(url_for('login')) @app.before_request def do_something_whenever_a_request_comes_in(): # Disable password loginif there is not one set app.config['LOGIN_DISABLED'] = datastore.data['settings']['application']['password'] == False @app.route("/", methods=['GET']) @login_required def index(): global messages limit_tag = request.args.get('tag') pause_uuid = request.args.get('pause') if pause_uuid: try: datastore.data['watching'][pause_uuid]['paused'] ^= True datastore.needs_write = True return redirect(url_for('index', tag = limit_tag)) except KeyError: pass # Sort by last_changed and add the uuid which is usually the key.. sorted_watches = [] for uuid, watch in datastore.data['watching'].items(): if limit_tag != None: # Support for comma separated list of tags. for tag_in_watch in watch['tag'].split(','): tag_in_watch = tag_in_watch.strip() if tag_in_watch == limit_tag: watch['uuid'] = uuid sorted_watches.append(watch) else: watch['uuid'] = uuid sorted_watches.append(watch) sorted_watches.sort(key=lambda x: x['last_changed'], reverse=True) existing_tags = datastore.get_all_tags() rss = request.args.get('rss') if rss: fg = FeedGenerator() fg.title('changedetection.io') fg.description('Feed description') fg.link(href='https://changedetection.io') for watch in sorted_watches: if not watch['viewed']: fe = fg.add_entry() fe.title(watch['url']) fe.link(href=watch['url']) fe.description(watch['url']) fe.guid(watch['uuid'], permalink=False) dt = datetime.datetime.fromtimestamp(int(watch['newest_history_key'])) dt = dt.replace(tzinfo=pytz.UTC) fe.pubDate(dt) response = make_response(fg.rss_str()) response.headers.set('Content-Type', 'application/rss+xml') return response else: output = render_template("watch-overview.html", watches=sorted_watches, messages=messages, tags=existing_tags, active_tag=limit_tag, has_unviewed=datastore.data['has_unviewed']) # Show messages but once. messages = [] return output @app.route("/scrub", methods=['GET', 'POST']) @login_required def scrub_page(): from pathlib import Path global messages if request.method == 'POST': confirmtext = request.form.get('confirmtext') limit_timestamp = int(request.form.get('limit_date')) if confirmtext == 'scrub': for uuid, watch in datastore.data['watching'].items(): if len(str(limit_timestamp)) == 10: datastore.scrub_watch(uuid, limit_timestamp = limit_timestamp) else: datastore.scrub_watch(uuid) messages.append({'class': 'ok', 'message': 'Cleaned all version history.'}) else: messages.append({'class': 'error', 'message': 'Wrong confirm text.'}) return redirect(url_for('index')) return render_template("scrub.html") # If they edited an existing watch, we need to know to reset the current/previous md5 to include # the excluded text. def get_current_checksum_include_ignore_text(uuid): import hashlib from backend import fetch_site_status # Get the most recent one newest_history_key = datastore.get_val(uuid, 'newest_history_key') # 0 means that theres only one, so that there should be no 'unviewed' history availabe if newest_history_key == 0: newest_history_key = list(datastore.data['watching'][uuid]['history'].keys())[0] if newest_history_key: with open(datastore.data['watching'][uuid]['history'][newest_history_key], encoding='utf-8') as file: raw_content = file.read() handler = fetch_site_status.perform_site_check(datastore=datastore) stripped_content = handler.strip_ignore_text(raw_content, datastore.data['watching'][uuid]['ignore_text']) checksum = hashlib.md5(stripped_content).hexdigest() return checksum return datastore.data['watching'][uuid]['previous_md5'] @app.route("/edit/", methods=['GET', 'POST']) @login_required def edit_page(uuid): global messages import validators # More for testing, possible to return the first/only if uuid == 'first': uuid = list(datastore.data['watching'].keys()).pop() if request.method == 'POST': url = request.form.get('url').strip() tag = request.form.get('tag').strip() # Extra headers form_headers = request.form.get('headers').strip().split("\n") extra_headers = {} if form_headers: for header in form_headers: if len(header): parts = header.split(':', 1) if len(parts) == 2: extra_headers.update({parts[0].strip(): parts[1].strip()}) update_obj = {'url': url, 'tag': tag, 'headers': extra_headers } # Notification URLs form_notification_text = request.form.get('notification_urls') notification_urls = [] if form_notification_text: for text in form_notification_text.strip().split("\n"): text = text.strip() if len(text): notification_urls.append(text) datastore.data['watching'][uuid]['notification_urls'] = notification_urls # Ignore text form_ignore_text = request.form.get('ignore-text') ignore_text = [] if form_ignore_text: for text in form_ignore_text.strip().split("\n"): text = text.strip() if len(text): ignore_text.append(text) datastore.data['watching'][uuid]['ignore_text'] = ignore_text # Reset the previous_md5 so we process a new snapshot including stripping ignore text. if len(datastore.data['watching'][uuid]['history']): update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid) validators.url(url) # @todo switch to prop/attr/observer datastore.data['watching'][uuid].update(update_obj) datastore.needs_write = True messages.append({'class': 'ok', 'message': 'Updated watch.'}) trigger_n = request.form.get('trigger-test-notification') if trigger_n: n_object = {'watch_url': url, 'notification_urls': datastore.data['settings']['application']['notification_urls']} notification_q.put(n_object) messages.append({'class': 'ok', 'message': 'Notifications queued.'}) return redirect(url_for('index')) else: output = render_template("edit.html", uuid=uuid, watch=datastore.data['watching'][uuid], messages=messages) return output @app.route("/settings", methods=['GET', "POST"]) @login_required def settings_page(): global messages if request.method == 'GET': if request.values.get('notification-test'): url_count = len(datastore.data['settings']['application']['notification_urls']) if url_count: import apprise apobj = apprise.Apprise() apobj.debug = True # Add each notification for n in datastore.data['settings']['application']['notification_urls']: apobj.add(n) outcome = apobj.notify( body='Hello from the worlds best and simplest web page change detection and monitoring service!', title='Changedetection.io Notification Test', ) if outcome: messages.append( {'class': 'notice', 'message': "{} Notification URLs reached.".format(url_count)}) else: messages.append( {'class': 'error', 'message': "One or more Notification URLs failed"}) return redirect(url_for('settings_page')) if request.values.get('removepassword'): from pathlib import Path datastore.data['settings']['application']['password'] = False messages.append({'class': 'notice', 'message': "Password protection removed."}) flask_login.logout_user() return redirect(url_for('settings_page')) if request.method == 'POST': password = request.values.get('password') if password: import hashlib import base64 import secrets # Make a new salt on every new password and store it with the password salt = secrets.token_bytes(32) key = hashlib.pbkdf2_hmac('sha256', password.encode('utf-8'), salt, 100000) store = base64.b64encode(salt + key).decode('ascii') datastore.data['settings']['application']['password'] = store messages.append({'class': 'notice', 'message': "Password protection enabled."}) flask_login.logout_user() return redirect(url_for('index')) try: minutes = int(request.values.get('minutes').strip()) except ValueError: messages.append({'class': 'error', 'message': "Invalid value given, use an integer."}) else: if minutes >= 5: datastore.data['settings']['requests']['minutes_between_check'] = minutes datastore.needs_write = True else: messages.append( {'class': 'error', 'message': "Must be atleast 5 minutes."}) # 'validators' package doesnt work because its often a non-stanadard protocol. :( datastore.data['settings']['application']['notification_urls'] = [] trigger_n = request.form.get('trigger-test-notification') for n in request.values.get('notification_urls').strip().split("\n"): url = n.strip() datastore.data['settings']['application']['notification_urls'].append(url) datastore.needs_write = True if trigger_n: n_object = {'watch_url': "Test from changedetection.io!", 'notification_urls': datastore.data['settings']['application']['notification_urls']} notification_q.put(n_object) messages.append({'class': 'ok', 'message': 'Notifications queued.'}) output = render_template("settings.html", messages=messages, minutes=datastore.data['settings']['requests']['minutes_between_check'], notification_urls="\r\n".join( datastore.data['settings']['application']['notification_urls'])) messages = [] return output @app.route("/import", methods=['GET', "POST"]) @login_required def import_page(): import validators global messages remaining_urls = [] good = 0 if request.method == 'POST': urls = request.values.get('urls').split("\n") for url in urls: url = url.strip() if len(url) and validators.url(url): new_uuid = datastore.add_watch(url=url.strip(), tag="") # Straight into the queue. update_q.put(new_uuid) good += 1 else: if len(url): remaining_urls.append(url) messages.append({'class': 'ok', 'message': "{} Imported, {} Skipped.".format(good, len(remaining_urls))}) if len(remaining_urls) == 0: # Looking good, redirect to index. return redirect(url_for('index')) # Could be some remaining, or we could be on GET output = render_template("import.html", messages=messages, remaining="\n".join(remaining_urls) ) messages = [] return output # Clear all statuses, so we do not see the 'unviewed' class @app.route("/api/mark-all-viewed", methods=['GET']) @login_required def mark_all_viewed(): # Save the current newest history as the most recently viewed for watch_uuid, watch in datastore.data['watching'].items(): datastore.set_last_viewed(watch_uuid, watch['newest_history_key']) messages.append({'class': 'ok', 'message': "Cleared all statuses."}) return redirect(url_for('index')) @app.route("/diff/", methods=['GET']) @login_required def diff_history_page(uuid): global messages # More for testing, possible to return the first/only if uuid == 'first': uuid = list(datastore.data['watching'].keys()).pop() extra_stylesheets = ['/static/css/diff.css'] try: watch = datastore.data['watching'][uuid] except KeyError: messages.append({'class': 'error', 'message': "No history found for the specified link, bad link?"}) return redirect(url_for('index')) dates = list(watch['history'].keys()) # Convert to int, sort and back to str again dates = [int(i) for i in dates] dates.sort(reverse=True) dates = [str(i) for i in dates] if len(dates) < 2: messages.append( {'class': 'error', 'message': "Not enough saved change detection snapshots to produce a report."}) return redirect(url_for('index')) # Save the current newest history as the most recently viewed datastore.set_last_viewed(uuid, dates[0]) newest_file = watch['history'][dates[0]] with open(newest_file, 'r') as f: newest_version_file_contents = f.read() previous_version = request.args.get('previous_version') try: previous_file = watch['history'][previous_version] except KeyError: # Not present, use a default value, the second one in the sorted list. previous_file = watch['history'][dates[1]] with open(previous_file, 'r') as f: previous_version_file_contents = f.read() output = render_template("diff.html", watch_a=watch, messages=messages, newest=newest_version_file_contents, previous=previous_version_file_contents, extra_stylesheets=extra_stylesheets, versions=dates[1:], newest_version_timestamp=dates[0], current_previous_version=str(previous_version), current_diff_url=watch['url']) return output @app.route("/preview/", methods=['GET']) @login_required def preview_page(uuid): global messages # More for testing, possible to return the first/only if uuid == 'first': uuid = list(datastore.data['watching'].keys()).pop() extra_stylesheets = ['/static/css/diff.css'] try: watch = datastore.data['watching'][uuid] except KeyError: messages.append({'class': 'error', 'message': "No history found for the specified link, bad link?"}) return redirect(url_for('index')) print(watch) with open(list(watch['history'].values())[-1], 'r') as f: content = f.readlines() output = render_template("preview.html", content=content, extra_stylesheets=extra_stylesheets) return output @app.route("/favicon.ico", methods=['GET']) def favicon(): return send_from_directory("/app/static/images", filename="favicon.ico") # We're good but backups are even better! @app.route("/backup", methods=['GET']) @login_required def get_backup(): import zipfile from pathlib import Path # create a ZipFile object backupname = "changedetection-backup-{}.zip".format(int(time.time())) # We only care about UUIDS from the current index file uuids = list(datastore.data['watching'].keys()) backup_filepath = os.path.join(app.config['datastore_path'], backupname) with zipfile.ZipFile(backup_filepath, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=8) as zipObj: # Be sure we're written fresh datastore.sync_to_json() # Add the index zipObj.write(os.path.join(app.config['datastore_path'], "url-watches.json"), arcname="url-watches.json") # Add the flask app secret zipObj.write(os.path.join(app.config['datastore_path'], "secret.txt"), arcname="secret.txt") # Add any snapshot data we find, use the full path to access the file, but make the file 'relative' in the Zip. for txt_file_path in Path(app.config['datastore_path']).rglob('*.txt'): parent_p = txt_file_path.parent if parent_p.name in uuids: zipObj.write(txt_file_path, arcname=str(txt_file_path).replace(app.config['datastore_path'], ''), compress_type=zipfile.ZIP_DEFLATED, compresslevel=8) return send_from_directory(app.config['datastore_path'], backupname) @app.route("/static//", methods=['GET']) def static_content(group, filename): # These files should be in our subdirectory full_path = os.path.realpath(__file__) p = os.path.dirname(full_path) try: return send_from_directory("{}/static/{}".format(p, group), filename=filename) except FileNotFoundError: abort(404) @app.route("/api/add", methods=['POST']) @login_required def api_watch_add(): global messages url = request.form.get('url').strip() if datastore.url_exists(url): messages.append({'class': 'error', 'message': 'The URL {} already exists'.format(url)}) return redirect(url_for('index')) # @todo add_watch should throw a custom Exception for validation etc new_uuid = datastore.add_watch(url=url, tag=request.form.get('tag').strip()) # Straight into the queue. update_q.put(new_uuid) messages.append({'class': 'ok', 'message': 'Watch added.'}) return redirect(url_for('index')) @app.route("/api/delete", methods=['GET']) @login_required def api_delete(): global messages uuid = request.args.get('uuid') datastore.delete(uuid) messages.append({'class': 'ok', 'message': 'Deleted.'}) return redirect(url_for('index')) @app.route("/api/checknow", methods=['GET']) @login_required def api_watch_checknow(): global messages tag = request.args.get('tag') uuid = request.args.get('uuid') i = 0 running_uuids = [] for t in running_update_threads: running_uuids.append(t.current_uuid) # @todo check thread is running and skip if uuid: if uuid not in running_uuids: update_q.put(uuid) i = 1 elif tag != None: # Items that have this current tag for watch_uuid, watch in datastore.data['watching'].items(): if (tag != None and tag in watch['tag']): if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']: update_q.put(watch_uuid) i += 1 else: # No tag, no uuid, add everything. for watch_uuid, watch in datastore.data['watching'].items(): if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']: update_q.put(watch_uuid) i += 1 messages.append({'class': 'ok', 'message': "{} watches are rechecking.".format(i)}) return redirect(url_for('index', tag=tag)) # @todo handle ctrl break ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start() threading.Thread(target=notification_runner).start() # Check for new release version threading.Thread(target=check_for_new_version).start() return app # Check for new version and anonymous stats def check_for_new_version(): import requests import urllib3 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) while not app.config.exit.is_set(): try: r = requests.post("https://changedetection.io/check-ver.php", data={'version': datastore.data['version_tag'], 'app_guid': datastore.data['app_guid']}, verify=False) except: pass try: if "new_version" in r.text: app.config['NEW_VERSION_AVAILABLE'] = True except: pass # Check daily app.config.exit.wait(86400) def notification_runner(): while not app.config.exit.is_set(): try: # At the moment only one thread runs (single runner) n_object = notification_q.get(block=False) except queue.Empty: time.sleep(1) pass else: import apprise # Create an Apprise instance try: apobj = apprise.Apprise() for url in n_object['notification_urls']: apobj.add(url.strip()) apobj.notify( body=n_object['watch_url'], # @todo This should be configurable. title="ChangeDetection.io Notification - {}".format(n_object['watch_url']) ) except Exception as e: print("Watch URL: {} Error {}".format(n_object['watch_url'],e)) # Thread runner to check every minute, look for new watches to feed into the Queue. def ticker_thread_check_time_launch_checks(): from backend import update_worker # Spin up Workers. for _ in range(datastore.data['settings']['requests']['workers']): new_worker = update_worker.update_worker(update_q, notification_q, app, datastore) running_update_threads.append(new_worker) new_worker.start() while not app.config.exit.is_set(): running_uuids = [] for t in running_update_threads: if t.current_uuid: running_uuids.append(t.current_uuid) # Look at the dataset, find a stale watch to process # Every minute check for new UUIDs to follow up on, should be inside the loop incase it changes. minutes = datastore.data['settings']['requests']['minutes_between_check'] threshold = time.time() - (minutes * 60) for uuid, watch in datastore.data['watching'].items(): if not watch['paused'] and watch['last_checked'] <= threshold: if not uuid in running_uuids and uuid not in update_q.queue: update_q.put(uuid) # Should be low so we can break this out in testing app.config.exit.wait(1)