diff --git a/.dockerignore b/.dockerignore index 320bd34f..2f88d7d3 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,18 +1,31 @@ -.git -.github -changedetectionio/processors/__pycache__ -changedetectionio/api/__pycache__ -changedetectionio/model/__pycache__ -changedetectionio/blueprint/price_data_follower/__pycache__ -changedetectionio/blueprint/tags/__pycache__ -changedetectionio/blueprint/__pycache__ -changedetectionio/blueprint/browser_steps/__pycache__ -changedetectionio/fetchers/__pycache__ -changedetectionio/tests/visualselector/__pycache__ -changedetectionio/tests/restock/__pycache__ -changedetectionio/tests/__pycache__ -changedetectionio/tests/fetchers/__pycache__ -changedetectionio/tests/unit/__pycache__ -changedetectionio/tests/proxy_list/__pycache__ -changedetectionio/__pycache__ +# Git +.git/ +.gitignore +# GitHub +.github/ + +# Byte-compiled / optimized / DLL files +**/__pycache__ +**/*.py[cod] + +# Caches +.mypy_cache/ +.pytest_cache/ +.ruff_cache/ + +# Distribution / packaging +build/ +dist/ +*.egg-info* + +# Virtual environment +.env +.venv/ +venv/ + +# IntelliJ IDEA +.idea/ + +# Visual Studio +.vscode/ diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index c35dbd76..0bdf52f5 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -27,6 +27,10 @@ A clear and concise description of what the bug is. **Version** *Exact version* in the top right area: 0.... 
+**How did you install?** + +Docker, Pip, from source directly etc + **To Reproduce** Steps to reproduce the behavior: diff --git a/.github/workflows/test-only.yml b/.github/workflows/test-only.yml index 69e42cba..3d61ca2a 100644 --- a/.github/workflows/test-only.yml +++ b/.github/workflows/test-only.yml @@ -37,3 +37,10 @@ jobs: python-version: '3.12' skip-pypuppeteer: true + test-application-3-13: + needs: lint-code + uses: ./.github/workflows/test-stack-reusable-workflow.yml + with: + python-version: '3.13' + skip-pypuppeteer: true + diff --git a/.gitignore b/.gitignore index 39fc0dd0..835597c3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,14 +1,29 @@ -__pycache__ -.idea -*.pyc -datastore/url-watches.json -datastore/* -__pycache__ -.pytest_cache -build -dist -venv -test-datastore/* -test-datastore +# Byte-compiled / optimized / DLL files +**/__pycache__ +**/*.py[cod] + +# Caches +.mypy_cache/ +.pytest_cache/ +.ruff_cache/ + +# Distribution / packaging +build/ +dist/ *.egg-info* + +# Virtual environment +.env +.venv/ +venv/ + +# IDEs +.idea .vscode/settings.json + +# Datastore files +datastore/ +test-datastore/ + +# Memory consumption log +test-memory.log diff --git a/COMMERCIAL_LICENCE.md b/COMMERCIAL_LICENCE.md index 9ac72335..fa59b2ea 100644 --- a/COMMERCIAL_LICENCE.md +++ b/COMMERCIAL_LICENCE.md @@ -4,7 +4,7 @@ In any commercial activity involving 'Hosting' (as defined herein), whether in p # Commercial License Agreement -This Commercial License Agreement ("Agreement") is entered into by and between Mr Morresi (the original creator of this software) here-in ("Licensor") and (your company or personal name) _____________ ("Licensee"). This Agreement sets forth the terms and conditions under which Licensor provides its software ("Software") and services to Licensee for the purpose of reselling the software either in part or full, as part of any commercial activity where the activity involves a third party. 
+This Commercial License Agreement ("Agreement") is entered into by and between Web Technologies s.r.o. here-in ("Licensor") and (your company or personal name) _____________ ("Licensee"). This Agreement sets forth the terms and conditions under which Licensor provides its software ("Software") and services to Licensee for the purpose of reselling the software either in part or full, as part of any commercial activity where the activity involves a third party. ### Definition of Hosting diff --git a/Dockerfile b/Dockerfile index 3c057d67..c993ab24 100644 --- a/Dockerfile +++ b/Dockerfile @@ -32,7 +32,7 @@ RUN pip install --extra-index-url https://www.piwheels.org/simple --target=/dep # Playwright is an alternative to Selenium # Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing # https://github.com/dgtlmoon/changedetection.io/pull/1067 also musl/alpine (not supported) -RUN pip install --target=/dependencies playwright~=1.41.2 \ +RUN pip install --target=/dependencies playwright~=1.48.0 \ || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled." # Final image stage diff --git a/README.md b/README.md index 87451d24..12bcb507 100644 --- a/README.md +++ b/README.md @@ -105,6 +105,15 @@ We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) glob Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/ +### Schedule web page watches in any timezone, limit by day of week and time. + +Easily set a re-check schedule, for example you could limit the web page change detection to only operate during business hours. +Or perhaps based on a foreign timezone (for example, you want to check for the latest news-headlines in a foreign country at 09:00 AM). + +How to monitor web page changes according to a schedule + +Includes quick short-cut buttons to set up a schedule for **business hours only**, or **weekends**. 
+ ### We have a Chrome extension! Easily add the current web page to your changedetection.io tool, simply install the extension and click "Sync" to connect it to your existing changedetection.io install. diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index 781c848b..f8c2c161 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -2,7 +2,7 @@ # Read more https://github.com/dgtlmoon/changedetection.io/wiki -__version__ = '0.47.03' +__version__ = '0.48.05' from changedetectionio.strtobool import strtobool from json.decoder import JSONDecodeError @@ -160,11 +160,10 @@ def main(): ) # Monitored websites will not receive a Referer header when a user clicks on an outgoing link. - # @Note: Incompatible with password login (and maybe other features) for now, submit a PR! @app.after_request def hide_referrer(response): if strtobool(os.getenv("HIDE_REFERER", 'false')): - response.headers["Referrer-Policy"] = "no-referrer" + response.headers["Referrer-Policy"] = "same-origin" return response diff --git a/changedetectionio/apprise_plugin/__init__.py b/changedetectionio/apprise_plugin/__init__.py index ecca929f..cbee31eb 100644 --- a/changedetectionio/apprise_plugin/__init__.py +++ b/changedetectionio/apprise_plugin/__init__.py @@ -13,6 +13,7 @@ from loguru import logger def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs): import requests import json + from urllib.parse import unquote_plus from apprise.utils import parse_url as apprise_parse_url from apprise import URLBase @@ -47,7 +48,7 @@ def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs): if results: # Add our headers that the user can potentially over-ride if they wish # to to our returned result set and tidy entries by unquoting them - headers = {URLBase.unquote(x): URLBase.unquote(y) + headers = {unquote_plus(x): unquote_plus(y) for x, y in results['qsd+'].items()} # 
https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
@@ -55,14 +56,15 @@ def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
         # but here we are making straight requests, so we need todo convert this against apprise's logic
         for k, v in results['qsd'].items():
             if not k.strip('+-') in results['qsd+'].keys():
-                params[URLBase.unquote(k)] = URLBase.unquote(v)
+                params[unquote_plus(k)] = unquote_plus(v)
 
         # Determine Authentication
         auth = ''
         if results.get('user') and results.get('password'):
-            auth = (URLBase.unquote(results.get('user')), URLBase.unquote(results.get('user')))
+            # (user, password) - the user name was previously passed as the password too
+            auth = (unquote_plus(results.get('user')), unquote_plus(results.get('password')))
         elif results.get('user'):
-            auth = (URLBase.unquote(results.get('user')))
+            auth = (unquote_plus(results.get('user')))
 
         # Try to auto-guess if it's JSON
         h = 'application/json; charset=utf-8'
diff --git a/changedetectionio/blueprint/backups/__init__.py b/changedetectionio/blueprint/backups/__init__.py
new file mode 100644
index 00000000..add44308
--- /dev/null
+++ b/changedetectionio/blueprint/backups/__init__.py
@@ -0,0 +1,191 @@
+import datetime
+import glob
+import threading
+
+from flask import Blueprint, render_template, send_from_directory, flash, url_for, redirect, abort
+import os
+
+from changedetectionio.store import ChangeDetectionStore
+from changedetectionio.flask_app import login_optionally_required
+from loguru import logger
+
+BACKUP_FILENAME_FORMAT = "changedetection-backup-{}.zip"
+
+
+def create_backup(datastore_path, watches: dict):
+    """Write a timestamped zip backup of the datastore into `datastore_path`.
+
+    The archive is built under a temporary `.tmp` name and only renamed to its final
+    `.zip` name once fully written, so a half-written backup never matches the
+    `BACKUP_FILENAME_FORMAT` glob used to list and download backups.
+
+    :param datastore_path: Directory containing `url-watches.json` and `secret.txt`, the backup is written here too.
+    :param watches: Mapping of watch UUID to watch, each providing `watch_data_dir` plus dict-style `url`/`tags`.
+    """
+    logger.debug("Creating backup...")
+    import zipfile
+    from pathlib import Path
+
+    # create a ZipFile object
+    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
+    backupname = BACKUP_FILENAME_FORMAT.format(timestamp)
+    backup_filepath = os.path.join(datastore_path, backupname)
+    # Swap only the filename extension, a plain str.replace() would also rewrite a '.zip'/'.tmp' found in the directory path
+    tmp_filepath = os.path.splitext(backup_filepath)[0] + '.tmp'
+
+    with zipfile.ZipFile(tmp_filepath, "w",
+                         compression=zipfile.ZIP_DEFLATED,
+                         compresslevel=8) as zipObj:
+
+        # Add the index
+        zipObj.write(os.path.join(datastore_path, "url-watches.json"), arcname="url-watches.json")
+
+        # Add the flask app secret
+        zipObj.write(os.path.join(datastore_path, "secret.txt"), arcname="secret.txt")
+
+        # Add any data in the watch data directory.
+        for uuid, w in watches.items():
+            for f in Path(w.watch_data_dir).glob('*'):
+                zipObj.write(f,
+                             # Use the full path to access the file, but make the file 'relative' in the Zip.
+                             arcname=os.path.join(f.parts[-2], f.parts[-1]),
+                             compress_type=zipfile.ZIP_DEFLATED,
+                             compresslevel=8)
+
+        # Create a list file with just the URLs, so it's easier to port somewhere else in the future
+        list_file = "url-list.txt"
+        with open(os.path.join(datastore_path, list_file), "w") as f:
+            for uuid in watches:
+                url = watches[uuid]["url"]
+                f.write("{}\r\n".format(url))
+        list_with_tags_file = "url-list-with-tags.txt"
+        with open(
+            os.path.join(datastore_path, list_with_tags_file), "w"
+        ) as f:
+            for uuid in watches:
+                url = watches[uuid].get('url')
+                tag = watches[uuid].get('tags', {})
+                f.write("{} {}\r\n".format(url, tag))
+
+        # Add it to the Zip
+        zipObj.write(
+            os.path.join(datastore_path, list_file),
+            arcname=list_file,
+            compress_type=zipfile.ZIP_DEFLATED,
+            compresslevel=8,
+        )
+        zipObj.write(
+            os.path.join(datastore_path, list_with_tags_file),
+            arcname=list_with_tags_file,
+            compress_type=zipfile.ZIP_DEFLATED,
+            compresslevel=8,
+        )
+
+    # Now it's done, rename it so it shows up finally and its completed being written.
+    os.rename(tmp_filepath, backup_filepath)
+
+
+def construct_blueprint(datastore: ChangeDetectionStore):
+    """Build the `backups` blueprint: request, list, download and remove datastore backups.
+
+    :param datastore: Application datastore, its `datastore_path` is where backups are written and looked up.
+    :return: The configured Flask `Blueprint`.
+    """
+    backups_blueprint = Blueprint('backups', __name__, template_folder="templates")
+    backup_threads = []
+
+    # NOTE: `route()` must be the outermost (top) decorator, it registers whatever function it receives. With
+    # `@login_optionally_required` above it the unprotected view was registered and the login check never ran.
+    @backups_blueprint.route("/request-backup", methods=['GET'])
+    @login_optionally_required
+    def request_backup():
+        if any(thread.is_alive() for thread in backup_threads):
+            flash("A backup is already running, check back in a few minutes", "error")
+            return redirect(url_for('backups.index'))
+
+        # '>=' so that MAX_NUMBER_BACKUPS is the most that can exist, '>' allowed one extra
+        if len(find_backups()) >= int(os.getenv("MAX_NUMBER_BACKUPS", 100)):
+            flash("Maximum number of backups reached, please remove some", "error")
+            return redirect(url_for('backups.index'))
+
+        # Be sure we're written fresh
+        datastore.sync_to_json()
+        zip_thread = threading.Thread(target=create_backup, args=(datastore.datastore_path, datastore.data.get("watching")))
+        zip_thread.start()
+        backup_threads.append(zip_thread)
+        flash("Backup building in background, check back in a few minutes.")
+
+        return redirect(url_for('backups.index'))
+
+    def find_backups():
+        """Return info (filename, filesize in MB, creation_time) for every finished backup, newest first."""
+        backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
+        backups = glob.glob(backup_filepath)
+        backup_info = []
+
+        for backup in backups:
+            size = os.path.getsize(backup) / (1024 * 1024)
+            creation_time = os.path.getctime(backup)
+            backup_info.append({
+                'filename': os.path.basename(backup),
+                'filesize': f"{size:.2f}",
+                'creation_time': creation_time
+            })
+
+        backup_info.sort(key=lambda x: x['creation_time'], reverse=True)
+
+        return backup_info
+
+    @backups_blueprint.route("/download/<string:filename>", methods=['GET'])
+    @login_optionally_required
+    def download_backup(filename):
+        import re
+        filename = filename.strip()
+        # Raw string so '\d' is the regex digit class (not an invalid str escape), and the '.' of '.zip' is matched literally
+        backup_filename_regex = BACKUP_FILENAME_FORMAT.replace('.', r'\.').format(r"\d+")
+
+        if filename == 'latest':
+            backups = find_backups()
+            if not backups:
+                # Nothing to serve yet, previously this raised IndexError (HTTP 500)
+                abort(404)
+            filename = backups[0]['filename']
+
+        if not re.fullmatch(backup_filename_regex, filename):
+            abort(400)  # Bad Request if the filename doesn't match the pattern
+
+        # Resolved after 'latest' is swapped for a real filename, so the check and the log line cover the file actually served
+        backup_dir = os.path.abspath(datastore.datastore_path)
+        full_path = os.path.abspath(os.path.join(backup_dir, filename))
+        if os.path.commonpath([backup_dir, full_path]) != backup_dir:
+            abort(404)
+
+        logger.debug(f"Backup download request for '{full_path}'")
+        return send_from_directory(backup_dir, filename, as_attachment=True)
+
+    @backups_blueprint.route("/", methods=['GET'])
+    @login_optionally_required
+    def index():
+        backups = find_backups()
+        output = render_template("overview.html",
+                                 available_backups=backups,
+                                 backup_running=any(thread.is_alive() for thread in backup_threads)
+                                 )
+
+        return output
+
+    # NOTE(review): destructive action reachable via GET, consider POST so a crafted link/image cannot trigger it
+    @backups_blueprint.route("/remove-backups", methods=['GET'])
+    @login_optionally_required
+    def remove_backups():
+
+        backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
+        backups = glob.glob(backup_filepath)
+        for backup in backups:
+            os.unlink(backup)
+
+        flash("Backups were deleted.")
+
+        return redirect(url_for('backups.index'))
+
+    return backups_blueprint
diff --git a/changedetectionio/blueprint/backups/templates/overview.html b/changedetectionio/blueprint/backups/templates/overview.html
new file mode 100644
index 00000000..b07be4bd
--- /dev/null
+++ b/changedetectionio/blueprint/backups/templates/overview.html
@@ -0,0 +1,36 @@
+{% extends 'base.html' %}
+{% block content %}
+    {% from '_helpers.html' import render_simple_field, render_field %}
+
+
+

Backups

+ {% if backup_running %} +

+ A backup is running! +

+ {% endif %} +

+ Here you can download and request a new backup, when a backup is completed you will see it listed below. +

+
+ {% if available_backups %} + + {% else %} +

+ No backups found. +

+ {% endif %} + + Create backup + {% if available_backups %} + Remove backups + {% endif %} +
+
+ + +{% endblock %} diff --git a/changedetectionio/blueprint/tags/__init__.py b/changedetectionio/blueprint/tags/__init__.py index ca974666..d7086213 100644 --- a/changedetectionio/blueprint/tags/__init__.py +++ b/changedetectionio/blueprint/tags/__init__.py @@ -13,6 +13,7 @@ def construct_blueprint(datastore: ChangeDetectionStore): def tags_overview_page(): from .form import SingleTag add_form = SingleTag(request.form) + sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title']) from collections import Counter @@ -104,9 +105,11 @@ def construct_blueprint(datastore: ChangeDetectionStore): default = datastore.data['settings']['application']['tags'].get(uuid) - form = group_restock_settings_form(formdata=request.form if request.method == 'POST' else None, + form = group_restock_settings_form( + formdata=request.form if request.method == 'POST' else None, data=default, - extra_notification_tokens=datastore.get_unique_notification_tokens_available() + extra_notification_tokens=datastore.get_unique_notification_tokens_available(), + default_system_settings = datastore.data['settings'], ) template_args = { diff --git a/changedetectionio/content_fetchers/res/stock-not-in-stock.js b/changedetectionio/content_fetchers/res/stock-not-in-stock.js index 373c669e..0f8c3473 100644 --- a/changedetectionio/content_fetchers/res/stock-not-in-stock.js +++ b/changedetectionio/content_fetchers/res/stock-not-in-stock.js @@ -30,6 +30,8 @@ function isItemInStock() { 'dieser artikel ist bald wieder verfügbar', 'dostępne wkrótce', 'en rupture de stock', + 'esgotado', + 'indisponível', 'isn\'t in stock right now', 'isnt in stock right now', 'isn’t in stock right now', @@ -37,6 +39,7 @@ function isItemInStock() { 'let me know when it\'s available', 'mail me when available', 'message if back in stock', + 'mevcut değil', 'nachricht bei', 'nicht auf lager', 'nicht lagernd', @@ -48,7 +51,7 @@ function isItemInStock() { 'niet beschikbaar', 'niet 
leverbaar', 'niet op voorraad', - 'no disponible temporalmente', + 'no disponible', 'no longer in stock', 'no tickets available', 'not available', @@ -57,6 +60,7 @@ function isItemInStock() { 'notify me when available', 'notify me', 'notify when available', + 'não disponível', 'não estamos a aceitar encomendas', 'out of stock', 'out-of-stock', @@ -64,12 +68,14 @@ function isItemInStock() { 'produkt niedostępny', 'sold out', 'sold-out', + 'stokta yok', 'temporarily out of stock', 'temporarily unavailable', 'there were no search results for', 'this item is currently unavailable', 'tickets unavailable', 'tijdelijk uitverkocht', + 'tükendi', 'unavailable nearby', 'unavailable tickets', 'vergriffen', diff --git a/changedetectionio/diff.py b/changedetectionio/diff.py index 859abe6a..1fa9b60a 100644 --- a/changedetectionio/diff.py +++ b/changedetectionio/diff.py @@ -1,6 +1,9 @@ import difflib from typing import List, Iterator, Union +REMOVED_STYLE = "background-color: #fadad7; color: #b30000;" +ADDED_STYLE = "background-color: #eaf2c2; color: #406619;" + def same_slicer(lst: List[str], start: int, end: int) -> List[str]: """Return a slice of the list, or a single element if start == end.""" return lst[start:end] if start != end else [lst[start]] @@ -12,11 +15,12 @@ def customSequenceMatcher( include_removed: bool = True, include_added: bool = True, include_replaced: bool = True, - include_change_type_prefix: bool = True + include_change_type_prefix: bool = True, + html_colour: bool = False ) -> Iterator[List[str]]: """ Compare two sequences and yield differences based on specified parameters. 
- + Args: before (List[str]): Original sequence after (List[str]): Modified sequence @@ -25,26 +29,35 @@ def customSequenceMatcher( include_added (bool): Include added parts include_replaced (bool): Include replaced parts include_change_type_prefix (bool): Add prefixes to indicate change types - + html_colour (bool): Use HTML background colors for differences + Yields: List[str]: Differences between sequences """ cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \t", a=before, b=after) - + + + for tag, alo, ahi, blo, bhi in cruncher.get_opcodes(): if include_equal and tag == 'equal': yield before[alo:ahi] elif include_removed and tag == 'delete': - prefix = "(removed) " if include_change_type_prefix else '' - yield [f"{prefix}{line}" for line in same_slicer(before, alo, ahi)] + if html_colour: + yield [f'{line}' for line in same_slicer(before, alo, ahi)] + else: + yield [f"(removed) {line}" for line in same_slicer(before, alo, ahi)] if include_change_type_prefix else same_slicer(before, alo, ahi) elif include_replaced and tag == 'replace': - prefix_changed = "(changed) " if include_change_type_prefix else '' - prefix_into = "(into) " if include_change_type_prefix else '' - yield [f"{prefix_changed}{line}" for line in same_slicer(before, alo, ahi)] + \ - [f"{prefix_into}{line}" for line in same_slicer(after, blo, bhi)] + if html_colour: + yield [f'{line}' for line in same_slicer(before, alo, ahi)] + \ + [f'{line}' for line in same_slicer(after, blo, bhi)] + else: + yield [f"(changed) {line}" for line in same_slicer(before, alo, ahi)] + \ + [f"(into) {line}" for line in same_slicer(after, blo, bhi)] if include_change_type_prefix else same_slicer(before, alo, ahi) + same_slicer(after, blo, bhi) elif include_added and tag == 'insert': - prefix = "(added) " if include_change_type_prefix else '' - yield [f"{prefix}{line}" for line in same_slicer(after, blo, bhi)] + if html_colour: + yield [f'{line}' for line in same_slicer(after, blo, bhi)] + else: + yield 
[f"(added) {line}" for line in same_slicer(after, blo, bhi)] if include_change_type_prefix else same_slicer(after, blo, bhi) def render_diff( previous_version_file_contents: str, @@ -55,11 +68,12 @@ def render_diff( include_replaced: bool = True, line_feed_sep: str = "\n", include_change_type_prefix: bool = True, - patch_format: bool = False + patch_format: bool = False, + html_colour: bool = False ) -> str: """ Render the difference between two file contents. - + Args: previous_version_file_contents (str): Original file contents newest_version_file_contents (str): Modified file contents @@ -70,7 +84,8 @@ def render_diff( line_feed_sep (str): Separator for lines in output include_change_type_prefix (bool): Add prefixes to indicate change types patch_format (bool): Use patch format for output - + html_colour (bool): Use HTML background colors for differences + Returns: str: Rendered difference """ @@ -88,10 +103,11 @@ def render_diff( include_removed=include_removed, include_added=include_added, include_replaced=include_replaced, - include_change_type_prefix=include_change_type_prefix + include_change_type_prefix=include_change_type_prefix, + html_colour=html_colour ) def flatten(lst: List[Union[str, List[str]]]) -> str: return line_feed_sep.join(flatten(x) if isinstance(x, list) else x for x in lst) - return flatten(rendered_diff) + return flatten(rendered_diff) \ No newline at end of file diff --git a/changedetectionio/flask_app.py b/changedetectionio/flask_app.py index a1bb862e..7d0c9d5c 100644 --- a/changedetectionio/flask_app.py +++ b/changedetectionio/flask_app.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import datetime +from zoneinfo import ZoneInfo import flask_login import locale @@ -42,6 +43,7 @@ from loguru import logger from changedetectionio import html_tools, __version__ from changedetectionio import queuedWatchMetaData from changedetectionio.api import api_v1 +from .time_handler import is_within_schedule datastore = None @@ -53,6 +55,7 @@ 
extra_stylesheets = [] update_q = queue.PriorityQueue() notification_q = queue.Queue() +MAX_QUEUE_SIZE = 2000 app = Flask(__name__, static_url_path="", @@ -83,7 +86,7 @@ csrf = CSRFProtect() csrf.init_app(app) notification_debug_log=[] -# get locale ready +# Locale for correct presentation of prices etc default_locale = locale.getdefaultlocale() logger.info(f"System locale default is {default_locale}") try: @@ -537,21 +540,27 @@ def changedetection_app(config=None, datastore_o=None): import random from .apprise_asset import asset apobj = apprise.Apprise(asset=asset) + # so that the custom endpoints are registered from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper is_global_settings_form = request.args.get('mode', '') == 'global-settings' is_group_settings_form = request.args.get('mode', '') == 'group-settings' + # Use an existing random one on the global/main settings form if not watch_uuid and (is_global_settings_form or is_group_settings_form) \ and datastore.data.get('watching'): - logger.debug(f"Send test notification - Choosing random Watch {watch_uuid}") watch_uuid = random.choice(list(datastore.data['watching'].keys())) - watch = datastore.data['watching'].get(watch_uuid) - else: - watch = None - notification_urls = request.form['notification_urls'].strip().splitlines() + if not watch_uuid: + return make_response("Error: You must have atleast one watch configured for 'test notification' to work", 400) + + watch = datastore.data['watching'].get(watch_uuid) + + notification_urls = None + + if request.form.get('notification_urls'): + notification_urls = request.form['notification_urls'].strip().splitlines() if not notification_urls: logger.debug("Test notification - Trying by group/tag in the edit form if available") @@ -569,12 +578,12 @@ def changedetection_app(config=None, datastore_o=None): if not notification_urls: - return 'No Notification URLs set/found' + return 'Error: No Notification URLs set/found' for n_url in 
notification_urls: if len(n_url.strip()): if not apobj.add(n_url): - return f'Error - {n_url} is not a valid AppRise URL.' + return f'Error: {n_url} is not a valid AppRise URL.' try: # use the same as when it is triggered, but then override it with the form test values @@ -593,11 +602,13 @@ def changedetection_app(config=None, datastore_o=None): if 'notification_body' in request.form and request.form['notification_body'].strip(): n_object['notification_body'] = request.form.get('notification_body', '').strip() + n_object.update(watch.extra_notification_token_values()) + from . import update_worker new_worker = update_worker.update_worker(update_q, notification_q, app, datastore) new_worker.queue_notification_for_watch(notification_q=notification_q, n_object=n_object, watch=watch) except Exception as e: - return make_response({'error': str(e)}, 400) + return make_response(f"Error: {str(e)}", 400) return 'OK - Sent test notifications' @@ -707,7 +718,8 @@ def changedetection_app(config=None, datastore_o=None): form = form_class(formdata=request.form if request.method == 'POST' else None, data=default, - extra_notification_tokens=default.extra_notification_token_values() + extra_notification_tokens=default.extra_notification_token_values(), + default_system_settings=datastore.data['settings'] ) # For the form widget tag UUID back to "string name" for the field @@ -795,14 +807,41 @@ def changedetection_app(config=None, datastore_o=None): # But in the case something is added we should save straight away datastore.needs_write_urgent = True - # Queue the watch for immediate recheck, with a higher priority - update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) + # Do not queue on edit if its not within the time range + + # @todo maybe it should never queue anyway on edit... 
+ is_in_schedule = True + watch = datastore.data['watching'].get(uuid) + + if watch.get('time_between_check_use_default'): + time_schedule_limit = datastore.data['settings']['requests'].get('time_schedule_limit', {}) + else: + time_schedule_limit = watch.get('time_schedule_limit') + + tz_name = time_schedule_limit.get('timezone') if time_schedule_limit else None + if not tz_name: + tz_name = datastore.data['settings']['application'].get('timezone', 'UTC') + + if time_schedule_limit and time_schedule_limit.get('enabled'): + try: + is_in_schedule = is_within_schedule(time_schedule_limit=time_schedule_limit, + default_tz=tz_name + ) + except Exception as e: + logger.error( + f"{uuid} - Recheck scheduler, error handling timezone, check skipped - TZ name '{tz_name}' - {str(e)}") + is_in_schedule = False + + ############################# + if not datastore.data['watching'][uuid].get('paused') and is_in_schedule: + # Queue the watch for immediate recheck, with a higher priority + update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) # Diff page [edit] link should go back to diff page if request.args.get("next") and request.args.get("next") == 'diff': return redirect(url_for('diff_history_page', uuid=uuid)) - return redirect(url_for('index')) + return redirect(url_for('index', tag=request.args.get("tag",''))) else: if request.method == 'POST' and not form.validate(): @@ -826,15 +865,18 @@ def changedetection_app(config=None, datastore_o=None): if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'): is_html_webdriver = True + from zoneinfo import available_timezones + # Only works reliably with Playwright visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and is_html_webdriver template_args = { 'available_processors': processors.available_processors(), + 'available_timezones': sorted(available_timezones()), 'browser_steps_config': 
browser_step_ui_config, 'emailprefix': os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False), - 'extra_title': f" - Edit - {watch.label}", - 'extra_processor_config': form.extra_tab_content(), 'extra_notification_token_placeholder_info': datastore.get_unique_notification_token_placeholders_available(), + 'extra_processor_config': form.extra_tab_content(), + 'extra_title': f" - Edit - {watch.label}", 'form': form, 'has_default_notification_urls': True if len(datastore.data['settings']['application']['notification_urls']) else False, 'has_extra_headers_file': len(datastore.get_all_headers_in_textfile_for_watch(uuid=uuid)) > 0, @@ -843,6 +885,7 @@ def changedetection_app(config=None, datastore_o=None): 'jq_support': jq_support, 'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False), 'settings_application': datastore.data['settings']['application'], + 'timezone_default_config': datastore.data['settings']['application'].get('timezone'), 'using_global_webdriver_wait': not default['webdriver_delay'], 'uuid': uuid, 'visualselector_enabled': visualselector_enabled, @@ -872,6 +915,8 @@ def changedetection_app(config=None, datastore_o=None): @login_optionally_required def settings_page(): from changedetectionio import forms + from datetime import datetime + from zoneinfo import available_timezones default = deepcopy(datastore.data['settings']) if datastore.proxy_list is not None: @@ -939,14 +984,20 @@ def changedetection_app(config=None, datastore_o=None): else: flash("An error occurred, please see below.", "error") + # Convert to ISO 8601 format, all date/time relative events stored as UTC time + utc_time = datetime.now(ZoneInfo("UTC")).isoformat() + output = render_template("settings.html", api_key=datastore.data['settings']['application'].get('api_access_token'), + available_timezones=sorted(available_timezones()), emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False), 
extra_notification_token_placeholder_info=datastore.get_unique_notification_token_placeholders_available(), form=form, hide_remove_pass=os.getenv("SALTED_PASS", False), min_system_recheck_seconds=int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3)), - settings_application=datastore.data['settings']['application'] + settings_application=datastore.data['settings']['application'], + timezone_default_config=datastore.data['settings']['application'].get('timezone'), + utc_time=utc_time, ) return output @@ -1227,78 +1278,6 @@ def changedetection_app(config=None, datastore_o=None): return output - # We're good but backups are even better! - @app.route("/backup", methods=['GET']) - @login_optionally_required - def get_backup(): - - import zipfile - from pathlib import Path - - # Remove any existing backup file, for now we just keep one file - - for previous_backup_filename in Path(datastore_o.datastore_path).rglob('changedetection-backup-*.zip'): - os.unlink(previous_backup_filename) - - # create a ZipFile object - timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S") - backupname = "changedetection-backup-{}.zip".format(timestamp) - backup_filepath = os.path.join(datastore_o.datastore_path, backupname) - - with zipfile.ZipFile(backup_filepath, "w", - compression=zipfile.ZIP_DEFLATED, - compresslevel=8) as zipObj: - - # Be sure we're written fresh - datastore.sync_to_json() - - # Add the index - zipObj.write(os.path.join(datastore_o.datastore_path, "url-watches.json"), arcname="url-watches.json") - - # Add the flask app secret - zipObj.write(os.path.join(datastore_o.datastore_path, "secret.txt"), arcname="secret.txt") - - # Add any data in the watch data directory. - for uuid, w in datastore.data['watching'].items(): - for f in Path(w.watch_data_dir).glob('*'): - zipObj.write(f, - # Use the full path to access the file, but make the file 'relative' in the Zip. 
- arcname=os.path.join(f.parts[-2], f.parts[-1]), - compress_type=zipfile.ZIP_DEFLATED, - compresslevel=8) - - # Create a list file with just the URLs, so it's easier to port somewhere else in the future - list_file = "url-list.txt" - with open(os.path.join(datastore_o.datastore_path, list_file), "w") as f: - for uuid in datastore.data["watching"]: - url = datastore.data["watching"][uuid]["url"] - f.write("{}\r\n".format(url)) - list_with_tags_file = "url-list-with-tags.txt" - with open( - os.path.join(datastore_o.datastore_path, list_with_tags_file), "w" - ) as f: - for uuid in datastore.data["watching"]: - url = datastore.data["watching"][uuid].get('url') - tag = datastore.data["watching"][uuid].get('tags', {}) - f.write("{} {}\r\n".format(url, tag)) - - # Add it to the Zip - zipObj.write( - os.path.join(datastore_o.datastore_path, list_file), - arcname=list_file, - compress_type=zipfile.ZIP_DEFLATED, - compresslevel=8, - ) - zipObj.write( - os.path.join(datastore_o.datastore_path, list_with_tags_file), - arcname=list_with_tags_file, - compress_type=zipfile.ZIP_DEFLATED, - compresslevel=8, - ) - - # Send_from_directory needs to be the full absolute path - return send_from_directory(os.path.abspath(datastore_o.datastore_path), backupname, as_attachment=True) - @app.route("/static//", methods=['GET']) def static_content(group, filename): from flask import make_response @@ -1331,12 +1310,23 @@ def changedetection_app(config=None, datastore_o=None): # These files should be in our subdirectory try: - # set nocache, set content-type - response = make_response(send_from_directory(os.path.join(datastore_o.datastore_path, filename), "elements.json")) - response.headers['Content-type'] = 'application/json' - response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate' - response.headers['Pragma'] = 'no-cache' - response.headers['Expires'] = 0 + # set nocache, set content-type, + # `filename` is actually directory UUID of the watch + watch_directory = 
str(os.path.join(datastore_o.datastore_path, filename)) + response = None + if os.path.isfile(os.path.join(watch_directory, "elements.deflate")): + response = make_response(send_from_directory(watch_directory, "elements.deflate")) + response.headers['Content-Type'] = 'application/json' + response.headers['Content-Encoding'] = 'deflate' + else: + logger.error(f'Request elements.deflate at "{watch_directory}" but was notfound.') + abort(404) + + if response: + response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate' + response.headers['Pragma'] = 'no-cache' + response.headers['Expires'] = "0" + return response except FileNotFoundError: @@ -1405,13 +1395,13 @@ def changedetection_app(config=None, datastore_o=None): if new_uuid: if add_paused: flash('Watch added in Paused state, saving will unpause.') - return redirect(url_for('edit_page', uuid=new_uuid, unpause_on_save=1)) + return redirect(url_for('edit_page', uuid=new_uuid, unpause_on_save=1, tag=request.args.get('tag'))) else: # Straight into the queue. 
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid})) flash("Watch added.") - return redirect(url_for('index')) + return redirect(url_for('index', tag=request.args.get('tag',''))) @@ -1678,13 +1668,15 @@ def changedetection_app(config=None, datastore_o=None): import changedetectionio.blueprint.check_proxies as check_proxies app.register_blueprint(check_proxies.construct_blueprint(datastore=datastore), url_prefix='/check_proxy') + import changedetectionio.blueprint.backups as backups + app.register_blueprint(backups.construct_blueprint(datastore), url_prefix='/backups') # @todo handle ctrl break ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start() threading.Thread(target=notification_runner).start() # Check for new release version, but not when running in test/build or pytest - if not os.getenv("GITHUB_REF", False) and not config.get('disable_checkver') == True: + if not os.getenv("GITHUB_REF", False) and not strtobool(os.getenv('DISABLE_VERSION_CHECK', 'no')): threading.Thread(target=check_for_new_version).start() return app @@ -1768,7 +1760,6 @@ def notification_runner(): def ticker_thread_check_time_launch_checks(): import random from changedetectionio import update_worker - proxy_last_called_time = {} recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3)) @@ -1802,12 +1793,14 @@ def ticker_thread_check_time_launch_checks(): except RuntimeError as e: # RuntimeError: dictionary changed size during iteration time.sleep(0.1) + watch_uuid_list = [] else: break # Re #438 - Don't place more watches in the queue to be checked if the queue is already large while update_q.qsize() >= 2000: - time.sleep(1) + logger.warning(f"Recheck watches queue size limit reached ({MAX_QUEUE_SIZE}), skipping adding more items") + time.sleep(3) recheck_time_system_seconds = int(datastore.threshold_seconds) @@ -1824,6 +1817,28 @@ def ticker_thread_check_time_launch_checks(): if watch['paused']: 
continue + # @todo - Maybe make this a hook? + # Time schedule limit - Decide between watch or global settings + if watch.get('time_between_check_use_default'): + time_schedule_limit = datastore.data['settings']['requests'].get('time_schedule_limit', {}) + logger.trace(f"{uuid} Time scheduler - Using system/global settings") + else: + time_schedule_limit = watch.get('time_schedule_limit') + logger.trace(f"{uuid} Time scheduler - Using watch settings (not global settings)") + tz_name = datastore.data['settings']['application'].get('timezone', 'UTC') + + if time_schedule_limit and time_schedule_limit.get('enabled'): + try: + result = is_within_schedule(time_schedule_limit=time_schedule_limit, + default_tz=tz_name + ) + if not result: + logger.trace(f"{uuid} Time scheduler - not within schedule skipping.") + continue + except Exception as e: + logger.error( + f"{uuid} - Recheck scheduler, error handling timezone, check skipped - TZ name '{tz_name}' - {str(e)}") + return False # If they supplied an individual entry minutes to threshold. 
threshold = recheck_time_system_seconds if watch.get('time_between_check_use_default') else watch.threshold_seconds() diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py index f99496e5..11792d62 100644 --- a/changedetectionio/forms.py +++ b/changedetectionio/forms.py @@ -1,12 +1,14 @@ import os import re from loguru import logger +from wtforms.widgets.core import TimeInput from changedetectionio.strtobool import strtobool from wtforms import ( BooleanField, Form, + Field, IntegerField, RadioField, SelectField, @@ -125,6 +127,87 @@ class StringTagUUID(StringField): return 'error' +class TimeDurationForm(Form): + hours = SelectField(choices=[(f"{i}", f"{i}") for i in range(0, 25)], default="24", validators=[validators.Optional()]) + minutes = SelectField(choices=[(f"{i}", f"{i}") for i in range(0, 60)], default="00", validators=[validators.Optional()]) + +class TimeStringField(Field): + """ + A WTForms field for time inputs (HH:MM) that stores the value as a string. + """ + widget = TimeInput() # Use the built-in time input widget + + def _value(self): + """ + Returns the value for rendering in the form. + """ + return self.data if self.data is not None else "" + + def process_formdata(self, valuelist): + """ + Processes the raw input from the form and stores it as a string. + """ + if valuelist: + time_str = valuelist[0] + # Simple validation for HH:MM format + if not time_str or len(time_str.split(":")) != 2: + raise ValidationError("Invalid time format. 
Use HH:MM.") + self.data = time_str + + +class validateTimeZoneName(object): + """ + Flask wtform validators wont work with basic auth + """ + + def __init__(self, message=None): + self.message = message + + def __call__(self, form, field): + from zoneinfo import available_timezones + python_timezones = available_timezones() + if field.data and field.data not in python_timezones: + raise ValidationError("Not a valid timezone name") + +class ScheduleLimitDaySubForm(Form): + enabled = BooleanField("not set", default=True) + start_time = TimeStringField("Start At", default="00:00", render_kw={"placeholder": "HH:MM"}, validators=[validators.Optional()]) + duration = FormField(TimeDurationForm, label="Run duration") + +class ScheduleLimitForm(Form): + enabled = BooleanField("Use time scheduler", default=False) + # Because the label for=""" doesnt line up/work with the actual checkbox + monday = FormField(ScheduleLimitDaySubForm, label="") + tuesday = FormField(ScheduleLimitDaySubForm, label="") + wednesday = FormField(ScheduleLimitDaySubForm, label="") + thursday = FormField(ScheduleLimitDaySubForm, label="") + friday = FormField(ScheduleLimitDaySubForm, label="") + saturday = FormField(ScheduleLimitDaySubForm, label="") + sunday = FormField(ScheduleLimitDaySubForm, label="") + + timezone = StringField("Optional timezone to run in", + render_kw={"list": "timezones"}, + validators=[validateTimeZoneName()] + ) + def __init__( + self, + formdata=None, + obj=None, + prefix="", + data=None, + meta=None, + **kwargs, + ): + super().__init__(formdata, obj, prefix, data, meta, **kwargs) + self.monday.form.enabled.label.text="Monday" + self.tuesday.form.enabled.label.text = "Tuesday" + self.wednesday.form.enabled.label.text = "Wednesday" + self.thursday.form.enabled.label.text = "Thursday" + self.friday.form.enabled.label.text = "Friday" + self.saturday.form.enabled.label.text = "Saturday" + self.sunday.form.enabled.label.text = "Sunday" + + class TimeBetweenCheckForm(Form): 
weeks = IntegerField('Weeks', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")]) days = IntegerField('Days', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")]) @@ -225,8 +308,12 @@ class ValidateAppRiseServers(object): # so that the custom endpoints are registered from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper for server_url in field.data: - if not apobj.add(server_url): - message = field.gettext('\'%s\' is not a valid AppRise URL.' % (server_url)) + url = server_url.strip() + if url.startswith("#"): + continue + + if not apobj.add(url): + message = field.gettext('\'%s\' is not a valid AppRise URL.' % (url)) raise ValidationError(message) class ValidateJinja2Template(object): @@ -279,6 +366,7 @@ class validateURL(object): # This should raise a ValidationError() or not validate_url(field.data) + def validate_url(test_url): # If hosts that only contain alphanumerics are allowed ("localhost" for example) try: @@ -438,6 +526,7 @@ class commonSettingsForm(Form): notification_title = StringField('Notification Title', default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()]) notification_urls = StringListField('Notification URL List', validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()]) processor = RadioField( label=u"Processor - What do you want to achieve?", choices=processors.available_processors(), default="text_json_diff") + timezone = StringField("Timezone for watch schedule", render_kw={"list": "timezones"}, validators=[validateTimeZoneName()]) webdriver_delay = IntegerField('Wait seconds before extracting text', validators=[validators.Optional(), validators.NumberRange(min=1, message="Should contain one or more seconds")]) @@ -448,7 +537,6 @@ class importForm(Form): xlsx_file = FileField('Upload .xlsx file', 
validators=[FileAllowed(['xlsx'], 'Must be .xlsx file!')]) file_mapping = SelectField('File mapping', [validators.DataRequired()], choices={('wachete', 'Wachete mapping'), ('custom','Custom mapping')}) - class SingleBrowserStep(Form): operation = SelectField('Operation', [validators.Optional()], choices=browser_step_ui_config.keys()) @@ -466,6 +554,9 @@ class processor_text_json_diff_form(commonSettingsForm): tags = StringTagUUID('Group tag', [validators.Optional()], default='') time_between_check = FormField(TimeBetweenCheckForm) + + time_schedule_limit = FormField(ScheduleLimitForm) + time_between_check_use_default = BooleanField('Use global settings for time between check', default=False) include_filters = StringListField('CSS/JSONPath/JQ/XPath Filters', [ValidateCSSJSONXPATHInput()], default='') @@ -515,6 +606,7 @@ class processor_text_json_diff_form(commonSettingsForm): if not super().validate(): return False + from changedetectionio.safe_jinja import render as jinja_render result = True # Fail form validation when a body is set for a GET @@ -524,20 +616,65 @@ class processor_text_json_diff_form(commonSettingsForm): # Attempt to validate jinja2 templates in the URL try: - from changedetectionio.safe_jinja import render as jinja_render jinja_render(template_str=self.url.data) except ModuleNotFoundError as e: # incase jinja2_time or others is missing logger.error(e) - self.url.errors.append(e) + self.url.errors.append(f'Invalid template syntax configuration: {e}') result = False except Exception as e: logger.error(e) - self.url.errors.append('Invalid template syntax') + self.url.errors.append(f'Invalid template syntax: {e}') result = False + # Attempt to validate jinja2 templates in the body + if self.body.data and self.body.data.strip(): + try: + jinja_render(template_str=self.body.data) + except ModuleNotFoundError as e: + # incase jinja2_time or others is missing + logger.error(e) + self.body.errors.append(f'Invalid template syntax configuration: {e}') + 
result = False + except Exception as e: + logger.error(e) + self.body.errors.append(f'Invalid template syntax: {e}') + result = False + + # Attempt to validate jinja2 templates in the headers + if len(self.headers.data) > 0: + try: + for header, value in self.headers.data.items(): + jinja_render(template_str=value) + except ModuleNotFoundError as e: + # incase jinja2_time or others is missing + logger.error(e) + self.headers.errors.append(f'Invalid template syntax configuration: {e}') + result = False + except Exception as e: + logger.error(e) + self.headers.errors.append(f'Invalid template syntax in "{header}" header: {e}') + result = False + return result + def __init__( + self, + formdata=None, + obj=None, + prefix="", + data=None, + meta=None, + **kwargs, + ): + super().__init__(formdata, obj, prefix, data, meta, **kwargs) + if kwargs and kwargs.get('default_system_settings'): + default_tz = kwargs.get('default_system_settings').get('application', {}).get('timezone') + if default_tz: + self.time_schedule_limit.form.timezone.render_kw['placeholder'] = default_tz + + + class SingleExtraProxy(Form): # maybe better to set some + + +
@@ -32,6 +35,12 @@
{{ render_field(form.requests.form.time_between_check, class="time-check-widget") }} Default recheck time for all watches, current system minimum is {{min_system_recheck_seconds}} seconds (more info). +
+ +
+ {{ render_time_schedule_form(form.requests, available_timezones, timezone_default_config) }} +
+
{{ render_field(form.requests.form.jitter_seconds, class="jitter_seconds") }} @@ -211,6 +220,23 @@ nav

+
+
+ Ensure the settings below are correct; they are used to manage the time schedule for checking your web page watches.
+
+

UTC Time & Date from Server: {{ utc_time }}

+

Local Time & Date in Browser:

+

+ {{ render_field(form.application.form.timezone) }} + +

+
+