From 8d5b0b5576291185dba8f8cd3ab8551362a714f0 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Wed, 12 Oct 2022 10:51:39 +0200 Subject: [PATCH 01/14] Update README.md --- README.md | 44 ++++---------------------------------------- 1 file changed, 4 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index 03b12463..0f167828 100644 --- a/README.md +++ b/README.md @@ -161,50 +161,14 @@ This will re-parse the JSON and apply formatting to the text, making it super ea ### JSONPath or jq? -For more complex parsing, filtering, and modifying of JSON data, jq is recommended due to the built-in operators and functions. Refer to the [documentation](https://stedolan.github.io/jq/manual/) for more information on jq. +For more complex parsing, filtering, and modifying of JSON data, jq is recommended due to the built-in operators and functions. Refer to the [documentation](https://stedolan.github.io/jq/manual/) for more specific information on jq. -Notes: -- `jq` must be added manually separately from the installation of changedetection.io (simply run `pip3 install jq`) -- `jq` is not available on Windows or must be manually compiled (No "wheel" package available on pypi) +One big advantage of `jq` is that you can use logic in your JSON filter, such as filters to only show items that have a value greater than/less than etc. -- The example below adds the price in dollars to each item in the JSON data, and then filters to only show items that are greater than 10. +See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/JSON-Selector-Filter-help for more information and examples -#### Sample input data from API -``` -{ - "items": [ - { - "name": "Product A", - "priceInCents": 2500 - }, - { - "name": "Product B", - "priceInCents": 500 - }, - { - "name": "Product C", - "priceInCents": 2000 - } - ] -} -``` - -#### Sample jq -`jq:.items[] | . 
+ { "priceInDollars": (.priceInCents / 100) } | select(.priceInDollars > 10)` +Note: `jq` library must be added separately (`pip3 install jq`) -#### Sample output data -``` -{ - "name": "Product A", - "priceInCents": 2500, - "priceInDollars": 25 -} -{ - "name": "Product C", - "priceInCents": 2000, - "priceInDollars": 20 -} -``` ### Parse JSON embedded in HTML! From 63095f70eaf8eb601493bcfb9331044a0d3d82b9 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Mon, 17 Oct 2022 17:13:15 +0200 Subject: [PATCH 02/14] Also include tests in pip build --- MANIFEST.in | 1 + 1 file changed, 1 insertion(+) diff --git a/MANIFEST.in b/MANIFEST.in index 3f41f906..4b3eb3ad 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,6 +2,7 @@ recursive-include changedetectionio/api * recursive-include changedetectionio/templates * recursive-include changedetectionio/static * recursive-include changedetectionio/model * +recursive-include changedetectionio/tests * include changedetection.py global-exclude *.pyc global-exclude node_modules From 85897e0bf9315d4a5af2d5a4aa17deec4df33821 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Mon, 17 Oct 2022 17:40:28 +0200 Subject: [PATCH 03/14] Windows - diff file handling improvements (#1031) --- changedetectionio/__init__.py | 10 ++++++---- changedetectionio/model/Watch.py | 11 ++++++----- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index c8d8c52f..096070ef 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -816,9 +816,11 @@ def changedetection_app(config=None, datastore_o=None): newest_file = history[dates[-1]] + # Read as binary and force decode as UTF-8 + # Windows may fail decode in python if we just use 'r' mode (chardet decode exception) try: - with open(newest_file, 'r') as f: - newest_version_file_contents = f.read() + with open(newest_file, 'rb') as f: + newest_version_file_contents = f.read().decode('utf-8') except Exception as e: 
newest_version_file_contents = "Unable to read {}.\n".format(newest_file) @@ -830,8 +832,8 @@ def changedetection_app(config=None, datastore_o=None): previous_file = history[dates[-2]] try: - with open(previous_file, 'r') as f: - previous_version_file_contents = f.read() + with open(previous_file, 'rb') as f: + previous_version_file_contents = f.read().decode('utf-8') except Exception as e: previous_version_file_contents = "Unable to read {}.\n".format(previous_file) diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py index b7aaca86..9a87ad71 100644 --- a/changedetectionio/model/Watch.py +++ b/changedetectionio/model/Watch.py @@ -151,28 +151,29 @@ class model(dict): import uuid import logging - output_path = "{}/{}".format(self.__datastore_path, self['uuid']) + output_path = os.path.join(self.__datastore_path, self['uuid']) self.ensure_data_dir_exists() + snapshot_fname = os.path.join(output_path, str(uuid.uuid4())) - snapshot_fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4()) logging.debug("Saving history text {}".format(snapshot_fname)) + # in /diff/ we are going to assume for now that it's UTF-8 when reading with open(snapshot_fname, 'wb') as f: f.write(contents) f.close() # Append to index # @todo check last char was \n - index_fname = "{}/history.txt".format(output_path) + index_fname = os.path.join(output_path, "history.txt") with open(index_fname, 'a') as f: f.write("{},{}\n".format(timestamp, snapshot_fname)) f.close() self.__newest_history_key = timestamp - self.__history_n+=1 + self.__history_n += 1 - #@todo bump static cache of the last timestamp so we dont need to examine the file to set a proper ''viewed'' status + # @todo bump static cache of the last timestamp so we dont need to examine the file to set a proper ''viewed'' status return snapshot_fname @property From 957a3c1c16baca4f1ee140986ec9e28bfe773a8e Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Mon, 17 Oct 2022 17:43:35 +0200 Subject: [PATCH 04/14] 
0.39.20.3 --- changedetectionio/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index 096070ef..c745dd3e 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -33,7 +33,7 @@ from flask_wtf import CSRFProtect from changedetectionio import html_tools from changedetectionio.api import api_v1 -__version__ = '0.39.20.2' +__version__ = '0.39.20.3' datastore = None From 4be0260381e078052cdd213ab9a6779a7f4c681a Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Mon, 17 Oct 2022 18:36:22 +0200 Subject: [PATCH 05/14] Better cross platform file handling in diff and preview (#1034) --- changedetectionio/__init__.py | 10 +++++----- changedetectionio/model/Watch.py | 3 ++- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index c745dd3e..19873cce 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -819,8 +819,8 @@ def changedetection_app(config=None, datastore_o=None): # Read as binary and force decode as UTF-8 # Windows may fail decode in python if we just use 'r' mode (chardet decode exception) try: - with open(newest_file, 'rb') as f: - newest_version_file_contents = f.read().decode('utf-8') + with open(newest_file, 'r', encoding='utf-8', errors='ignore') as f: + newest_version_file_contents = f.read() except Exception as e: newest_version_file_contents = "Unable to read {}.\n".format(newest_file) @@ -832,8 +832,8 @@ def changedetection_app(config=None, datastore_o=None): previous_file = history[dates[-2]] try: - with open(previous_file, 'rb') as f: - previous_version_file_contents = f.read().decode('utf-8') + with open(previous_file, 'r', encoding='utf-8', errors='ignore') as f: + previous_version_file_contents = f.read() except Exception as e: previous_version_file_contents = "Unable to read {}.\n".format(previous_file) @@ -909,7 +909,7 @@ def 
changedetection_app(config=None, datastore_o=None): timestamp = list(watch.history.keys())[-1] filename = watch.history[timestamp] try: - with open(filename, 'r') as f: + with open(filename, 'r', encoding='utf-8', errors='ignore') as f: tmp = f.readlines() # Get what needs to be highlighted diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py index 9a87ad71..566eb88e 100644 --- a/changedetectionio/model/Watch.py +++ b/changedetectionio/model/Watch.py @@ -158,7 +158,8 @@ class model(dict): logging.debug("Saving history text {}".format(snapshot_fname)) - # in /diff/ we are going to assume for now that it's UTF-8 when reading + # in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading + # most sites are utf-8 and some are even broken utf-8 with open(snapshot_fname, 'wb') as f: f.write(contents) f.close() From 4cbcc594615a2db474ecad03f211ee613926557d Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Mon, 17 Oct 2022 18:36:47 +0200 Subject: [PATCH 06/14] 0.39.20.4 --- changedetectionio/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index 19873cce..c6f95f1e 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -33,7 +33,7 @@ from flask_wtf import CSRFProtect from changedetectionio import html_tools from changedetectionio.api import api_v1 -__version__ = '0.39.20.3' +__version__ = '0.39.20.4' datastore = None From 3c31f023ce1e7698955c0a4f8880143bc78202a8 Mon Sep 17 00:00:00 2001 From: Michael McMillan Date: Tue, 18 Oct 2022 09:16:22 +0200 Subject: [PATCH 07/14] Option to Hide the Referer header from monitored websites. 
(#996) --- changedetectionio/changedetection.py | 8 ++++++++ docker-compose.yml | 3 +++ 2 files changed, 11 insertions(+) diff --git a/changedetectionio/changedetection.py b/changedetectionio/changedetection.py index 32c21ac4..461476e1 100755 --- a/changedetectionio/changedetection.py +++ b/changedetectionio/changedetection.py @@ -102,6 +102,14 @@ def main(): has_password=datastore.data['settings']['application']['password'] != False ) + # Monitored websites will not receive a Referer header + # when a user clicks on an outgoing link. + @app.after_request + def hide_referrer(response): + if os.getenv("HIDE_REFERER", False): + response.headers["Referrer-Policy"] = "no-referrer" + return response + # Proxy sub-directory support # Set environment var USE_X_SETTINGS=1 on this script # And then in your proxy_pass settings diff --git a/docker-compose.yml b/docker-compose.yml index 65417ee7..c04fcf0c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -45,6 +45,9 @@ services: # Respect proxy_pass type settings, `proxy_set_header Host "localhost";` and `proxy_set_header X-Forwarded-Prefix /app;` # More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy-sub-directory # - USE_X_SETTINGS=1 + # + # Hides the `Referer` header so that monitored websites can't see the changedetection.io hostname. + # - HIDE_REFERER=true # Comment out ports: when using behind a reverse proxy , enable networks: etc. 
ports: From 5a43a350dea113ddb0903010e5b7a4546a67e341 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Wed, 19 Oct 2022 22:41:13 +0200 Subject: [PATCH 08/14] History index safety check - Be sure that only valid history index lines are read (#1042) --- changedetectionio/model/Watch.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py index 566eb88e..5834b532 100644 --- a/changedetectionio/model/Watch.py +++ b/changedetectionio/model/Watch.py @@ -118,7 +118,10 @@ class model(dict): if os.path.isfile(fname): logging.debug("Reading history index " + str(time.time())) with open(fname, "r") as f: - tmp_history = dict(i.strip().split(',', 2) for i in f.readlines()) + for i in f.readlines(): + if ',' in i: + k, v = i.strip().split(',', 2) + tmp_history[k] = v if len(tmp_history): self.__newest_history_key = list(tmp_history.keys())[-1] From 9c5588c79100f0fc4dfaa1518d86d197681b5c22 Mon Sep 17 00:00:00 2001 From: Entepotenz <19738301+Entepotenz@users.noreply.github.com> Date: Sun, 23 Oct 2022 11:25:29 +0200 Subject: [PATCH 09/14] update path for validation in the CONTRIBUTING.md (#1046) --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9641dd16..8478a7ab 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -6,7 +6,7 @@ Otherwise, it's always best to PR into the `dev` branch. Please be sure that all new functionality has a matching test! 
-Use `pytest` to validate/test, you can run the existing tests as `pytest tests/test_notifications.py` for example +Use `pytest` to validate/test, you can run the existing tests as `pytest tests/test_notification.py` for example ``` pip3 install -r requirements-dev From 7839551d6b69af6d16cfff4a18a745de70de96e1 Mon Sep 17 00:00:00 2001 From: Entepotenz <19738301+Entepotenz@users.noreply.github.com> Date: Sun, 23 Oct 2022 11:26:32 +0200 Subject: [PATCH 10/14] Testing - Use same version of playwright while running tests as in production builds (#1047) --- changedetectionio/run_all_tests.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/changedetectionio/run_all_tests.sh b/changedetectionio/run_all_tests.sh index 28dd85c6..4eff9e93 100755 --- a/changedetectionio/run_all_tests.sh +++ b/changedetectionio/run_all_tests.sh @@ -9,6 +9,8 @@ # exit when any command fails set -e +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + find tests/test_*py -type f|while read test_name do echo "TEST RUNNING $test_name" @@ -45,7 +47,9 @@ docker kill $$-test_selenium echo "TESTING WEBDRIVER FETCH > PLAYWRIGHT/BROWSERLESS..." 
# Not all platforms support playwright (not ARM/rPI), so it's not packaged in requirements.txt -pip3 install playwright~=1.24 +PLAYWRIGHT_VERSION=$(grep -i -E "RUN pip install.+" "$SCRIPT_DIR/../Dockerfile" | grep --only-matching -i -E "playwright[=><~+]+[0-9\.]+") +echo "using $PLAYWRIGHT_VERSION" +pip3 install "$PLAYWRIGHT_VERSION" docker run -d --name $$-test_browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm -p 3000:3000 --shm-size="2g" browserless/chrome:1.53-chrome-stable # takes a while to spin up sleep 5 From 0394a56be57f6278f884908fc38fffcb6ed4f685 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Sun, 23 Oct 2022 15:54:19 +0200 Subject: [PATCH 11/14] Building - Test container build on PR --- .github/workflows/test-container-build.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/test-container-build.yml b/.github/workflows/test-container-build.yml index dc6ab712..cc457286 100644 --- a/.github/workflows/test-container-build.yml +++ b/.github/workflows/test-container-build.yml @@ -1,8 +1,7 @@ name: ChangeDetection.io Container Build Test # Triggers the workflow on push or pull request events -on: - push: +on: [push, pull_request] paths: - requirements.txt - Dockerfile From 492bbce6b67908d1b7557f58642746d1cb3519bc Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Sun, 23 Oct 2022 16:02:13 +0200 Subject: [PATCH 12/14] Build - Fix syntax in container build test (#1050) --- .github/workflows/test-container-build.yml | 12 +++++++++++- Dockerfile | 1 + requirements.txt | 7 ++++--- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test-container-build.yml b/.github/workflows/test-container-build.yml index cc457286..7d59ad0b 100644 --- a/.github/workflows/test-container-build.yml +++ b/.github/workflows/test-container-build.yml @@ -1,7 +1,17 @@ name: ChangeDetection.io Container Build Test # Triggers the workflow on push or pull request events -on: [push, pull_request] + +# This 
line doesnt work, even tho it is the documented one +#on: [push, pull_request] + +on: + push: + paths: + - requirements.txt + - Dockerfile + + pull_request: paths: - requirements.txt - Dockerfile diff --git a/Dockerfile b/Dockerfile index d422918e..978a912c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -64,6 +64,7 @@ EXPOSE 5000 # The actual flask app COPY changedetectionio /app/changedetectionio + # The eventlet server wrapper COPY changedetection.py /app/changedetection.py diff --git a/requirements.txt b/requirements.txt index bffc2a7f..500f45f9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ -flask~= 2.0 +flask ~= 2.0 flask_wtf -eventlet>=0.31.0 +eventlet >= 0.31.0 validators -timeago ~=1.0 +timeago ~= 1.0 inscriptis ~= 2.2 feedgen ~= 0.9 flask-login ~= 0.5 @@ -47,3 +47,4 @@ selenium ~= 4.1.0 werkzeug ~= 2.0.0 # playwright is installed at Dockerfile build time because it's not available on all platforms + From 5d40e16c73b74888a19abecb911e01156d0172de Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Sun, 23 Oct 2022 19:15:11 +0200 Subject: [PATCH 13/14] API - Adding basic system info/system state API (#1051) --- changedetectionio/__init__.py | 3 +++ changedetectionio/api/api_v1.py | 30 +++++++++++++++++++++++++++++ changedetectionio/store.py | 6 +++--- changedetectionio/tests/test_api.py | 10 ++++++++++ 4 files changed, 46 insertions(+), 3 deletions(-) diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index c6f95f1e..8bbb747d 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -194,6 +194,9 @@ def changedetection_app(config=None, datastore_o=None): watch_api.add_resource(api_v1.Watch, '/api/v1/watch/', resource_class_kwargs={'datastore': datastore, 'update_q': update_q}) + watch_api.add_resource(api_v1.SystemInfo, '/api/v1/systeminfo', + resource_class_kwargs={'datastore': datastore, 'update_q': update_q}) + diff --git a/changedetectionio/api/api_v1.py b/changedetectionio/api/api_v1.py index 
a432bc67..d44c990e 100644 --- a/changedetectionio/api/api_v1.py +++ b/changedetectionio/api/api_v1.py @@ -122,3 +122,33 @@ class CreateWatch(Resource): return {'status': "OK"}, 200 return list, 200 + +class SystemInfo(Resource): + def __init__(self, **kwargs): + # datastore is a black box dependency + self.datastore = kwargs['datastore'] + self.update_q = kwargs['update_q'] + + @auth.check_token + def get(self): + import time + overdue_watches = [] + + # Check all watches and report which have not been checked but should have been + + for uuid, watch in self.datastore.data.get('watching', {}).items(): + # see if now - last_checked is greater than the time that should have been + # this is not super accurate (maybe they just edited it) but better than nothing + t = watch.threshold_seconds() + if not t: + t = self.datastore.threshold_seconds + time_since_check = time.time() - watch.get('last_checked') + if time_since_check > t: + overdue_watches.append(uuid) + + return { + 'queue_size': self.update_q.qsize(), + 'overdue_watches': overdue_watches, + 'uptime': round(time.time() - self.datastore.start_time, 2), + 'watch_count': len(self.datastore.data.get('watching', {})) + }, 200 diff --git a/changedetectionio/store.py b/changedetectionio/store.py index bd86039a..6182aef8 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -30,14 +30,14 @@ class ChangeDetectionStore: def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"): # Should only be active for docker # logging.basicConfig(filename='/dev/stdout', level=logging.INFO) - self.needs_write = False + self.__data = App.model() self.datastore_path = datastore_path self.json_store_path = "{}/url-watches.json".format(self.datastore_path) + self.needs_write = False self.proxy_list = None + self.start_time = time.time() self.stop_thread = False - self.__data = App.model() - # Base definition for all watchers # deepcopy part of #569 - not sure why its needed 
exactly self.generic_definition = deepcopy(Watch.model(datastore_path = datastore_path, default={})) diff --git a/changedetectionio/tests/test_api.py b/changedetectionio/tests/test_api.py index dd66012e..504a9554 100644 --- a/changedetectionio/tests/test_api.py +++ b/changedetectionio/tests/test_api.py @@ -147,6 +147,16 @@ def test_api_simple(client, live_server): # @todo how to handle None/default global values? assert watch['history_n'] == 2, "Found replacement history section, which is in its own API" + # basic systeminfo check + res = client.get( + url_for("systeminfo"), + headers={'x-api-key': api_key}, + ) + info = json.loads(res.data) + assert info.get('watch_count') == 1 + assert info.get('uptime') > 0.5 + + # Finally delete the watch res = client.delete( url_for("watch", uuid=watch_uuid), From 4eb4b401a1891c6af359abc0a58b243cf58acc19 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Sun, 23 Oct 2022 23:12:28 +0200 Subject: [PATCH 14/14] API - system info - allow 5 minutes grace before watch is considered 'overdue' --- changedetectionio/api/api_v1.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/changedetectionio/api/api_v1.py b/changedetectionio/api/api_v1.py index d44c990e..40131ca5 100644 --- a/changedetectionio/api/api_v1.py +++ b/changedetectionio/api/api_v1.py @@ -141,9 +141,13 @@ class SystemInfo(Resource): # this is not super accurate (maybe they just edited it) but better than nothing t = watch.threshold_seconds() if not t: + # Use the system wide default t = self.datastore.threshold_seconds + time_since_check = time.time() - watch.get('last_checked') - if time_since_check > t: + + # Allow 5 minutes of grace time before we decide it's overdue + if time_since_check - (5 * 60) > t: overdue_watches.append(uuid) return {