Merge branch 'master' into diff-filters

2 years ago · 62b6645810
parent e5e8b3bbbd 4eb4b401a1
commit 62b6645810
14 changed files with 103 additions and 58 deletions
--- a/.github/workflows/test-container-build.yml
+++ b/.github/workflows/test-container-build.yml
@ -1,12 +1,21 @@
 name: ChangeDetection.io Container Build Test
 # Triggers the workflow on push or pull request events
 # This line doesn't work, even though it is the documented one
 #on: [push, pull_request]
 on:
  push:
    paths:
      - requirements.txt
      - Dockerfile
  pull_request:
    paths:
      - requirements.txt
      - Dockerfile
  # Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing
  # @todo: some kind of path filter for requirements.txt and Dockerfile
 jobs:
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -6,7 +6,7 @@ Otherwise, it's always best to PR into the `dev` branch.
 Please be sure that all new functionality has a matching test!
-Use `pytest` to validate/test, you can run the existing tests as `pytest tests/test_notifications.py` for example
+Use `pytest` to validate/test, you can run the existing tests as `pytest tests/test_notification.py` for example
 ```
 pip3 install -r requirements-dev
--- a/Dockerfile
+++ b/Dockerfile
@ -64,6 +64,7 @@ EXPOSE 5000
 # The actual flask app
 COPY changedetectionio /app/changedetectionio
 # The eventlet server wrapper
 COPY changedetection.py /app/changedetection.py
--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -2,6 +2,7 @@ recursive-include changedetectionio/api *
 recursive-include changedetectionio/templates *
 recursive-include changedetectionio/static *
 recursive-include changedetectionio/model *
 recursive-include changedetectionio/tests *
 include changedetection.py
 global-exclude *.pyc
 global-exclude node_modules
--- a/README.md
+++ b/README.md
@ -161,50 +161,14 @@ This will re-parse the JSON and apply formatting to the text, making it super ea
 ### JSONPath or jq?
-For more complex parsing, filtering, and modifying of JSON data, jq is recommended due to the built-in operators and functions. Refer to the [documentation](https://stedolan.github.io/jq/manual/) for more information on jq.
+For more complex parsing, filtering, and modifying of JSON data, jq is recommended due to the built-in operators and functions. Refer to the [documentation](https://stedolan.github.io/jq/manual/) for more specific information on jq.
-Notes:
+One big advantage of `jq` is that you can use logic in your JSON filter, such as filters to only show items that have a value greater than/less than etc.
 - `jq` must be added manually separately from the installation of changedetection.io (simply run `pip3 install jq`)
 - `jq` is not available on Windows or must be manually compiled (No "wheel" package available on pypi)
- The example below adds the price in dollars to each item in the JSON data, and then filters to only show items that are greater than 10.
+See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/JSON-Selector-Filter-help for more information and examples
-#### Sample input data from API
+Note: `jq` library must be added separately (`pip3 install jq`)
 ```
 {
    "items": [
        {
           "name": "Product A",
           "priceInCents": 2500
        },
        {
           "name": "Product B",
           "priceInCents": 500
        },
        {
           "name": "Product C",
           "priceInCents": 2000
        }
    ]
 }
 ```
 #### Sample jq
 `jq:.items[] | . + { "priceInDollars": (.priceInCents / 100) } | select(.priceInDollars > 10)`
 #### Sample output data
 ```
 {
  "name": "Product A",
  "priceInCents": 2500,
  "priceInDollars": 25
 }
 {
  "name": "Product C",
  "priceInCents": 2000,
  "priceInDollars": 20
 }
 ```
 ### Parse JSON embedded in HTML!
--- a/changedetectionio/__init__.py
+++ b/changedetectionio/__init__.py
@ -33,7 +33,7 @@ from flask_wtf import CSRFProtect
 from changedetectionio import html_tools
 from changedetectionio.api import api_v1
-__version__ = '0.39.20.2'
+__version__ = '0.39.20.4'
 datastore = None
@ -194,6 +194,9 @@ def changedetection_app(config=None, datastore_o=None):
    watch_api.add_resource(api_v1.Watch, '/api/v1/watch/<string:uuid>',
                           resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
    watch_api.add_resource(api_v1.SystemInfo, '/api/v1/systeminfo',
                           resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
@ -816,8 +819,10 @@ def changedetection_app(config=None, datastore_o=None):
        newest_file = history[dates[-1]]
        # Read as binary and force decode as UTF-8
        # Windows may fail decode in python if we just use 'r' mode (chardet decode exception)
        try:
-            with open(newest_file, 'r') as f:
+            with open(newest_file, 'r', encoding='utf-8', errors='ignore') as f:
                newest_version_file_contents = f.read()
        except Exception as e:
            newest_version_file_contents = "Unable to read {}.\n".format(newest_file)
@ -830,7 +835,7 @@ def changedetection_app(config=None, datastore_o=None):
            previous_file = history[dates[-2]]
        try:
-            with open(previous_file, 'r') as f:
+            with open(previous_file, 'r', encoding='utf-8', errors='ignore') as f:
                previous_version_file_contents = f.read()
        except Exception as e:
            previous_version_file_contents = "Unable to read {}.\n".format(previous_file)
@ -907,7 +912,7 @@ def changedetection_app(config=None, datastore_o=None):
        timestamp = list(watch.history.keys())[-1]
        filename = watch.history[timestamp]
        try:
-            with open(filename, 'r') as f:
+            with open(filename, 'r', encoding='utf-8', errors='ignore') as f:
                tmp = f.readlines()
                # Get what needs to be highlighted
--- a/changedetectionio/api/api_v1.py
+++ b/changedetectionio/api/api_v1.py
@ -122,3 +122,37 @@ class CreateWatch(Resource):
            return {'status': "OK"}, 200
        return list, 200
 class SystemInfo(Resource):
    """API resource that reports basic runtime information about this instance.

    Constructed with the keyword arguments ``datastore`` and ``update_q``.
    """

    def __init__(self, **kwargs):
        # datastore is a black box dependency
        self.datastore = kwargs['datastore']
        self.update_q = kwargs['update_q']

    @auth.check_token
    def get(self):
        """Return queue size, overdue watches, uptime and watch count.

        Returns:
            A ``(body, status)`` tuple where ``body`` is a dict with the keys
            ``queue_size``, ``overdue_watches`` (list of watch UUIDs),
            ``uptime`` (seconds, rounded to 2 decimals) and ``watch_count``,
            and ``status`` is 200.
        """
        import time

        # Allow 5 minutes of grace time before we decide a watch is overdue
        grace_seconds = 5 * 60

        now = time.time()
        watching = self.datastore.data.get('watching', {})
        overdue_watches = []

        # Check all watches and report which have not been checked but should have been
        for uuid, watch in watching.items():
            # see if now - last_checked is greater than the time that should have been
            # this is not super accurate (maybe they just edited it) but better than nothing
            t = watch.threshold_seconds()
            if not t:
                # Use the system wide default
                t = self.datastore.threshold_seconds

            # A missing/None 'last_checked' is treated as "never checked" (0) instead of
            # raising TypeError on the subtraction and failing the whole request
            last_checked = watch.get('last_checked') or 0
            time_since_check = now - last_checked

            if time_since_check - grace_seconds > t:
                overdue_watches.append(uuid)

        return {
                   'queue_size': self.update_q.qsize(),
                   'overdue_watches': overdue_watches,
                   'uptime': round(now - self.datastore.start_time, 2),
                   'watch_count': len(watching)
               }, 200
--- a/changedetectionio/changedetection.py
+++ b/changedetectionio/changedetection.py
@ -102,6 +102,14 @@ def main():
                    has_password=datastore.data['settings']['application']['password'] != False
                    )
    # Monitored websites will not receive a Referer header
    # when a user clicks on an outgoing link.
    @app.after_request
    def hide_referrer(response):
        """Add ``Referrer-Policy: no-referrer`` to the response when HIDE_REFERER is enabled.

        The HIDE_REFERER environment variable is read on every request. It is
        considered disabled when unset, empty, or one of ``0``, ``false``,
        ``no``, ``off`` (case-insensitive); any other value enables it.

        Returns the (possibly modified) response object.
        """
        # Environment variables are always strings, so testing the raw value for
        # truthiness would treat "false" or "0" as enabled.
        hide_referer = os.getenv("HIDE_REFERER", "").strip().lower()
        if hide_referer not in ("", "0", "false", "no", "off"):
            response.headers["Referrer-Policy"] = "no-referrer"
        return response
    # Proxy sub-directory support
    # Set environment var USE_X_SETTINGS=1 on this script
    # And then in your proxy_pass settings
--- a/changedetectionio/model/Watch.py
+++ b/changedetectionio/model/Watch.py
@ -120,7 +120,10 @@ class model(dict):
        if os.path.isfile(fname):
            logging.debug("Reading history index " + str(time.time()))
            with open(fname, "r") as f:
-                tmp_history = dict(i.strip().split(',', 2) for i in f.readlines())
+                for i in f.readlines():
                    if ',' in i:
                        k, v = i.strip().split(',', 2)
                        tmp_history[k] = v
        if len(tmp_history):
            self.__newest_history_key = list(tmp_history.keys())[-1]
@ -153,20 +156,22 @@ class model(dict):
        import uuid
        import logging
-        output_path = "{}/{}".format(self.__datastore_path, self['uuid'])
+        output_path = os.path.join(self.__datastore_path, self['uuid'])
        self.ensure_data_dir_exists()
        snapshot_fname = os.path.join(output_path, str(uuid.uuid4()))
        snapshot_fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4())
        logging.debug("Saving history text {}".format(snapshot_fname))
        # in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading
        # most sites are utf-8 and some are even broken utf-8
        with open(snapshot_fname, 'wb') as f:
            f.write(contents)
            f.close()
        # Append to index
        # @todo check last char was \n
-        index_fname = "{}/history.txt".format(output_path)
+        index_fname = os.path.join(output_path, "history.txt")
        with open(index_fname, 'a') as f:
            f.write("{},{}\n".format(timestamp, snapshot_fname))
            f.close()
--- a/changedetectionio/run_all_tests.sh
+++ b/changedetectionio/run_all_tests.sh
@ -9,6 +9,8 @@
 # exit when any command fails
 set -e
 SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
 find tests/test_*py -type f|while read test_name
 do
  echo "TEST RUNNING $test_name"
@ -45,7 +47,9 @@ docker kill $$-test_selenium
 echo "TESTING WEBDRIVER FETCH > PLAYWRIGHT/BROWSERLESS..."
 # Not all platforms support playwright (not ARM/rPI), so it's not packaged in requirements.txt
-pip3 install playwright~=1.24
+PLAYWRIGHT_VERSION=$(grep -i -E "RUN pip install.+" "$SCRIPT_DIR/../Dockerfile" | grep --only-matching -i -E "playwright[=><~+]+[0-9\.]+")
 echo "using $PLAYWRIGHT_VERSION"
 pip3 install "$PLAYWRIGHT_VERSION"
 docker run -d --name $$-test_browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm  -p 3000:3000  --shm-size="2g"  browserless/chrome:1.53-chrome-stable
 # takes a while to spin up
 sleep 5
--- a/changedetectionio/store.py
+++ b/changedetectionio/store.py
@ -30,14 +30,14 @@ class ChangeDetectionStore:
    def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"):
        # Should only be active for docker
        # logging.basicConfig(filename='/dev/stdout', level=logging.INFO)
-        self.needs_write = False
+        self.__data = App.model()
        self.datastore_path = datastore_path
        self.json_store_path = "{}/url-watches.json".format(self.datastore_path)
        self.needs_write = False
        self.proxy_list = None
        self.start_time = time.time()
        self.stop_thread = False
        self.__data = App.model()
        # Base definition for all watchers
        # deepcopy part of #569 - not sure why its needed exactly
        self.generic_definition = deepcopy(Watch.model(datastore_path = datastore_path, default={}))
--- a/changedetectionio/tests/test_api.py
+++ b/changedetectionio/tests/test_api.py
@ -147,6 +147,16 @@ def test_api_simple(client, live_server):
    # @todo how to handle None/default global values?
    assert watch['history_n'] == 2, "Found replacement history section, which is in its own API"
    # basic systeminfo check
    res = client.get(
        url_for("systeminfo"),
        headers={'x-api-key': api_key},
    )
    info = json.loads(res.data)
    assert info.get('watch_count') == 1
    assert info.get('uptime') > 0.5
    # Finally delete the watch
    res = client.delete(
        url_for("watch", uuid=watch_uuid),
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -45,6 +45,9 @@ services:
  #        Respect proxy_pass type settings, `proxy_set_header Host "localhost";` and `proxy_set_header X-Forwarded-Prefix /app;`
  #        More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy-sub-directory
  #      - USE_X_SETTINGS=1
  #
  #        Hides the `Referer` header so that monitored websites can't see the changedetection.io hostname.
  #      - HIDE_REFERER=true
      # Comment out ports: when using behind a reverse proxy , enable networks: etc.
      ports:
--- a/requirements.txt
+++ b/requirements.txt
@ -47,3 +47,4 @@ selenium ~= 4.1.0
 werkzeug ~= 2.0.0
 # playwright is installed at Dockerfile build time because it's not available on all platforms
`@ -47,3 +47,4 @@ selenium ~= 4.1.0`
	`werkzeug ~= 2.0.0`	`werkzeug ~= 2.0.0`

	`# playwright is installed at Dockerfile build time because it's not available on all platforms`	`# playwright is installed at Dockerfile build time because it's not available on all platforms`