diff --git a/.github/workflows/containers.yml b/.github/workflows/containers.yml index 0397c026..8c0145d7 100644 --- a/.github/workflows/containers.yml +++ b/.github/workflows/containers.yml @@ -88,14 +88,14 @@ jobs: - name: Build and push :dev id: docker_build - if: ${{ github.ref }} == "refs/heads/master" + if: github.ref == 'refs/heads/master' - uses: docker/build-push-action@v5 + uses: docker/build-push-action@v6 with: context: ./ file: ./Dockerfile push: true tags: | ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:dev,ghcr.io/${{ github.repository }}:dev - platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7,linux/arm/v8 + platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8 cache-from: type=gha cache-to: type=gha,mode=max @@ -106,7 +106,7 @@ jobs: - name: Build and push :tag id: docker_build_tag_release if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.') - uses: docker/build-push-action@v5 + uses: docker/build-push-action@v6 with: context: ./ file: ./Dockerfile @@ -116,7 +116,7 @@ jobs: ghcr.io/dgtlmoon/changedetection.io:${{ github.event.release.tag_name }} ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:latest ghcr.io/dgtlmoon/changedetection.io:latest - platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7,linux/arm/v8 + platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8 cache-from: type=gha cache-to: type=gha,mode=max # Looks like this was disabled diff --git a/.github/workflows/test-container-build.yml b/.github/workflows/test-container-build.yml index c6fd9efb..1e5257bc 100644 --- a/.github/workflows/test-container-build.yml +++ b/.github/workflows/test-container-build.yml @@ -51,7 +51,7 @@ jobs: # Check we can still build under alpine/musl - name: Test that the docker containers can build (musl via alpine check) id: docker_build_musl - uses: docker/build-push-action@v5 + uses: docker/build-push-action@v6 with: context: ./ file: ./.github/test/Dockerfile-alpine @@ -59,12 +59,12 @@ jobs: 
- name: Test that the docker containers can build id: docker_build - uses: docker/build-push-action@v5 + uses: docker/build-push-action@v6 # https://github.com/docker/build-push-action#customizing with: context: ./ file: ./Dockerfile - platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7,linux/arm/v8 + platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8 cache-from: type=local,src=/tmp/.buildx-cache cache-to: type=local,dest=/tmp/.buildx-cache diff --git a/.github/workflows/test-only.yml b/.github/workflows/test-only.yml index 5483e6f6..69e42cba 100644 --- a/.github/workflows/test-only.yml +++ b/.github/workflows/test-only.yml @@ -4,17 +4,10 @@ name: ChangeDetection.io App Test on: [push, pull_request] jobs: - test-application: + lint-code: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - # Mainly just for link/flake8 - - name: Set up Python 3.11 - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - name: Lint with flake8 run: | pip3 install flake8 @@ -23,202 +16,24 @@ jobs: # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Spin up ancillary testable services - run: | - - docker network create changedet-network - - # Selenium - docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4 - - # SocketPuppetBrowser + Extra for custom browser test - docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest - docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest - - - name: Build changedetection.io container for testing - run: | - # Build a changedetection.io container and start testing inside - docker build --build-arg LOGGER_LEVEL=TRACE -t test-changedetectionio . - # Debug info - docker run test-changedetectionio bash -c 'pip list' - - - name: Spin up ancillary SMTP+Echo message test server - run: | - # Debug SMTP server/echo message back server - docker run --network changedet-network -d -p 11025:11025 -p 11080:11080 --hostname mailserver test-changedetectionio bash -c 'python changedetectionio/tests/smtp/smtp-test-server.py' - - - name: Show docker container state and other debug info - run: | - set -x - echo "Running processes in docker..." 
- docker ps - - - name: Test built container with Pytest (generally as requests/plaintext fetching) - run: | - # Unit tests - echo "run test with unittest" - docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff' - docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model' - docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security' - - # All tests - echo "run test with pytest" - # The default pytest logger_level is TRACE - # To change logger_level for pytest(test/conftest.py), - # append the docker option. e.g. '-e LOGGER_LEVEL=DEBUG' - docker run --name test-cdio-basic-tests --network changedet-network test-changedetectionio bash -c 'cd changedetectionio && ./run_basic_tests.sh' - -# PLAYWRIGHT/NODE-> CDP - - name: Playwright and SocketPuppetBrowser - Specific tests in built container - run: | - # Playwright via Sockpuppetbrowser fetch - # tests/visualselector/test_fetch_data.py will do browser steps - docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py' - docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py' - docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py' - docker run --rm -e "FLASK_SERVER_NAME=cdio" 
-e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py' - - - - name: Playwright and SocketPuppetBrowser - Headers and requests - run: | - # Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers - docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py' - - - name: Playwright and SocketPuppetBrowser - Restock detection - run: | - # restock detection via playwright - added name=changedet here so that playwright and sockpuppetbrowser can connect to it - docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py' - -# STRAIGHT TO CDP - - name: Pyppeteer and SocketPuppetBrowser - Specific tests in built container - run: | - # Playwright via Sockpuppetbrowser fetch - docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py' - docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 
--live-server-port=5004 tests/test_errorhandling.py' - docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py' - docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py' - - - name: Pyppeteer and SocketPuppetBrowser - Headers and requests checks - run: | - # Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers - docker run --name "changedet" --hostname changedet --rm -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py' + test-application-3-10: + needs: lint-code + uses: ./.github/workflows/test-stack-reusable-workflow.yml + with: + python-version: '3.10' - - name: Pyppeteer and SocketPuppetBrowser - Restock detection - run: | - # restock detection via playwright - added name=changedet here so that playwright and sockpuppetbrowser can connect to it - docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py' -# SELENIUM - - name: Specific tests in built container 
for Selenium - run: | - # Selenium fetch - docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py' - - - name: Specific tests in built container for headers and requests checks with Selenium - run: | - docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py' - -# OTHER STUFF - - name: Test SMTP notification mime types - run: | - # SMTP content types - needs the 'Debug SMTP server/echo message back server' container from above - docker run --rm --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/smtp/test_notification_smtp.py' - - # @todo Add a test via playwright/puppeteer - # squid with auth is tested in run_proxy_tests.sh -> tests/proxy_list/test_select_custom_proxy.py - - name: Test proxy squid style interaction - run: | - cd changedetectionio - ./run_proxy_tests.sh - cd .. - - - name: Test proxy SOCKS5 style interaction - run: | - cd changedetectionio - ./run_socks_proxy_tests.sh - cd .. + test-application-3-11: + needs: lint-code + uses: ./.github/workflows/test-stack-reusable-workflow.yml + with: + python-version: '3.11' + skip-pypuppeteer: true - - name: Test custom browser URL - run: | - cd changedetectionio - ./run_custom_browser_url_tests.sh - cd .. 
- - - name: Test changedetection.io container starts+runs basically without error - run: | - docker run --name test-changedetectionio -p 5556:5000 -d test-changedetectionio - sleep 3 - # Should return 0 (no error) when grep finds it - curl --retry-connrefused --retry 6 -s http://localhost:5556 |grep -q checkbox-uuid - - # and IPv6 - curl --retry-connrefused --retry 6 -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid - - # Check whether TRACE log is enabled. - # Also, check whether TRACE is came from STDERR - docker logs test-changedetectionio 2>&1 1>/dev/null | grep 'TRACE log is enabled' || exit 1 - # Check whether DEBUG is came from STDOUT - docker logs test-changedetectionio 2>/dev/null | grep 'DEBUG' || exit 1 - - docker kill test-changedetectionio - - - name: Test changedetection.io SIGTERM and SIGINT signal shutdown - run: | - - echo SIGINT Shutdown request test - docker run --name sig-test -d test-changedetectionio - sleep 3 - echo ">>> Sending SIGINT to sig-test container" - docker kill --signal=SIGINT sig-test - sleep 3 - # invert the check (it should be not 0/not running) - docker ps - # check signal catch(STDERR) log. Because of - # changedetectionio/__init__.py: logger.add(sys.stderr, level=logger_level) - docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGINT' || exit 1 - test -z "`docker ps|grep sig-test`" - if [ $? -ne 0 ] - then - echo "Looks like container was running when it shouldnt be" - docker ps - exit 1 - fi - - # @todo - scan the container log to see the right "graceful shutdown" text exists - docker rm sig-test - - echo SIGTERM Shutdown request test - docker run --name sig-test -d test-changedetectionio - sleep 3 - echo ">>> Sending SIGTERM to sig-test container" - docker kill --signal=SIGTERM sig-test - sleep 3 - # invert the check (it should be not 0/not running) - docker ps - # check signal catch(STDERR) log. 
Because of - # changedetectionio/__init__.py: logger.add(sys.stderr, level=logger_level) - docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGTERM' || exit 1 - test -z "`docker ps|grep sig-test`" - if [ $? -ne 0 ] - then - echo "Looks like container was running when it shouldnt be" - docker ps - exit 1 - fi - - # @todo - scan the container log to see the right "graceful shutdown" text exists - docker rm sig-test - - - name: Dump container log - if: always() - run: | - mkdir output-logs - docker logs test-cdio-basic-tests > output-logs/test-cdio-basic-tests-stdout.txt - docker logs test-cdio-basic-tests 2> output-logs/test-cdio-basic-tests-stderr.txt + test-application-3-12: + needs: lint-code + uses: ./.github/workflows/test-stack-reusable-workflow.yml + with: + python-version: '3.12' + skip-pypuppeteer: true - - name: Store container log - if: always() - uses: actions/upload-artifact@v4 - with: - name: test-cdio-basic-tests-output - path: output-logs diff --git a/.github/workflows/test-stack-reusable-workflow.yml b/.github/workflows/test-stack-reusable-workflow.yml new file mode 100644 index 00000000..f2864680 --- /dev/null +++ b/.github/workflows/test-stack-reusable-workflow.yml @@ -0,0 +1,239 @@ +name: ChangeDetection.io App Test + +on: + workflow_call: + inputs: + python-version: + description: 'Python version to use' + required: true + type: string + default: '3.10' + skip-pypuppeteer: + description: 'Skip PyPuppeteer (not supported in 3.11/3.12)' + required: false + type: boolean + default: false + +jobs: + test-application: + runs-on: ubuntu-latest + env: + PYTHON_VERSION: ${{ inputs.python-version }} + steps: + - uses: actions/checkout@v4 + + # Mainly just for link/flake8 + - name: Set up Python ${{ env.PYTHON_VERSION }} + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Build changedetection.io container for testing under Python ${{ env.PYTHON_VERSION }} + run: | + echo "---- Building for Python ${{ 
env.PYTHON_VERSION }} -----" + # Build a changedetection.io container and start testing inside + docker build --build-arg PYTHON_VERSION=${{ env.PYTHON_VERSION }} --build-arg LOGGER_LEVEL=TRACE -t test-changedetectionio . + # Debug info + docker run test-changedetectionio bash -c 'pip list' + + - name: We should be Python ${{ env.PYTHON_VERSION }} ... + run: | + docker run test-changedetectionio bash -c 'python3 --version' + + - name: Spin up ancillary testable services + run: | + + docker network create changedet-network + + # Selenium + docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4 + + # SocketPuppetBrowser + Extra for custom browser test + docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest + docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest + + - name: Spin up ancillary SMTP+Echo message test server + run: | + # Debug SMTP server/echo message back server + docker run --network changedet-network -d -p 11025:11025 -p 11080:11080 --hostname mailserver test-changedetectionio bash -c 'pip3 install aiosmtpd && python changedetectionio/tests/smtp/smtp-test-server.py' + docker ps + + - name: Show docker container state and other debug info + run: | + set -x + echo "Running processes in docker..." 
+ docker ps + + - name: Test built container with Pytest (generally as requests/plaintext fetching) + run: | + # Unit tests + echo "run test with unittest" + docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff' + docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model' + docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security' + + # All tests + echo "run test with pytest" + # The default pytest logger_level is TRACE + # To change logger_level for pytest(test/conftest.py), + # append the docker option. e.g. '-e LOGGER_LEVEL=DEBUG' + docker run --name test-cdio-basic-tests --network changedet-network test-changedetectionio bash -c 'cd changedetectionio && ./run_basic_tests.sh' + +# PLAYWRIGHT/NODE-> CDP + - name: Playwright and SocketPuppetBrowser - Specific tests in built container + run: | + # Playwright via Sockpuppetbrowser fetch + # tests/visualselector/test_fetch_data.py will do browser steps + docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py' + docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py' + docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py' + docker run --rm -e "FLASK_SERVER_NAME=cdio" 
-e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py' + + + - name: Playwright and SocketPuppetBrowser - Headers and requests + run: | + # Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers + docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'find .; cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py; rc=$?; pwd; find .; exit $rc' + + - name: Playwright and SocketPuppetBrowser - Restock detection + run: | + # restock detection via playwright - added name=changedet here so that playwright and sockpuppetbrowser can connect to it + docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py' + +# STRAIGHT TO CDP + - name: Pyppeteer and SocketPuppetBrowser - Specific tests in built container + if: ${{ inputs.skip-pypuppeteer == false }} + run: | + # Playwright via Sockpuppetbrowser fetch + docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio 
test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py' + docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py' + docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py' + + - name: Pyppeteer and SocketPuppetBrowser - Headers and requests checks + if: ${{ inputs.skip-pypuppeteer == false }} + run: | + # Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers + docker run --name "changedet" --hostname changedet --rm -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py' + + - name: Pyppeteer and SocketPuppetBrowser - Restock detection + if: ${{ inputs.skip-pypuppeteer == false }} + run: | + # restock detection via playwright - added name=changedet here so that playwright and sockpuppetbrowser can connect to it + docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py' + +# SELENIUM 
+ - name: Specific tests in built container for Selenium + run: | + # Selenium fetch + docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py' + + - name: Specific tests in built container for headers and requests checks with Selenium + run: | + docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py' + +# OTHER STUFF + - name: Test SMTP notification mime types + run: | + # SMTP content types - needs the 'Debug SMTP server/echo message back server' container from above + # "mailserver" hostname defined above + docker run --rm --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/smtp/test_notification_smtp.py' + + # @todo Add a test via playwright/puppeteer + # squid with auth is tested in run_proxy_tests.sh -> tests/proxy_list/test_select_custom_proxy.py + - name: Test proxy squid style interaction + run: | + cd changedetectionio + ./run_proxy_tests.sh + cd .. + + - name: Test proxy SOCKS5 style interaction + run: | + cd changedetectionio + ./run_socks_proxy_tests.sh + cd .. + + - name: Test custom browser URL + run: | + cd changedetectionio + ./run_custom_browser_url_tests.sh + cd .. 
+ + - name: Test changedetection.io container starts+runs basically without error + run: | + docker run --name test-changedetectionio -p 5556:5000 -d test-changedetectionio + sleep 3 + # Should return 0 (no error) when grep finds it + curl --retry-connrefused --retry 6 -s http://localhost:5556 |grep -q checkbox-uuid + + # and IPv6 + curl --retry-connrefused --retry 6 -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid + + # Check whether TRACE log is enabled. + # Also, check whether TRACE is came from STDERR + docker logs test-changedetectionio 2>&1 1>/dev/null | grep 'TRACE log is enabled' || exit 1 + # Check whether DEBUG is came from STDOUT + docker logs test-changedetectionio 2>/dev/null | grep 'DEBUG' || exit 1 + + docker kill test-changedetectionio + + - name: Test changedetection.io SIGTERM and SIGINT signal shutdown + run: | + + echo SIGINT Shutdown request test + docker run --name sig-test -d test-changedetectionio + sleep 3 + echo ">>> Sending SIGINT to sig-test container" + docker kill --signal=SIGINT sig-test + sleep 3 + # invert the check (it should be not 0/not running) + docker ps + # check signal catch(STDERR) log. Because of + # changedetectionio/__init__.py: logger.add(sys.stderr, level=logger_level) + docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGINT' || exit 1 + test -z "`docker ps|grep sig-test`" + if [ $? -ne 0 ] + then + echo "Looks like container was running when it shouldnt be" + docker ps + exit 1 + fi + + # @todo - scan the container log to see the right "graceful shutdown" text exists + docker rm sig-test + + echo SIGTERM Shutdown request test + docker run --name sig-test -d test-changedetectionio + sleep 3 + echo ">>> Sending SIGTERM to sig-test container" + docker kill --signal=SIGTERM sig-test + sleep 3 + # invert the check (it should be not 0/not running) + docker ps + # check signal catch(STDERR) log. 
Because of + # changedetectionio/__init__.py: logger.add(sys.stderr, level=logger_level) + docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGTERM' || exit 1 + test -z "`docker ps|grep sig-test`" + if [ $? -ne 0 ] + then + echo "Looks like container was running when it shouldnt be" + docker ps + exit 1 + fi + + # @todo - scan the container log to see the right "graceful shutdown" text exists + docker rm sig-test + + - name: Dump container log + if: always() + run: | + mkdir output-logs + docker logs test-cdio-basic-tests > output-logs/test-cdio-basic-tests-stdout-${{ env.PYTHON_VERSION }}.txt + docker logs test-cdio-basic-tests 2> output-logs/test-cdio-basic-tests-stderr-${{ env.PYTHON_VERSION }}.txt + + - name: Store everything including test-datastore + if: always() + uses: actions/upload-artifact@v4 + with: + name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }} + path: . diff --git a/COMMERCIAL_LICENCE.md b/COMMERCIAL_LICENCE.md new file mode 100644 index 00000000..9ac72335 --- /dev/null +++ b/COMMERCIAL_LICENCE.md @@ -0,0 +1,54 @@ +# Generally + +In any commercial activity involving 'Hosting' (as defined herein), whether in part or in full, this license must be executed and adhered to. + +# Commercial License Agreement + +This Commercial License Agreement ("Agreement") is entered into by and between Mr Morresi (the original creator of this software) here-in ("Licensor") and (your company or personal name) _____________ ("Licensee"). This Agreement sets forth the terms and conditions under which Licensor provides its software ("Software") and services to Licensee for the purpose of reselling the software either in part or full, as part of any commercial activity where the activity involves a third party. + +### Definition of Hosting + +For the purposes of this Agreement, "hosting" means making the functionality of the Program or modified version available to third parties as a service. 
This includes, without limitation: +- Enabling third parties to interact with the functionality of the Program or modified version remotely through a computer network. +- Offering a service the value of which entirely or primarily derives from the value of the Program or modified version. +- Offering a service that accomplishes for users the primary purpose of the Program or modified version. + +## 1. Grant of License +Subject to the terms and conditions of this Agreement, Licensor grants Licensee a non-exclusive, non-transferable license to install, use, and resell the Software. Licensee may: +- Resell the Software as part of a service offering or as a standalone product. +- Host the Software on a server and provide it as a hosted service (e.g., Software as a Service - SaaS). +- Integrate the Software into a larger product or service that is then sold or provided for commercial purposes, where the software is used either in part or full. + +## 2. License Fees +Licensee agrees to pay Licensor the license fees specified in the ordering document. License fees are due and payable as specified in the ordering document. The fees may include initial licensing costs and recurring fees based on the number of end users, instances of the Software resold, or revenue generated from the resale activities. + +## 3. Resale Conditions +Licensee must comply with the following conditions when reselling the Software, whether the software is resold in part or full: +- Provide end users with access to the source code under the same open-source license conditions as provided by Licensor. +- Clearly state in all marketing and sales materials that the Software is provided under a commercial license from Licensor, and provide a link back to https://changedetection.io. +- Ensure end users are aware of and agree to the terms of the commercial license prior to resale. +- Do not sublicense or transfer the Software to third parties except as part of an authorized resale activity. + +## 4. 
Hosting and Provision of Services +Licensee may host the Software (either in part or full) on its servers and provide it as a hosted service to end users. The following conditions apply: +- Licensee must ensure that all hosted versions of the Software comply with the terms of this Agreement. +- Licensee must provide Licensor with regular reports detailing the number of end users and instances of the hosted service. +- Any modifications to the Software made by Licensee for hosting purposes must be made available to end users under the same open-source license conditions, unless agreed otherwise. + +## 5. Services +Licensor will provide support and maintenance services as described in the support policy referenced in the ordering document should such an agreement be signed by all parties. Additional fees may apply for support services provided to end users resold by Licensee. + +## 6. Reporting and Audits +Licensee agrees to provide Licensor with regular reports detailing the number of instances, end users, and revenue generated from the resale of the Software. Licensor reserves the right to audit Licensee’s records to ensure compliance with this Agreement. + +## 7. Term and Termination +This Agreement shall commence on the effective date and continue for the period set forth in the ordering document unless terminated earlier in accordance with this Agreement. Either party may terminate this Agreement if the other party breaches any material term and fails to cure such breach within thirty (30) days after receipt of written notice. + +## 8. Limitation of Liability and Disclaimer of Warranty +Executing this commercial license does not waive the Limitation of Liability or Disclaimer of Warranty as stated in the open-source LICENSE provided with the Software. The Software is provided "as is," without warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose, and noninfringement. 
In no event shall the authors or copyright holders be liable for any claim, damages, or other liability, whether in an action of contract, tort, or otherwise, arising from, out of, or in connection with the Software or the use or other dealings in the Software. + +## 9. Governing Law +This Agreement shall be governed by and construed in accordance with the laws of the Czech Republic. + +## Contact Information +For commercial licensing inquiries, please contact contact@changedetection.io and dgtlmoon@gmail.com. diff --git a/Dockerfile b/Dockerfile index e592c9bb..6641b947 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,10 @@ # @NOTE! I would love to move to 3.11 but it breaks the async handler in changedetectionio/content_fetchers/puppeteer.py # If you know how to fix it, please do! and test it for both 3.10 and 3.11 -FROM python:3.10-slim-bookworm as builder + +ARG PYTHON_VERSION=3.11 + +FROM python:${PYTHON_VERSION}-slim-bookworm AS builder # See `cryptography` pin comment in requirements.txt ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1 @@ -23,7 +26,8 @@ WORKDIR /install COPY requirements.txt /requirements.txt -RUN pip install --target=/dependencies -r /requirements.txt +# --extra-index-url https://www.piwheels.org/simple is for cryptography module to be prebuilt (or rustc etc needs to be installed) +RUN pip install --extra-index-url https://www.piwheels.org/simple --target=/dependencies -r /requirements.txt # Playwright is an alternative to Selenium # Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing @@ -32,10 +36,12 @@ RUN pip install --target=/dependencies playwright~=1.41.2 \ || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled." 
# Final image stage -FROM python:3.10-slim-bookworm +FROM python:${PYTHON_VERSION}-slim-bookworm RUN apt-get update && apt-get install -y --no-install-recommends \ libxslt1.1 \ + # For presenting price amounts correctly in the restock/price detection overview + locales \ # For pdftohtml poppler-utils \ zlib1g \ diff --git a/README.md b/README.md index d941eccb..87451d24 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,20 @@ Using the **Browser Steps** configuration, add basic steps before performing cha After **Browser Steps** have been run, then visit the **Visual Selector** tab to refine the content you're interested in. Requires Playwright to be enabled. +### Awesome restock and price change notifications + +Enable the _"Re-stock & Price detection for single product pages"_ option to activate the best way to monitor product pricing, this will extract any meta-data in the HTML page and give you many options to follow the pricing of the product. + +Easily organise and monitor prices for products from the dashboard, get alerts and notifications when the price of a product changes or comes back in stock again! + +[Easily keep an eye on product price changes directly from the UI](https://changedetection.io?src=github) + +Set price change notification parameters, upper and lower price, price change percentage and more. +Always know when a product for sale drops in price. 
+ +[Set upper lower and percentage price change notification values](https://changedetection.io?src=github) + + ### Example use cases @@ -272,6 +286,10 @@ I offer commercial support, this software is depended on by network security, ae [release-link]: https://github.com/dgtlmoon/changedetection.io/releases [docker-link]: https://hub.docker.com/r/dgtlmoon/changedetection.io +## Commercial Licencing + +If you are reselling this software either in part or full as part of any commercial arrangement, you must abide by our COMMERCIAL_LICENCE.md found in our code repository, please contact dgtlmoon@gmail.com and contact@changedetection.io . + ## Third-party licenses changedetectionio.html_tools.elementpath_tostring: Copyright (c), 2018-2021, SISSA (Scuola Internazionale Superiore di Studi Avanzati), Licensed under [MIT license](https://github.com/sissaschool/elementpath/blob/master/LICENSE) diff --git a/changedetection.py b/changedetection.py index 2d0b5d2c..ead2b8c5 100755 --- a/changedetection.py +++ b/changedetection.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # Only exists for direct CLI usage diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index 6fb76ce4..5ec6f891 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -1,8 +1,8 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # Read more https://github.com/dgtlmoon/changedetection.io/wiki -__version__ = '0.45.23' +__version__ = '0.46.02' from changedetectionio.strtobool import strtobool from json.decoder import JSONDecodeError diff --git a/changedetectionio/api/api_v1.py b/changedetectionio/api/api_v1.py index 85e2b30e..9b3eb440 100644 --- a/changedetectionio/api/api_v1.py +++ b/changedetectionio/api/api_v1.py @@ -12,9 +12,10 @@ import copy # See docs/README.md for rebuilding the docs/apidoc information from . 
import api_schema +from ..model import watch_base # Build a JSON Schema atleast partially based on our Watch model -from changedetectionio.model.Watch import base_config as watch_base_config +watch_base_config = watch_base() schema = api_schema.build_watch_json_schema(watch_base_config) schema_create_watch = copy.deepcopy(schema) @@ -170,23 +171,33 @@ class WatchSingleHistory(Resource): curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091/history/1677092977 -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" @apiName Get single snapshot content @apiGroup Watch History + @apiParam {String} [html] Optional Set to =1 to return the last HTML (only stores last 2 snapshots, use `latest` as timestamp) @apiSuccess (200) {String} OK @apiSuccess (404) {String} ERR Not found """ watch = self.datastore.data['watching'].get(uuid) if not watch: - abort(404, message='No watch exists with the UUID of {}'.format(uuid)) + abort(404, message=f"No watch exists with the UUID of {uuid}") if not len(watch.history): - abort(404, message='Watch found but no history exists for the UUID {}'.format(uuid)) + abort(404, message=f"Watch found but no history exists for the UUID {uuid}") if timestamp == 'latest': timestamp = list(watch.history.keys())[-1] - content = watch.get_history_snapshot(timestamp) + if request.args.get('html'): + content = watch.get_fetched_html(timestamp) + if content: + response = make_response(content, 200) + response.mimetype = "text/html" + else: + response = make_response("No content found", 404) + response.mimetype = "text/plain" + else: + content = watch.get_history_snapshot(timestamp) + response = make_response(content, 200) + response.mimetype = "text/plain" - response = make_response(content, 200) - response.mimetype = "text/plain" return response diff --git a/changedetectionio/blueprint/browser_steps/__init__.py b/changedetectionio/blueprint/browser_steps/__init__.py index 30797099..f92bf9f8 100644 --- 
a/changedetectionio/blueprint/browser_steps/__init__.py +++ b/changedetectionio/blueprint/browser_steps/__init__.py @@ -187,8 +187,10 @@ def construct_blueprint(datastore: ChangeDetectionStore): u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url if is_last_step and u: (screenshot, xpath_data) = browsersteps_sessions[browsersteps_session_id]['browserstepper'].request_visualselector_data() - datastore.save_screenshot(watch_uuid=uuid, screenshot=screenshot) - datastore.save_xpath_data(watch_uuid=uuid, data=xpath_data) + watch = datastore.data['watching'].get(uuid) + if watch: + watch.save_screenshot(screenshot=screenshot) + watch.save_xpath_data(data=xpath_data) # if not this_session.page: # cleanup_playwright_session() diff --git a/changedetectionio/blueprint/browser_steps/browser_steps.py b/changedetectionio/blueprint/browser_steps/browser_steps.py index 76f3d756..6f38be2e 100644 --- a/changedetectionio/blueprint/browser_steps/browser_steps.py +++ b/changedetectionio/blueprint/browser_steps/browser_steps.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import os import time @@ -255,8 +255,9 @@ class browsersteps_live_ui(steppable_browser_interface): def get_current_state(self): """Return the screenshot and interactive elements mapping, generally always called after action_()""" - from pkg_resources import resource_string - xpath_element_js = resource_string(__name__, "../../content_fetchers/res/xpath_element_scraper.js").decode('utf-8') + import importlib.resources + xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text() + now = time.time() self.page.wait_for_timeout(1 * 1000) @@ -287,11 +288,9 @@ class browsersteps_live_ui(steppable_browser_interface): :param current_include_filters: :return: """ - + import importlib.resources self.page.evaluate("var include_filters=''") - from pkg_resources import resource_string - # The code that scrapes 
elements and makes a list of elements/size/position to click on in the VisualSelector - xpath_element_js = resource_string(__name__, "../../content_fetchers/res/xpath_element_scraper.js").decode('utf-8') + xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text() from changedetectionio.content_fetchers import visualselector_xpath_selectors xpath_element_js = xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}") diff --git a/changedetectionio/blueprint/check_proxies/__init__.py b/changedetectionio/blueprint/check_proxies/__init__.py index 62a7dab3..8d7df73f 100644 --- a/changedetectionio/blueprint/check_proxies/__init__.py +++ b/changedetectionio/blueprint/check_proxies/__init__.py @@ -30,7 +30,7 @@ def construct_blueprint(datastore: ChangeDetectionStore): def long_task(uuid, preferred_proxy): import time from changedetectionio.content_fetchers import exceptions as content_fetcher_exceptions - from changedetectionio.processors import text_json_diff + from changedetectionio.processors.text_json_diff import text_json_diff from changedetectionio.safe_jinja import render as jinja_render status = {'status': '', 'length': 0, 'text': ''} diff --git a/changedetectionio/blueprint/price_data_follower/__init__.py b/changedetectionio/blueprint/price_data_follower/__init__.py index 89a2fc67..a41552d8 100644 --- a/changedetectionio/blueprint/price_data_follower/__init__.py +++ b/changedetectionio/blueprint/price_data_follower/__init__.py @@ -17,6 +17,8 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue @price_data_follower_blueprint.route("//accept", methods=['GET']) def accept(uuid): datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT + datastore.data['watching'][uuid]['processor'] = 'restock_diff' + datastore.data['watching'][uuid].clear_watch() 
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False})) return redirect(url_for("index")) diff --git a/changedetectionio/blueprint/tags/__init__.py b/changedetectionio/blueprint/tags/__init__.py index 7a49822b..ca974666 100644 --- a/changedetectionio/blueprint/tags/__init__.py +++ b/changedetectionio/blueprint/tags/__init__.py @@ -1,4 +1,6 @@ -from flask import Blueprint, request, make_response, render_template, flash, url_for, redirect +from flask import Blueprint, request, render_template, flash, url_for, redirect + + from changedetectionio.store import ChangeDetectionStore from changedetectionio.flask_app import login_optionally_required @@ -96,22 +98,55 @@ def construct_blueprint(datastore: ChangeDetectionStore): @tags_blueprint.route("/edit/", methods=['GET']) @login_optionally_required def form_tag_edit(uuid): - from changedetectionio import forms - + from changedetectionio.blueprint.tags.form import group_restock_settings_form if uuid == 'first': uuid = list(datastore.data['settings']['application']['tags'].keys()).pop() default = datastore.data['settings']['application']['tags'].get(uuid) - form = forms.watchForm(formdata=request.form if request.method == 'POST' else None, - data=default, - ) - form.datastore=datastore # needed? 
+ form = group_restock_settings_form(formdata=request.form if request.method == 'POST' else None, + data=default, + extra_notification_tokens=datastore.get_unique_notification_tokens_available() + ) + + template_args = { + 'data': default, + 'form': form, + 'watch': default, + 'extra_notification_token_placeholder_info': datastore.get_unique_notification_token_placeholders_available(), + } + + included_content = {} + if form.extra_form_content(): + # So that the extra panels can access _helpers.html etc, we set the environment to load from templates/ + # And then render the code from the module + from jinja2 import Environment, FileSystemLoader + import importlib.resources + templates_dir = str(importlib.resources.files("changedetectionio").joinpath('templates')) + env = Environment(loader=FileSystemLoader(templates_dir)) + template_str = """{% from '_helpers.html' import render_field, render_checkbox_field, render_button %} + +
+
+
+ {{ render_checkbox_field(form.overrides_watch) }} + Used for watches in "Restock & Price detection" mode +
+
+ """ + template_str += form.extra_form_content() + template = env.from_string(template_str) + included_content = template.render(**template_args) output = render_template("edit-tag.html", - data=default, - form=form, settings_application=datastore.data['settings']['application'], + extra_tab_content=form.extra_tab_content() if form.extra_tab_content() else None, + extra_form_content=included_content, + **template_args ) return output @@ -120,14 +155,15 @@ def construct_blueprint(datastore: ChangeDetectionStore): @tags_blueprint.route("/edit/", methods=['POST']) @login_optionally_required def form_tag_edit_submit(uuid): - from changedetectionio import forms + from changedetectionio.blueprint.tags.form import group_restock_settings_form if uuid == 'first': uuid = list(datastore.data['settings']['application']['tags'].keys()).pop() default = datastore.data['settings']['application']['tags'].get(uuid) - form = forms.watchForm(formdata=request.form if request.method == 'POST' else None, + form = group_restock_settings_form(formdata=request.form if request.method == 'POST' else None, data=default, + extra_notification_tokens=datastore.get_unique_notification_tokens_available() ) # @todo subclass form so validation works #if not form.validate(): @@ -136,6 +172,7 @@ def construct_blueprint(datastore: ChangeDetectionStore): # return redirect(url_for('tags.form_tag_edit_submit', uuid=uuid)) datastore.data['settings']['application']['tags'][uuid].update(form.data) + datastore.data['settings']['application']['tags'][uuid]['processor'] = 'restock_diff' datastore.needs_write_urgent = True flash("Updated") diff --git a/changedetectionio/blueprint/tags/form.py b/changedetectionio/blueprint/tags/form.py index 22e8b077..6ff3a503 100644 --- a/changedetectionio/blueprint/tags/form.py +++ b/changedetectionio/blueprint/tags/form.py @@ -1,16 +1,15 @@ from wtforms import ( - BooleanField, Form, - IntegerField, - RadioField, - SelectField, StringField, SubmitField, - TextAreaField, 
validators, ) +from wtforms.fields.simple import BooleanField +from changedetectionio.processors.restock_diff.forms import processor_settings_form as restock_settings_form +class group_restock_settings_form(restock_settings_form): + overrides_watch = BooleanField('Activate for individual watches in this tag/group?', default=False) class SingleTag(Form): diff --git a/changedetectionio/blueprint/tags/templates/edit-tag.html b/changedetectionio/blueprint/tags/templates/edit-tag.html index 1d297c81..2ccc68a0 100644 --- a/changedetectionio/blueprint/tags/templates/edit-tag.html +++ b/changedetectionio/blueprint/tags/templates/edit-tag.html @@ -26,6 +26,9 @@ @@ -63,7 +66,7 @@ xpath://body/div/span[contains(@class, 'example-class')]",
  • JSONPath: Prefix with json:, use json:$ to force re-formatting if required, test your JSONPath here.
  • {% if jq_support %} -
  • jq: Prefix with jq: and test your jq here. Using jq allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation here.
  • +
  • jq: Prefix with jq: and test your jq here. Using jq allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation here. Prefix jqraw: outputs the results as text instead of a JSON list.
  • {% else %}
  • jq support not installed
  • {% endif %} @@ -97,6 +100,12 @@ nav + {# rendered sub Template #} + {% if extra_form_content %} +
    + {{ extra_form_content|safe }} +
    + {% endif %}
    @@ -119,7 +128,7 @@ nav {% endif %} Use system defaults - {{ render_common_settings_form(form, emailprefix, settings_application) }} + {{ render_common_settings_form(form, emailprefix, settings_application, extra_notification_token_placeholder_info) }}
    diff --git a/changedetectionio/content_fetchers/base.py b/changedetectionio/content_fetchers/base.py index f817341d..66dd7403 100644 --- a/changedetectionio/content_fetchers/base.py +++ b/changedetectionio/content_fetchers/base.py @@ -28,7 +28,7 @@ def manage_user_agent(headers, current_ua=''): :return: """ # Ask it what the user agent is, if its obviously ChromeHeadless, switch it to the default - ua_in_custom_headers = next((v for k, v in headers.items() if k.lower() == "user-agent"), None) + ua_in_custom_headers = headers.get('User-Agent') if ua_in_custom_headers: return ua_in_custom_headers @@ -64,10 +64,9 @@ class Fetcher(): render_extract_delay = 0 def __init__(self): - from pkg_resources import resource_string - # The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector - self.xpath_element_js = resource_string(__name__, "res/xpath_element_scraper.js").decode('utf-8') - self.instock_data_js = resource_string(__name__, "res/stock-not-in-stock.js").decode('utf-8') + import importlib.resources + self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text() + self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text() @abstractmethod def get_error(self): @@ -82,7 +81,8 @@ class Fetcher(): request_method, ignore_status_codes=False, current_include_filters=None, - is_binary=False): + is_binary=False, + empty_pages_are_a_change=False): # Should set self.error, self.status_code and self.content pass @@ -96,6 +96,9 @@ class Fetcher(): @abstractmethod def screenshot_step(self, step_n): + if self.browser_steps_screenshot_path and not os.path.isdir(self.browser_steps_screenshot_path): + logger.debug(f"> Creating data dir {self.browser_steps_screenshot_path}") + os.mkdir(self.browser_steps_screenshot_path) return None @abstractmethod @@ -169,5 +172,8 @@ class 
Fetcher(): if os.path.isfile(f): os.unlink(f) - def save_step_html(self, param): + def save_step_html(self, step_n): + if self.browser_steps_screenshot_path and not os.path.isdir(self.browser_steps_screenshot_path): + logger.debug(f"> Creating data dir {self.browser_steps_screenshot_path}") + os.mkdir(self.browser_steps_screenshot_path) pass diff --git a/changedetectionio/content_fetchers/exceptions/__init__.py b/changedetectionio/content_fetchers/exceptions/__init__.py index 00752a3e..80ebae69 100644 --- a/changedetectionio/content_fetchers/exceptions/__init__.py +++ b/changedetectionio/content_fetchers/exceptions/__init__.py @@ -1,6 +1,5 @@ from loguru import logger - class Non200ErrorCodeReceived(Exception): def __init__(self, status_code, url, screenshot=None, xpath_data=None, page_html=None): # Set this so we can use it in other parts of the app @@ -81,17 +80,18 @@ class ScreenshotUnavailable(Exception): self.status_code = status_code self.url = url if page_html: - from html_tools import html_to_text + from changedetectionio.html_tools import html_to_text self.page_text = html_to_text(page_html) return class ReplyWithContentButNoText(Exception): - def __init__(self, status_code, url, screenshot=None, has_filters=False, html_content=''): + def __init__(self, status_code, url, screenshot=None, has_filters=False, html_content='', xpath_data=None): # Set this so we can use it in other parts of the app self.status_code = status_code self.url = url self.screenshot = screenshot self.has_filters = has_filters self.html_content = html_content + self.xpath_data = xpath_data return diff --git a/changedetectionio/content_fetchers/playwright.py b/changedetectionio/content_fetchers/playwright.py index 04ab2759..53be33f1 100644 --- a/changedetectionio/content_fetchers/playwright.py +++ b/changedetectionio/content_fetchers/playwright.py @@ -58,6 +58,7 @@ class fetcher(Fetcher): self.proxy['password'] = parsed.password def screenshot_step(self, step_n=''): + 
super().screenshot_step(step_n=step_n) screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 72))) if self.browser_steps_screenshot_path is not None: @@ -67,6 +68,7 @@ class fetcher(Fetcher): f.write(screenshot) def save_step_html(self, step_n): + super().save_step_html(step_n=step_n) content = self.page.content() destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n)) logger.debug(f"Saving step HTML to {destination}") @@ -81,7 +83,8 @@ class fetcher(Fetcher): request_method, ignore_status_codes=False, current_include_filters=None, - is_binary=False): + is_binary=False, + empty_pages_are_a_change=False): from playwright.sync_api import sync_playwright import playwright._impl._errors @@ -128,7 +131,7 @@ class fetcher(Fetcher): if response is None: context.close() browser.close() - logger.debug("Content Fetcher > Response object was none") + logger.debug("Content Fetcher > Response object from the browser communication was none") raise EmptyReply(url=url, status_code=None) try: @@ -164,10 +167,10 @@ class fetcher(Fetcher): raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot) - if len(self.page.content().strip()) == 0: + if not empty_pages_are_a_change and len(self.page.content().strip()) == 0: + logger.debug("Content Fetcher > Content was empty, empty_pages_are_a_change = False") context.close() browser.close() - logger.debug("Content Fetcher > Content was empty") raise EmptyReply(url=url, status_code=response.status) # Run Browser Steps here diff --git a/changedetectionio/content_fetchers/puppeteer.py b/changedetectionio/content_fetchers/puppeteer.py index 725be3b3..9dd06c38 100644 --- a/changedetectionio/content_fetchers/puppeteer.py +++ b/changedetectionio/content_fetchers/puppeteer.py @@ -75,7 +75,8 @@ class fetcher(Fetcher): request_method, ignore_status_codes, current_include_filters, - is_binary + is_binary, + 
empty_pages_are_a_change ): from changedetectionio.content_fetchers import visualselector_xpath_selectors @@ -115,12 +116,11 @@ class fetcher(Fetcher): # This user agent is similar to what was used when tweaking the evasions in inject_evasions_into_page(..) user_agent = None - if request_headers: - user_agent = next((value for key, value in request_headers.items() if key.lower().strip() == 'user-agent'), None) - if user_agent: - await self.page.setUserAgent(user_agent) - # Remove it so it's not sent again with headers after - [request_headers.pop(key) for key in list(request_headers) if key.lower().strip() == 'user-agent'.lower().strip()] + if request_headers and request_headers.get('User-Agent'): + # Request_headers should now be CaaseInsensitiveDict + # Remove it so it's not sent again with headers after + user_agent = request_headers.pop('User-Agent').strip() + await self.page.setUserAgent(user_agent) if not user_agent: # Attempt to strip 'HeadlessChrome' etc @@ -154,7 +154,7 @@ class fetcher(Fetcher): if response is None: await self.page.close() await browser.close() - logger.warning("Content Fetcher > Response object was none") + logger.warning("Content Fetcher > Response object was none (as in, the response from the browser was empty, not just the content)") raise EmptyReply(url=url, status_code=None) self.headers = response.headers @@ -187,10 +187,11 @@ class fetcher(Fetcher): raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot) content = await self.page.content - if len(content.strip()) == 0: + + if not empty_pages_are_a_change and len(content.strip()) == 0: + logger.error("Content Fetcher > Content was empty (empty_pages_are_a_change is False), closing browsers") await self.page.close() await browser.close() - logger.error("Content Fetcher > Content was empty") raise EmptyReply(url=url, status_code=response.status) # Run Browser Steps here @@ -248,7 +249,7 @@ class fetcher(Fetcher): await self.fetch_page(**kwargs) def 
run(self, url, timeout, request_headers, request_body, request_method, ignore_status_codes=False, - current_include_filters=None, is_binary=False): + current_include_filters=None, is_binary=False, empty_pages_are_a_change=False): #@todo make update_worker async which could run any of these content_fetchers within memory and time constraints max_time = os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180) @@ -263,7 +264,8 @@ class fetcher(Fetcher): request_method=request_method, ignore_status_codes=ignore_status_codes, current_include_filters=current_include_filters, - is_binary=is_binary + is_binary=is_binary, + empty_pages_are_a_change=empty_pages_are_a_change ), timeout=max_time)) except asyncio.TimeoutError: raise(BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds.")) diff --git a/changedetectionio/content_fetchers/requests.py b/changedetectionio/content_fetchers/requests.py index 2c28cda7..c39b2636 100644 --- a/changedetectionio/content_fetchers/requests.py +++ b/changedetectionio/content_fetchers/requests.py @@ -1,9 +1,9 @@ +from loguru import logger +import chardet import hashlib import os - -import chardet import requests - +from changedetectionio import strtobool from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived from changedetectionio.content_fetchers.base import Fetcher @@ -25,7 +25,8 @@ class fetcher(Fetcher): request_method, ignore_status_codes=False, current_include_filters=None, - is_binary=False): + is_binary=False, + empty_pages_are_a_change=False): if self.browser_steps_get_valid_steps(): raise BrowserStepsInUnsupportedFetcher(url=url) @@ -45,13 +46,19 @@ class fetcher(Fetcher): if self.system_https_proxy: proxies['https'] = self.system_https_proxy - r = requests.request(method=request_method, - data=request_body, - url=url, - headers=request_headers, - timeout=timeout, - proxies=proxies, - verify=False) + session = 
requests.Session() + + if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'): + from requests_file import FileAdapter + session.mount('file://', FileAdapter()) + + r = session.request(method=request_method, + data=request_body.encode('utf-8') if type(request_body) is str else request_body, + url=url, + headers=request_headers, + timeout=timeout, + proxies=proxies, + verify=False) # If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks. # For example - some sites don't tell us it's utf-8, but return utf-8 content @@ -67,7 +74,10 @@ class fetcher(Fetcher): self.headers = r.headers if not r.content or not len(r.content): - raise EmptyReply(url=url, status_code=r.status_code) + if not empty_pages_are_a_change: + raise EmptyReply(url=url, status_code=r.status_code) + else: + logger.debug(f"URL {url} gave zero byte content reply with Status Code {r.status_code}, but empty_pages_are_a_change = True") # @todo test this # @todo maybe you really want to test zero-byte return pages? 
diff --git a/changedetectionio/content_fetchers/res/__init__.py b/changedetectionio/content_fetchers/res/__init__.py new file mode 100644 index 00000000..b41a98a5 --- /dev/null +++ b/changedetectionio/content_fetchers/res/__init__.py @@ -0,0 +1 @@ +# resources for browser injection/scraping diff --git a/changedetectionio/content_fetchers/res/stock-not-in-stock.js b/changedetectionio/content_fetchers/res/stock-not-in-stock.js index b9529152..94c6350d 100644 --- a/changedetectionio/content_fetchers/res/stock-not-in-stock.js +++ b/changedetectionio/content_fetchers/res/stock-not-in-stock.js @@ -30,14 +30,21 @@ function isItemInStock() { 'dieser artikel ist bald wieder verfügbar', 'dostępne wkrótce', 'en rupture de stock', - 'ist derzeit nicht auf lager', + 'isn\'t in stock right now', + 'isnt in stock right now', + 'isn’t in stock right now', 'item is no longer available', 'let me know when it\'s available', + 'mail me when available', 'message if back in stock', 'nachricht bei', 'nicht auf lager', + 'nicht lagernd', 'nicht lieferbar', + 'nicht verfügbar', + 'nicht vorrätig', 'nicht zur verfügung', + 'nie znaleziono produktów', 'niet beschikbaar', 'niet leverbaar', 'niet op voorraad', @@ -48,6 +55,7 @@ function isItemInStock() { 'not currently available', 'not in stock', 'notify me when available', + 'notify me', 'notify when available', 'não estamos a aceitar encomendas', 'out of stock', @@ -62,12 +70,16 @@ function isItemInStock() { 'this item is currently unavailable', 'tickets unavailable', 'tijdelijk uitverkocht', + 'unavailable nearby', 'unavailable tickets', + 'vergriffen', + 'vorbestellen', 'vorbestellung ist bald möglich', 'we couldn\'t find any products that match', 'we do not currently have an estimate of when this product will be back in stock.', 'we don\'t know when or if this item will be back in stock.', 'we were not able to find a match', + 'when this arrives in stock', 'zur zeit nicht an lager', '品切れ', '已售', diff --git 
a/changedetectionio/content_fetchers/res/xpath_element_scraper.js b/changedetectionio/content_fetchers/res/xpath_element_scraper.js index 326889ea..87c0df70 100644 --- a/changedetectionio/content_fetchers/res/xpath_element_scraper.js +++ b/changedetectionio/content_fetchers/res/xpath_element_scraper.js @@ -182,6 +182,7 @@ visibleElementsArray.forEach(function (element) { // Inject the current one set in the include_filters, which may be a CSS rule // used for displaying the current one in VisualSelector, where its not one we generated. if (include_filters.length) { + let results; // Foreach filter, go and find it on the page and add it to the results so we can visualise it again for (const f of include_filters) { bbox = false; @@ -197,10 +198,15 @@ if (include_filters.length) { if (f.startsWith('/') || f.startsWith('xpath')) { var qry_f = f.replace(/xpath(:|\d:)/, '') console.log("[xpath] Scanning for included filter " + qry_f) - q = document.evaluate(qry_f, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue; + let xpathResult = document.evaluate(qry_f, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); + results = []; + for (let i = 0; i < xpathResult.snapshotLength; i++) { + results.push(xpathResult.snapshotItem(i)); + } } else { console.log("[css] Scanning for included filter " + f) - q = document.querySelector(f); + console.log("[css] Scanning for included filter " + f); + results = document.querySelectorAll(f); } } catch (e) { // Maybe catch DOMException and alert? 
@@ -208,44 +214,45 @@ if (include_filters.length) { console.log(e); } - if (q) { - // Try to resolve //something/text() back to its /something so we can atleast get the bounding box - try { - if (typeof q.nodeName == 'string' && q.nodeName === '#text') { - q = q.parentElement - } - } catch (e) { - console.log(e) - console.log("xpath_element_scraper: #text resolver") - } + if (results != null && results.length) { - // #1231 - IN the case XPath attribute filter is applied, we will have to traverse up and find the element. - if (typeof q.getBoundingClientRect == 'function') { - bbox = q.getBoundingClientRect(); - console.log("xpath_element_scraper: Got filter element, scroll from top was " + scroll_y) - } else { + // Iterate over the results + results.forEach(node => { + // Try to resolve //something/text() back to its /something so we can atleast get the bounding box try { - // Try and see we can find its ownerElement - bbox = q.ownerElement.getBoundingClientRect(); - console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y) + if (typeof node.nodeName == 'string' && node.nodeName === '#text') { + node = node.parentElement + } } catch (e) { console.log(e) - console.log("xpath_element_scraper: error looking up q.ownerElement") + console.log("xpath_element_scraper: #text resolver") } - } - } - if (!q) { - console.log("xpath_element_scraper: filter element " + f + " was not found"); - } + // #1231 - IN the case XPath attribute filter is applied, we will have to traverse up and find the element. 
+ if (typeof node.getBoundingClientRect == 'function') { + bbox = node.getBoundingClientRect(); + console.log("xpath_element_scraper: Got filter element, scroll from top was " + scroll_y) + } else { + try { + // Try and see we can find its ownerElement + bbox = node.ownerElement.getBoundingClientRect(); + console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y) + } catch (e) { + console.log(e) + console.log("xpath_element_scraper: error looking up q.ownerElement") + } + } - if (bbox && bbox['width'] > 0 && bbox['height'] > 0) { - size_pos.push({ - xpath: f, - width: parseInt(bbox['width']), - height: parseInt(bbox['height']), - left: parseInt(bbox['left']), - top: parseInt(bbox['top']) + scroll_y + if (bbox && bbox['width'] > 0 && bbox['height'] > 0) { + size_pos.push({ + xpath: f, + width: parseInt(bbox['width']), + height: parseInt(bbox['height']), + left: parseInt(bbox['left']), + top: parseInt(bbox['top']) + scroll_y, + highlight_as_custom_filter: true + }); + } }); } } diff --git a/changedetectionio/content_fetchers/webdriver_selenium.py b/changedetectionio/content_fetchers/webdriver_selenium.py index a45746f0..72e80b15 100644 --- a/changedetectionio/content_fetchers/webdriver_selenium.py +++ b/changedetectionio/content_fetchers/webdriver_selenium.py @@ -56,7 +56,8 @@ class fetcher(Fetcher): request_method, ignore_status_codes=False, current_include_filters=None, - is_binary=False): + is_binary=False, + empty_pages_are_a_change=False): from selenium import webdriver from selenium.webdriver.chrome.options import Options as ChromeOptions diff --git a/changedetectionio/diff.py b/changedetectionio/diff.py index 3371ad4d..859abe6a 100644 --- a/changedetectionio/diff.py +++ b/changedetectionio/diff.py @@ -1,62 +1,97 @@ -# used for the notifications, the front-end is using a JS library - import difflib +from typing import List, Iterator, Union +def same_slicer(lst: List[str], start: int, end: int) -> List[str]: + 
"""Return a slice of the list, or a single element if start == end.""" + return lst[start:end] if start != end else [lst[start]] -def same_slicer(l, a, b): - if a == b: - return [l[a]] - else: - return l[a:b] - -# like .compare but a little different output -def customSequenceMatcher(before, after, include_equal=False, include_removed=True, include_added=True, include_replaced=True, include_change_type_prefix=True): - cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \\t", a=before, b=after) - - # @todo Line-by-line mode instead of buncghed, including `after` that is not in `before` (maybe unset?) +def customSequenceMatcher( + before: List[str], + after: List[str], + include_equal: bool = False, + include_removed: bool = True, + include_added: bool = True, + include_replaced: bool = True, + include_change_type_prefix: bool = True +) -> Iterator[List[str]]: + """ + Compare two sequences and yield differences based on specified parameters. + + Args: + before (List[str]): Original sequence + after (List[str]): Modified sequence + include_equal (bool): Include unchanged parts + include_removed (bool): Include removed parts + include_added (bool): Include added parts + include_replaced (bool): Include replaced parts + include_change_type_prefix (bool): Add prefixes to indicate change types + + Yields: + List[str]: Differences between sequences + """ + cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \t", a=before, b=after) + for tag, alo, ahi, blo, bhi in cruncher.get_opcodes(): if include_equal and tag == 'equal': - g = before[alo:ahi] - yield g + yield before[alo:ahi] elif include_removed and tag == 'delete': - row_prefix = "(removed) " if include_change_type_prefix else '' - g = [ row_prefix + i for i in same_slicer(before, alo, ahi)] - yield g + prefix = "(removed) " if include_change_type_prefix else '' + yield [f"{prefix}{line}" for line in same_slicer(before, alo, ahi)] elif include_replaced and tag == 'replace': - row_prefix = "(changed) " if 
include_change_type_prefix else '' - g = [row_prefix + i for i in same_slicer(before, alo, ahi)] - row_prefix = "(into) " if include_change_type_prefix else '' - g += [row_prefix + i for i in same_slicer(after, blo, bhi)] - yield g + prefix_changed = "(changed) " if include_change_type_prefix else '' + prefix_into = "(into) " if include_change_type_prefix else '' + yield [f"{prefix_changed}{line}" for line in same_slicer(before, alo, ahi)] + \ + [f"{prefix_into}{line}" for line in same_slicer(after, blo, bhi)] elif include_added and tag == 'insert': - row_prefix = "(added) " if include_change_type_prefix else '' - g = [row_prefix + i for i in same_slicer(after, blo, bhi)] - yield g + prefix = "(added) " if include_change_type_prefix else '' + yield [f"{prefix}{line}" for line in same_slicer(after, blo, bhi)] -# only_differences - only return info about the differences, no context -# line_feed_sep could be "
    " or "
  • " or "\n" etc -def render_diff(previous_version_file_contents, newest_version_file_contents, include_equal=False, include_removed=True, include_added=True, include_replaced=True, line_feed_sep="\n", include_change_type_prefix=True, patch_format=False): - - newest_version_file_contents = [line.rstrip() for line in newest_version_file_contents.splitlines()] - - if previous_version_file_contents: - previous_version_file_contents = [line.rstrip() for line in previous_version_file_contents.splitlines()] - else: - previous_version_file_contents = "" +def render_diff( + previous_version_file_contents: str, + newest_version_file_contents: str, + include_equal: bool = False, + include_removed: bool = True, + include_added: bool = True, + include_replaced: bool = True, + line_feed_sep: str = "\n", + include_change_type_prefix: bool = True, + patch_format: bool = False +) -> str: + """ + Render the difference between two file contents. + + Args: + previous_version_file_contents (str): Original file contents + newest_version_file_contents (str): Modified file contents + include_equal (bool): Include unchanged parts + include_removed (bool): Include removed parts + include_added (bool): Include added parts + include_replaced (bool): Include replaced parts + line_feed_sep (str): Separator for lines in output + include_change_type_prefix (bool): Add prefixes to indicate change types + patch_format (bool): Use patch format for output + + Returns: + str: Rendered difference + """ + newest_lines = [line.rstrip() for line in newest_version_file_contents.splitlines()] + previous_lines = [line.rstrip() for line in previous_version_file_contents.splitlines()] if previous_version_file_contents else [] if patch_format: - patch = difflib.unified_diff(previous_version_file_contents, newest_version_file_contents) + patch = difflib.unified_diff(previous_lines, newest_lines) return line_feed_sep.join(patch) - rendered_diff = customSequenceMatcher(before=previous_version_file_contents, - 
after=newest_version_file_contents, - include_equal=include_equal, - include_removed=include_removed, - include_added=include_added, - include_replaced=include_replaced, - include_change_type_prefix=include_change_type_prefix) + rendered_diff = customSequenceMatcher( + before=previous_lines, + after=newest_lines, + include_equal=include_equal, + include_removed=include_removed, + include_added=include_added, + include_replaced=include_replaced, + include_change_type_prefix=include_change_type_prefix + ) + + def flatten(lst: List[Union[str, List[str]]]) -> str: + return line_feed_sep.join(flatten(x) if isinstance(x, list) else x for x in lst) - # Recursively join lists - f = lambda L: line_feed_sep.join([f(x) if type(x) is list else x for x in L]) - p= f(rendered_diff) - return p + return flatten(rendered_diff) diff --git a/changedetectionio/flask_app.py b/changedetectionio/flask_app.py index 41f80a77..0ecfb75b 100644 --- a/changedetectionio/flask_app.py +++ b/changedetectionio/flask_app.py @@ -1,18 +1,22 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import datetime +import flask_login +import locale import os +import pytz import queue import threading import time +import timeago + +from .processors import find_processors, get_parent_module, get_custom_watch_obj_for_processor from .safe_jinja import render as jinja_render from changedetectionio.strtobool import strtobool from copy import deepcopy from functools import wraps from threading import Event -import flask_login -import pytz -import timeago + from feedgen.feed import FeedGenerator from flask import ( Flask, @@ -79,6 +83,14 @@ csrf = CSRFProtect() csrf.init_app(app) notification_debug_log=[] +# get locale ready +default_locale = locale.getdefaultlocale() +logger.info(f"System locale default is {default_locale}") +try: + locale.setlocale(locale.LC_ALL, default_locale) +except locale.Error: + logger.warning(f"Unable to set locale {default_locale}, locale is not installed maybe?") + watch_api = Api(app, 
decorators=[csrf.exempt]) def init_app_secret(datastore_path): @@ -108,6 +120,14 @@ def get_darkmode_state(): def get_css_version(): return __version__ +@app.template_filter('format_number_locale') +def _jinja2_filter_format_number_locale(value: float) -> str: + "Formats for example 4000.10 to the local locale default of 4,000.10" + # Format the number with two decimal places (locale format string will return 6 decimal) + formatted_value = locale.format_string("%.2f", value, grouping=True) + + return formatted_value + # We use the whole watch object from the store/JSON so we can see if there's some related status in terms of a thread # running or something similar. @app.template_filter('format_last_checked_time') @@ -124,10 +144,10 @@ def _jinja2_filter_datetime(watch_obj, format="%Y-%m-%d %H:%M:%S"): @app.template_filter('format_timestamp_timeago') def _jinja2_filter_datetimestamp(timestamp, format="%Y-%m-%d %H:%M:%S"): - if timestamp == False: + if not timestamp: return 'Not yet' - return timeago.format(timestamp, time.time()) + return timeago.format(int(timestamp), time.time()) @app.template_filter('pagination_slice') @@ -339,7 +359,7 @@ def changedetection_app(config=None, datastore_o=None): # @todo needs a .itemsWithTag() or something - then we can use that in Jinaj2 and throw this away for uuid, watch in datastore.data['watching'].items(): # @todo tag notification_muted skip also (improve Watch model) - if watch.get('notification_muted'): + if datastore.data['settings']['application'].get('rss_hide_muted_watches') and watch.get('notification_muted'): continue if limit_tag and not limit_tag in watch['tags']: continue @@ -472,7 +492,7 @@ def changedetection_app(config=None, datastore_o=None): # Don't link to hosting when we're on the hosting environment active_tag=active_tag, active_tag_uuid=active_tag_uuid, - app_rss_token=datastore.data['settings']['application']['rss_access_token'], + 
app_rss_token=datastore.data['settings']['application'].get('rss_access_token'), datastore=datastore, errored_count=errored_count, form=form, @@ -512,12 +532,21 @@ def changedetection_app(config=None, datastore_o=None): @login_optionally_required def ajax_callback_send_notification_test(watch_uuid=None): - # Watch_uuid could be unsuet in the case its used in tag editor, global setings + # Watch_uuid could be unset in the case its used in tag editor, global setings import apprise + import random from .apprise_asset import asset apobj = apprise.Apprise(asset=asset) - watch = datastore.data['watching'].get(watch_uuid) if watch_uuid else None + is_global_settings_form = request.args.get('mode', '') == 'global-settings' + is_group_settings_form = request.args.get('mode', '') == 'group-settings' + + # Use an existing random one on the global/main settings form + if not watch_uuid and (is_global_settings_form or is_group_settings_form): + logger.debug(f"Send test notification - Choosing random Watch {watch_uuid}") + watch_uuid = random.choice(list(datastore.data['watching'].keys())) + + watch = datastore.data['watching'].get(watch_uuid) notification_urls = request.form['notification_urls'].strip().splitlines() @@ -529,8 +558,6 @@ def changedetection_app(config=None, datastore_o=None): tag = datastore.tag_exists_by_name(k.strip()) notification_urls = tag.get('notifications_urls') if tag and tag.get('notifications_urls') else None - is_global_settings_form = request.args.get('mode', '') == 'global-settings' - is_group_settings_form = request.args.get('mode', '') == 'group-settings' if not notification_urls and not is_global_settings_form and not is_group_settings_form: # In the global settings, use only what is typed currently in the text box logger.debug("Test notification - Trying by global system settings notifications") @@ -549,7 +576,7 @@ def changedetection_app(config=None, datastore_o=None): try: # use the same as when it is triggered, but then override it with the 
form test values n_object = { - 'watch_url': request.form['window_url'], + 'watch_url': request.form.get('window_url', "https://changedetection.io"), 'notification_urls': notification_urls } @@ -616,11 +643,11 @@ def changedetection_app(config=None, datastore_o=None): @login_optionally_required # https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists # https://wtforms.readthedocs.io/en/3.0.x/forms/#wtforms.form.Form.populate_obj ? - def edit_page(uuid): from . import forms from .blueprint.browser_steps.browser_steps import browser_step_ui_config from . import processors + import importlib # More for testing, possible to return the first/only if not datastore.data['watching'].keys(): @@ -652,14 +679,40 @@ def changedetection_app(config=None, datastore_o=None): # Radio needs '' not None, or incase that the chosen one no longer exists if default['proxy'] is None or not any(default['proxy'] in tup for tup in datastore.proxy_list): default['proxy'] = '' - # proxy_override set to the json/text list of the items - form = forms.watchForm(formdata=request.form if request.method == 'POST' else None, - data=default - ) - # For the form widget tag uuid lookup - form.tags.datastore = datastore # in _value + # Does it use some custom form? does one exist? 
+ processor_name = datastore.data['watching'][uuid].get('processor', '') + processor_classes = next((tpl for tpl in find_processors() if tpl[1] == processor_name), None) + if not processor_classes: + flash(f"Cannot load the edit form for processor/plugin '{processor_classes[1]}', plugin missing?", 'error') + return redirect(url_for('index')) + + parent_module = get_parent_module(processor_classes[0]) + + try: + # Get the parent of the "processor.py" go up one, get the form (kinda spaghetti but its reusing existing code) + forms_module = importlib.import_module(f"{parent_module.__name__}.forms") + # Access the 'processor_settings_form' class from the 'forms' module + form_class = getattr(forms_module, 'processor_settings_form') + except ModuleNotFoundError as e: + # .forms didnt exist + form_class = forms.processor_text_json_diff_form + except AttributeError as e: + # .forms exists but no useful form + form_class = forms.processor_text_json_diff_form + + form = form_class(formdata=request.form if request.method == 'POST' else None, + data=default, + extra_notification_tokens=default.extra_notification_token_values() + ) + + # For the form widget tag UUID back to "string name" for the field + form.tags.datastore = datastore + + # Used by some forms that need to dig deeper + form.datastore = datastore + form.watch = default for p in datastore.extra_browsers: form.fetch_backend.choices.append(p) @@ -679,7 +732,15 @@ def changedetection_app(config=None, datastore_o=None): if request.method == 'POST' and form.validate(): - extra_update_obj = {} + # If they changed processor, it makes sense to reset it. 
+ if datastore.data['watching'][uuid].get('processor') != form.data.get('processor'): + datastore.data['watching'][uuid].clear_watch() + flash("Reset watch history due to change of processor") + + extra_update_obj = { + 'consecutive_filter_failures': 0, + 'last_error' : False + } if request.args.get('unpause_on_save'): extra_update_obj['paused'] = False @@ -717,10 +778,16 @@ def changedetection_app(config=None, datastore_o=None): datastore.data['watching'][uuid].update(form.data) datastore.data['watching'][uuid].update(extra_update_obj) - if request.args.get('unpause_on_save'): - flash("Updated watch - unpaused!.") - else: - flash("Updated watch.") + if not datastore.data['watching'][uuid].get('tags'): + # Force it to be a list, because form.data['tags'] will be string if nothing found + # And del(form.data['tags'] ) wont work either for some reason + datastore.data['watching'][uuid]['tags'] = [] + + # Recast it if need be to right data Watch handler + watch_class = get_custom_watch_obj_for_processor(form.data.get('processor')) + datastore.data['watching'][uuid] = watch_class(datastore_path=datastore_o.datastore_path, default=datastore.data['watching'][uuid]) + + flash("Updated watch - unpaused!" 
if request.args.get('unpause_on_save') else "Updated watch.") # Re #286 - We wait for syncing new data to disk in another thread every 60 seconds # But in the case something is added we should save straight away @@ -750,6 +817,7 @@ def changedetection_app(config=None, datastore_o=None): jq_support = False watch = datastore.data['watching'].get(uuid) + system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver' is_html_webdriver = False @@ -758,23 +826,42 @@ def changedetection_app(config=None, datastore_o=None): # Only works reliably with Playwright visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and is_html_webdriver + template_args = { + 'available_processors': processors.available_processors(), + 'browser_steps_config': browser_step_ui_config, + 'emailprefix': os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False), + 'extra_title': f" - Edit - {watch.label}", + 'extra_processor_config': form.extra_tab_content(), + 'extra_notification_token_placeholder_info': datastore.get_unique_notification_token_placeholders_available(), + 'form': form, + 'has_default_notification_urls': True if len(datastore.data['settings']['application']['notification_urls']) else False, + 'has_extra_headers_file': len(datastore.get_all_headers_in_textfile_for_watch(uuid=uuid)) > 0, + 'has_special_tag_options': _watch_has_tag_options_set(watch=watch), + 'is_html_webdriver': is_html_webdriver, + 'jq_support': jq_support, + 'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False), + 'settings_application': datastore.data['settings']['application'], + 'using_global_webdriver_wait': not default['webdriver_delay'], + 'uuid': uuid, + 'visualselector_enabled': visualselector_enabled, + 'watch': watch + } + + included_content = None + if form.extra_form_content(): + # So that the extra panels can access _helpers.html etc, we set the environment to load from templates/ + # And then render the code from the module + from jinja2 import 
Environment, FileSystemLoader + import importlib.resources + templates_dir = str(importlib.resources.files("changedetectionio").joinpath('templates')) + env = Environment(loader=FileSystemLoader(templates_dir)) + template = env.from_string(form.extra_form_content()) + included_content = template.render(**template_args) + output = render_template("edit.html", - available_processors=processors.available_processors(), - browser_steps_config=browser_step_ui_config, - emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False), - extra_title=f" - Edit - {watch.label}", - form=form, - has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False, - has_extra_headers_file=len(datastore.get_all_headers_in_textfile_for_watch(uuid=uuid)) > 0, - has_special_tag_options=_watch_has_tag_options_set(watch=watch), - is_html_webdriver=is_html_webdriver, - jq_support=jq_support, - playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False), - settings_application=datastore.data['settings']['application'], - using_global_webdriver_wait=not default['webdriver_delay'], - uuid=uuid, - visualselector_enabled=visualselector_enabled, - watch=watch + extra_tab_content=form.extra_tab_content() if form.extra_tab_content() else None, + extra_form_content=included_content, + **template_args ) return output @@ -800,7 +887,8 @@ def changedetection_app(config=None, datastore_o=None): # Don't use form.data on POST so that it doesnt overrid the checkbox status from the POST status form = forms.globalSettingsForm(formdata=request.form if request.method == 'POST' else None, - data=default + data=default, + extra_notification_tokens=datastore.get_unique_notification_tokens_available() ) # Remove the last option 'System default' @@ -852,6 +940,7 @@ def changedetection_app(config=None, datastore_o=None): output = render_template("settings.html", api_key=datastore.data['settings']['application'].get('api_access_token'), 
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False), + extra_notification_token_placeholder_info=datastore.get_unique_notification_token_placeholders_available(), form=form, hide_remove_pass=os.getenv("SALTED_PASS", False), min_system_recheck_seconds=int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3)), @@ -884,7 +973,7 @@ def changedetection_app(config=None, datastore_o=None): if request.values.get('urls') and len(request.values.get('urls').strip()): # Import and push into the queue for immediate update check importer = import_url_list() - importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor')) + importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff')) for uuid in importer.new_uuids: update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) @@ -1280,6 +1369,30 @@ def changedetection_app(config=None, datastore_o=None): except FileNotFoundError: abort(404) + @app.route("/edit//get-html", methods=['GET']) + @login_optionally_required + def watch_get_latest_html(uuid): + from io import BytesIO + from flask import send_file + import brotli + + watch = datastore.data['watching'].get(uuid) + if watch and os.path.isdir(watch.watch_data_dir): + latest_filename = list(watch.history.keys())[0] + html_fname = os.path.join(watch.watch_data_dir, f"{latest_filename}.html.br") + if html_fname.endswith('.br'): + # Read and decompress the Brotli file + with open(html_fname, 'rb') as f: + decompressed_data = brotli.decompress(f.read()) + + buffer = BytesIO(decompressed_data) + + return send_file(buffer, as_attachment=True, download_name=f"{latest_filename}.html", mimetype='text/html') + + + # Return a 500 error + abort(500) + @app.route("/form/add/quickwatch", methods=['POST']) @login_optionally_required def form_quick_watch_add(): @@ -1385,7 +1498,7 @@ def 
changedetection_app(config=None, datastore_o=None): update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False})) i += 1 - flash("{} watches queued for rechecking.".format(i)) + flash(f"{i} watches queued for rechecking.") return redirect(url_for('index', tag=tag)) @app.route("/form/checkbox-operations", methods=['POST']) @@ -1479,9 +1592,13 @@ def changedetection_app(config=None, datastore_o=None): for uuid in uuids: uuid = uuid.strip() if datastore.data['watching'].get(uuid): + # Bug in old versions caused by bad edit page/tag handler + if isinstance(datastore.data['watching'][uuid]['tags'], str): + datastore.data['watching'][uuid]['tags'] = [] + datastore.data['watching'][uuid]['tags'].append(tag_uuid) - flash("{} watches assigned tag".format(len(uuids))) + flash(f"{len(uuids)} watches were tagged") return redirect(url_for('index')) diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py index 673be9ca..b0b19f99 100644 --- a/changedetectionio/forms.py +++ b/changedetectionio/forms.py @@ -1,5 +1,6 @@ import os import re + from changedetectionio.strtobool import strtobool from wtforms import ( @@ -230,9 +231,6 @@ class ValidateJinja2Template(object): """ Validates that a {token} is from a valid set """ - def __init__(self, message=None): - self.message = message - def __call__(self, form, field): from changedetectionio import notification @@ -247,6 +245,10 @@ class ValidateJinja2Template(object): try: jinja2_env = ImmutableSandboxedEnvironment(loader=BaseLoader) jinja2_env.globals.update(notification.valid_tokens) + # Extra validation tokens provided on the form_class(... 
extra_tokens={}) setup + if hasattr(field, 'extra_notification_tokens'): + jinja2_env.globals.update(field.extra_notification_tokens) + jinja2_env.from_string(joined_data).render() except TemplateSyntaxError as e: raise ValidationError(f"This is not a valid Jinja2 template: {e}") from e @@ -419,15 +421,24 @@ class quickWatchForm(Form): # Common to a single watch and the global settings class commonSettingsForm(Form): + from . import processors - notification_urls = StringListField('Notification URL List', validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()]) - notification_title = StringField('Notification Title', default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()]) + def __init__(self, formdata=None, obj=None, prefix="", data=None, meta=None, **kwargs): + super().__init__(formdata, obj, prefix, data, meta, **kwargs) + self.notification_body.extra_notification_tokens = kwargs.get('extra_notification_tokens', {}) + self.notification_title.extra_notification_tokens = kwargs.get('extra_notification_tokens', {}) + self.notification_urls.extra_notification_tokens = kwargs.get('extra_notification_tokens', {}) + + extract_title_as_title = BooleanField('Extract from document and use as watch title', default=False) + fetch_backend = RadioField(u'Fetch Method', choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()]) notification_body = TextAreaField('Notification Body', default='{{ watch_url }} had a change.', validators=[validators.Optional(), ValidateJinja2Template()]) notification_format = SelectField('Notification format', choices=valid_notification_formats.keys()) - fetch_backend = RadioField(u'Fetch Method', choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()]) - extract_title_as_title = BooleanField('Extract <title> from document and use as watch title', default=False) - webdriver_delay = 
IntegerField('Wait seconds before extracting text', validators=[validators.Optional(), validators.NumberRange(min=1, - message="Should contain one or more seconds")]) + notification_title = StringField('Notification Title', default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()]) + notification_urls = StringListField('Notification URL List', validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()]) + processor = RadioField( label=u"Processor - What do you want to achieve?", choices=processors.available_processors(), default="text_json_diff") + webdriver_delay = IntegerField('Wait seconds before extracting text', validators=[validators.Optional(), validators.NumberRange(min=1, message="Should contain one or more seconds")]) + + class importForm(Form): from . import processors processor = RadioField(u'Processor', choices=processors.available_processors(), default="text_json_diff") @@ -447,7 +458,7 @@ class SingleBrowserStep(Form): # remove_button = SubmitField('-', render_kw={"type": "button", "class": "pure-button pure-button-primary", 'title': 'Remove'}) # add_button = SubmitField('+', render_kw={"type": "button", "class": "pure-button pure-button-primary", 'title': 'Add new step after'}) -class watchForm(commonSettingsForm): +class processor_text_json_diff_form(commonSettingsForm): url = fields.URLField('URL', validators=[validateURL()]) tags = StringTagUUID('Group tag', [validators.Optional()], default='') @@ -475,9 +486,6 @@ class watchForm(commonSettingsForm): filter_text_replaced = BooleanField('Replaced/changed lines', default=True) filter_text_removed = BooleanField('Removed lines', default=True) - # @todo this class could be moved to its own text_json_diff_watchForm and this goes to restock_diff_Watchform perhaps - in_stock_only = BooleanField('Only trigger when product goes BACK to in-stock', default=True) - trigger_text = StringListField('Trigger/wait for 
text', [validators.Optional(), ValidateListRegex()]) if os.getenv("PLAYWRIGHT_DRIVER_URL"): browser_steps = FieldList(FormField(SingleBrowserStep), min_entries=10) @@ -493,6 +501,12 @@ class watchForm(commonSettingsForm): notification_muted = BooleanField('Notifications Muted / Off', default=False) notification_screenshot = BooleanField('Attach screenshot to notification (where possible)', default=False) + def extra_tab_content(self): + return None + + def extra_form_content(self): + return None + def validate(self, **kwargs): if not super().validate(): return False @@ -513,7 +527,6 @@ class watchForm(commonSettingsForm): result = False return result - class SingleExtraProxy(Form): # maybe better to set some <script>var.. @@ -572,6 +585,8 @@ class globalSettingsApplicationForm(commonSettingsForm): removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"}) render_anchor_tag_content = BooleanField('Render anchor tag content', default=False) shared_diff_access = BooleanField('Allow access to view diff page when password is enabled', default=False, validators=[validators.Optional()]) + rss_hide_muted_watches = BooleanField('Hide muted watches from RSS feed', default=True, + validators=[validators.Optional()]) filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification', render_kw={"style": "width: 5em;"}, validators=[validators.NumberRange(min=0, @@ -582,6 +597,11 @@ class globalSettingsForm(Form): # Define these as FormFields/"sub forms", this way it matches the JSON storage # datastore.data['settings']['application'].. # datastore.data['settings']['requests'].. 
+ def __init__(self, formdata=None, obj=None, prefix="", data=None, meta=None, **kwargs): + super().__init__(formdata, obj, prefix, data, meta, **kwargs) + self.application.notification_body.extra_notification_tokens = kwargs.get('extra_notification_tokens', {}) + self.application.notification_title.extra_notification_tokens = kwargs.get('extra_notification_tokens', {}) + self.application.notification_urls.extra_notification_tokens = kwargs.get('extra_notification_tokens', {}) requests = FormField(globalSettingsRequestForm) application = FormField(globalSettingsApplicationForm) diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py index a4e283d6..74c5fef1 100644 --- a/changedetectionio/html_tools.py +++ b/changedetectionio/html_tools.py @@ -3,8 +3,6 @@ from bs4 import BeautifulSoup from inscriptis import get_text from jsonpath_ng.ext import parse from typing import List -from inscriptis.css_profiles import CSS_PROFILES, HtmlElement -from inscriptis.html_properties import Display from inscriptis.model.config import ParserConfig from xml.sax.saxutils import escape as xml_escape import json @@ -218,12 +216,12 @@ def extract_element(find='title', html_content=''): # def _parse_json(json_data, json_filter): - if 'json:' in json_filter: + if json_filter.startswith("json:"): jsonpath_expression = parse(json_filter.replace('json:', '')) match = jsonpath_expression.find(json_data) return _get_stripped_text_from_json_match(match) - if 'jq:' in json_filter: + if json_filter.startswith("jq:") or json_filter.startswith("jqraw:"): try: import jq @@ -231,10 +229,15 @@ def _parse_json(json_data, json_filter): # `jq` requires full compilation in windows and so isn't generally available raise Exception("jq not support not found") - jq_expression = jq.compile(json_filter.replace('jq:', '')) - match = jq_expression.input(json_data).all() + if json_filter.startswith("jq:"): + jq_expression = jq.compile(json_filter.removeprefix("jq:")) + match = 
jq_expression.input(json_data).all() + return _get_stripped_text_from_json_match(match) - return _get_stripped_text_from_json_match(match) + if json_filter.startswith("jqraw:"): + jq_expression = jq.compile(json_filter.removeprefix("jqraw:")) + match = jq_expression.input(json_data).all() + return '\n'.join(str(item) for item in match) def _get_stripped_text_from_json_match(match): s = [] @@ -262,7 +265,7 @@ def _get_stripped_text_from_json_match(match): # ensure_is_ldjson_info_type - str "product", optional, "@type == product" (I dont know how to do that as a json selector) def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None): stripped_text_from_html = False - +# https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w # Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags try: stripped_text_from_html = _parse_json(json.loads(content), json_filter) @@ -301,17 +304,19 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None if isinstance(json_data, dict): # If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search # (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part) - # @type could also be a list (Product, SubType) + # @type could also be a list although non-standard ("@type": ["Product", "SubType"],) # LD_JSON auto-extract also requires some content PLUS the ldjson to be present # 1833 - could be either str or dict, should not be anything else - if json_data.get('@type') and stripped_text_from_html: - try: - if json_data.get('@type') == str or json_data.get('@type') == dict: - types = [json_data.get('@type')] if isinstance(json_data.get('@type'), str) else json_data.get('@type') - if ensure_is_ldjson_info_type.lower() in [x.lower().strip() for x in types]: - break - except: - continue + + t = json_data.get('@type') + if t and 
stripped_text_from_html: + + if isinstance(t, str) and t.lower() == ensure_is_ldjson_info_type.lower(): + break + # The non-standard part, some have a list + elif isinstance(t, list): + if ensure_is_ldjson_info_type.lower() in [x.lower().strip() for x in t]: + break elif stripped_text_from_html: break @@ -414,22 +419,23 @@ def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=Fals # Does LD+JSON exist with a @type=='product' and a .price set anywhere? def has_ldjson_product_info(content): - pricing_data = '' - try: - if not 'application/ld+json' in content: - return False - - for filter in LD_JSON_PRODUCT_OFFER_SELECTORS: - pricing_data += extract_json_as_string(content=content, - json_filter=filter, - ensure_is_ldjson_info_type="product") - + lc = content.lower() + if 'application/ld+json' in lc and lc.count('"price"') == 1 and '"pricecurrency"' in lc: + return True + +# On some pages this is really terribly expensive when they dont really need it +# (For example you never want price monitoring, but this runs on every watch to suggest it) +# for filter in LD_JSON_PRODUCT_OFFER_SELECTORS: +# pricing_data += extract_json_as_string(content=content, +# json_filter=filter, +# ensure_is_ldjson_info_type="product") except Exception as e: - # Totally fine + # OK too return False - x=bool(pricing_data) - return x + + return False + def workarounds_for_obfuscations(content): diff --git a/changedetectionio/model/App.py b/changedetectionio/model/App.py index 75384f17..fdd627ed 100644 --- a/changedetectionio/model/App.py +++ b/changedetectionio/model/App.py @@ -5,6 +5,7 @@ from changedetectionio.notification import ( default_notification_title, ) +# Equal to or greater than this number of FilterNotFoundInResponse exceptions will trigger a filter-not-found notification _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT = 6 DEFAULT_SETTINGS_HEADERS_USERAGENT='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36' 
@@ -46,6 +47,8 @@ class model(dict): 'pager_size': 50, 'password': False, 'render_anchor_tag_content': False, + 'rss_access_token': None, + 'rss_hide_muted_watches': True, 'schema_version' : 0, 'shared_diff_access': False, 'webdriver_delay': None , # Extra delay in seconds before extracting text diff --git a/changedetectionio/model/Tag.py b/changedetectionio/model/Tag.py index 1592cf08..6dca480c 100644 --- a/changedetectionio/model/Tag.py +++ b/changedetectionio/model/Tag.py @@ -1,19 +1,14 @@ -from .Watch import base_config -import uuid -class model(dict): +from changedetectionio.model import watch_base - def __init__(self, *arg, **kw): - self.update(base_config) +class model(watch_base): + + def __init__(self, *arg, **kw): + super(model, self).__init__(*arg, **kw) - self['uuid'] = str(uuid.uuid4()) + self['overrides_watch'] = kw.get('default', {}).get('overrides_watch') if kw.get('default'): self.update(kw['default']) del kw['default'] - - - # Goes at the end so we update the default object with the initialiser - super(model, self).__init__(*arg, **kw) - diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py index e262ac4e..d3167bf9 100644 --- a/changedetectionio/model/Watch.py +++ b/changedetectionio/model/Watch.py @@ -1,10 +1,8 @@ from changedetectionio.strtobool import strtobool from changedetectionio.safe_jinja import render as jinja_render - +from . 
import watch_base import os import re -import time -import uuid from pathlib import Path from loguru import logger @@ -15,69 +13,6 @@ SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):' minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3)) mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7} -from changedetectionio.notification import ( - default_notification_format_for_watch -) - -base_config = { - 'body': None, - 'browser_steps': [], - 'browser_steps_last_error_step': None, - 'check_unique_lines': False, # On change-detected, compare against all history if its something new - 'check_count': 0, - 'date_created': None, - 'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine. - 'extract_text': [], # Extract text by regex after filters - 'extract_title_as_title': False, - 'fetch_backend': 'system', # plaintext, playwright etc - 'fetch_time': 0.0, - 'processor': 'text_json_diff', # could be restock_diff or others from .processors - 'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')), - 'filter_text_added': True, - 'filter_text_replaced': True, - 'filter_text_removed': True, - 'has_ldjson_price_data': None, - 'track_ldjson_price_data': None, - 'headers': {}, # Extra headers to send - 'ignore_text': [], # List of text to ignore when calculating the comparison checksum - 'in_stock' : None, - 'in_stock_only' : True, # Only trigger change on going to instock from out-of-stock - 'include_filters': [], - 'last_checked': 0, - 'last_error': False, - 'last_viewed': 0, # history key value of the last viewed via the [diff] link - 'method': 'GET', - 'notification_alert_count': 0, - # Custom notification content - 'notification_body': None, - 'notification_format': default_notification_format_for_watch, - 'notification_muted': False, - 'notification_title': None, - 'notification_screenshot': False, # Include the 
latest screenshot if available and supported by the apprise URL - 'notification_urls': [], # List of URLs to add to the notification Queue (Usually AppRise) - 'paused': False, - 'previous_md5': False, - 'previous_md5_before_filters': False, # Used for skipping changedetection entirely - 'proxy': None, # Preferred proxy connection - 'remote_server_reply': None, # From 'server' reply header - 'sort_text_alphabetically': False, - 'subtractive_selectors': [], - 'tag': '', # Old system of text name for a tag, to be removed - 'tags': [], # list of UUIDs to App.Tags - 'text_should_not_be_present': [], # Text that should not present - # Re #110, so then if this is set to None, we know to use the default value instead - # Requires setting to None on submit if it's the same as the default - # Should be all None by default, so we use the system default in this case. - 'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None}, - 'time_between_check_use_default': True, - 'title': None, - 'trigger_text': [], # List of text or regex to wait for until a change is detected - 'url': '', - 'uuid': str(uuid.uuid4()), - 'webdriver_delay': None, - 'webdriver_js_execute_code': None, # Run before change-detection -} - def is_safe_url(test_url): # See https://github.com/dgtlmoon/changedetection.io/issues/1358 @@ -94,30 +29,26 @@ def is_safe_url(test_url): return True -class model(dict): + +class model(watch_base): __newest_history_key = None __history_n = 0 jitter_seconds = 0 def __init__(self, *arg, **kw): - - self.update(base_config) self.__datastore_path = kw['datastore_path'] - - self['uuid'] = str(uuid.uuid4()) - del kw['datastore_path'] - + super(model, self).__init__(*arg, **kw) if kw.get('default'): self.update(kw['default']) del kw['default'] + if self.get('default'): + del self['default'] + # Be sure the cached timestamp is ready bump = self.history - # Goes at the end so we update the default object with the initialiser - super(model, 
self).__init__(*arg, **kw) - @property def viewed(self): # Don't return viewed when last_viewed is 0 and newest_key is 0 @@ -157,6 +88,33 @@ class model(dict): ready_url=ready_url.replace('source:', '') return ready_url + def clear_watch(self): + import pathlib + + # JSON Data, Screenshots, Textfiles (history index and snapshots), HTML in the future etc + for item in pathlib.Path(str(self.watch_data_dir)).rglob("*.*"): + os.unlink(item) + + # Force the attr to recalculate + bump = self.history + + # Do this last because it will trigger a recheck due to last_checked being zero + self.update({ + 'browser_steps_last_error_step': None, + 'check_count': 0, + 'fetch_time': 0.0, + 'has_ldjson_price_data': None, + 'last_checked': 0, + 'last_error': False, + 'last_notification_error': False, + 'last_viewed': 0, + 'previous_md5': False, + 'previous_md5_before_filters': False, + 'remote_server_reply': None, + 'track_ldjson_price_data': None + }) + return + @property def is_source_type_url(self): return self.get('url', '').startswith('source:') @@ -238,6 +196,8 @@ class model(dict): if len(tmp_history): self.__newest_history_key = list(tmp_history.keys())[-1] + else: + self.__newest_history_key = None self.__history_n = len(tmp_history) @@ -256,6 +216,13 @@ class model(dict): return has_browser_steps + @property + def has_restock_info(self): + if self.get('restock') and self['restock'].get('in_stock') != None: + return True + + return False + # Returns the newest key, but if theres only 1 record, then it's counted as not being new, so return 0. 
@property def newest_history_key(self): @@ -328,12 +295,9 @@ class model(dict): def save_history_text(self, contents, timestamp, snapshot_id): import brotli - self.ensure_data_dir_exists() + logger.trace(f"{self.get('uuid')} - Updating history.txt with timestamp {timestamp}") - # Small hack so that we sleep just enough to allow 1 second between history snapshots - # this is because history.txt indexes/keys snapshots by epoch seconds and we dont want dupe keys - if self.__newest_history_key and int(timestamp) == int(self.__newest_history_key): - time.sleep(timestamp - self.__newest_history_key) + self.ensure_data_dir_exists() threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024)) skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False')) @@ -468,6 +432,17 @@ class model(dict): def toggle_mute(self): self['notification_muted'] ^= True + def extra_notification_token_values(self): + # Used for providing extra tokens + # return {'widget': 555} + return {} + + def extra_notification_token_placeholder_info(self): + # Used for providing extra tokens + # return [('widget', "Get widget amounts")] + return [] + + def extract_regex_from_all_history(self, regex): import csv import re @@ -526,8 +501,42 @@ class model(dict): # None is set return False + def save_error_text(self, contents): + self.ensure_data_dir_exists() + target_path = os.path.join(self.watch_data_dir, "last-error.txt") + with open(target_path, 'w') as f: + f.write(contents) + + def save_xpath_data(self, data, as_error=False): + import json + + if as_error: + target_path = os.path.join(self.watch_data_dir, "elements-error.json") + else: + target_path = os.path.join(self.watch_data_dir, "elements.json") + + self.ensure_data_dir_exists() - def get_last_fetched_before_filters(self): + with open(target_path, 'w') as f: + f.write(json.dumps(data)) + f.close() + + # Save as PNG, PNG is larger but better for doing visual diff in the future + def save_screenshot(self, screenshot: 
bytes, as_error=False): + + if as_error: + target_path = os.path.join(self.watch_data_dir, "last-error-screenshot.png") + else: + target_path = os.path.join(self.watch_data_dir, "last-screenshot.png") + + self.ensure_data_dir_exists() + + with open(target_path, 'wb') as f: + f.write(screenshot) + f.close() + + + def get_last_fetched_text_before_filters(self): import brotli filepath = os.path.join(self.watch_data_dir, 'last-fetched.br') @@ -542,12 +551,56 @@ class model(dict): with open(filepath, 'rb') as f: return(brotli.decompress(f.read()).decode('utf-8')) - def save_last_fetched_before_filters(self, contents): + def save_last_text_fetched_before_filters(self, contents): import brotli filepath = os.path.join(self.watch_data_dir, 'last-fetched.br') with open(filepath, 'wb') as f: f.write(brotli.compress(contents, mode=brotli.MODE_TEXT)) + def save_last_fetched_html(self, timestamp, contents): + import brotli + + self.ensure_data_dir_exists() + snapshot_fname = f"{timestamp}.html.br" + filepath = os.path.join(self.watch_data_dir, snapshot_fname) + + with open(filepath, 'wb') as f: + contents = contents.encode('utf-8') if isinstance(contents, str) else contents + try: + f.write(brotli.compress(contents)) + except Exception as e: + logger.warning(f"{self.get('uuid')} - Unable to compress snapshot, saving as raw data to {filepath}") + logger.warning(e) + f.write(contents) + + self._prune_last_fetched_html_snapshots() + + def get_fetched_html(self, timestamp): + import brotli + + snapshot_fname = f"{timestamp}.html.br" + filepath = os.path.join(self.watch_data_dir, snapshot_fname) + if os.path.isfile(filepath): + with open(filepath, 'rb') as f: + return (brotli.decompress(f.read()).decode('utf-8')) + + return False + + + def _prune_last_fetched_html_snapshots(self): + + dates = list(self.history.keys()) + dates.reverse() + + for index, timestamp in enumerate(dates): + snapshot_fname = f"{timestamp}.html.br" + filepath = os.path.join(self.watch_data_dir, snapshot_fname) 
+ + # Keep only the first 2 + if index > 1 and os.path.isfile(filepath): + os.remove(filepath) + + @property def get_browsersteps_available_screenshots(self): "For knowing which screenshots are available to show the user in BrowserSteps UI" diff --git a/changedetectionio/model/__init__.py b/changedetectionio/model/__init__.py index e69de29b..e439de4f 100644 --- a/changedetectionio/model/__init__.py +++ b/changedetectionio/model/__init__.py @@ -0,0 +1,73 @@ +import os +import uuid + +from changedetectionio import strtobool +from changedetectionio.notification import default_notification_format_for_watch + +class watch_base(dict): + + def __init__(self, *arg, **kw): + self.update({ + # Custom notification content + # Re #110, so then if this is set to None, we know to use the default value instead + # Requires setting to None on submit if it's the same as the default + # Should be all None by default, so we use the system default in this case. + 'body': None, + 'browser_steps': [], + 'browser_steps_last_error_step': None, + 'check_count': 0, + 'check_unique_lines': False, # On change-detected, compare against all history if its something new + 'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine. 
+ 'date_created': None, + 'extract_text': [], # Extract text by regex after filters + 'extract_title_as_title': False, + 'fetch_backend': 'system', # plaintext, playwright etc + 'fetch_time': 0.0, + 'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')), + 'filter_text_added': True, + 'filter_text_removed': True, + 'filter_text_replaced': True, + 'follow_price_changes': True, + 'has_ldjson_price_data': None, + 'headers': {}, # Extra headers to send + 'ignore_text': [], # List of text to ignore when calculating the comparison checksum + 'in_stock_only': True, # Only trigger change on going to instock from out-of-stock + 'include_filters': [], + 'last_checked': 0, + 'last_error': False, + 'last_viewed': 0, # history key value of the last viewed via the [diff] link + 'method': 'GET', + 'notification_alert_count': 0, + 'notification_body': None, + 'notification_format': default_notification_format_for_watch, + 'notification_muted': False, + 'notification_screenshot': False, # Include the latest screenshot if available and supported by the apprise URL + 'notification_title': None, + 'notification_urls': [], # List of URLs to add to the notification Queue (Usually AppRise) + 'paused': False, + 'previous_md5': False, + 'previous_md5_before_filters': False, # Used for skipping changedetection entirely + 'processor': 'text_json_diff', # could be restock_diff or others from .processors + 'price_change_threshold_percent': None, + 'proxy': None, # Preferred proxy connection + 'remote_server_reply': None, # From 'server' reply header + 'sort_text_alphabetically': False, + 'subtractive_selectors': [], + 'tag': '', # Old system of text name for a tag, to be removed + 'tags': [], # list of UUIDs to App.Tags + 'text_should_not_be_present': [], # Text that should not present + 'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None}, + 'time_between_check_use_default': True, + 'title': 
None, + 'track_ldjson_price_data': None, + 'trigger_text': [], # List of text or regex to wait for until a change is detected + 'url': '', + 'uuid': str(uuid.uuid4()), + 'webdriver_delay': None, + 'webdriver_js_execute_code': None, # Run before change-detection + }) + + super(watch_base, self).__init__(*arg, **kw) + + if self.get('default'): + del self['default'] \ No newline at end of file diff --git a/changedetectionio/notification.py b/changedetectionio/notification.py index 41285ce4..d685ab1d 100644 --- a/changedetectionio/notification.py +++ b/changedetectionio/notification.py @@ -107,7 +107,7 @@ def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs): r(results.get('url'), auth=auth, - data=body, + data=body.encode('utf-8') if type(body) is str else body, headers=headers, params=params ) @@ -157,7 +157,7 @@ def process_notification(n_object, datastore): logger.warning(f"Process Notification: skipping empty notification URL.") continue - logger.info(">> Process Notification: AppRise notifying {}".format(url)) + logger.info(f">> Process Notification: AppRise notifying {url}") url = jinja_render(template_str=url, **notification_parameters) # Re 323 - Limit discord length to their 2000 char limit total or it wont send. 
@@ -230,6 +230,7 @@ def process_notification(n_object, datastore): log_value = logs.getvalue() if log_value and 'WARNING' in log_value or 'ERROR' in log_value: + logger.critical(log_value) raise Exception(log_value) # Return what was sent for better logging - after the for loop @@ -272,19 +273,18 @@ def create_notification_parameters(n_object, datastore): tokens.update( { 'base_url': base_url, - 'current_snapshot': n_object.get('current_snapshot', ''), - 'diff': n_object.get('diff', ''), # Null default in the case we use a test - 'diff_added': n_object.get('diff_added', ''), # Null default in the case we use a test - 'diff_full': n_object.get('diff_full', ''), # Null default in the case we use a test - 'diff_patch': n_object.get('diff_patch', ''), # Null default in the case we use a test - 'diff_removed': n_object.get('diff_removed', ''), # Null default in the case we use a test 'diff_url': diff_url, 'preview_url': preview_url, - 'triggered_text': n_object.get('triggered_text', ''), 'watch_tag': watch_tag if watch_tag is not None else '', 'watch_title': watch_title if watch_title is not None else '', 'watch_url': watch_url, 'watch_uuid': uuid, }) + # n_object will contain diff, diff_added etc etc + tokens.update(n_object) + + if uuid: + tokens.update(datastore.data['watching'].get(uuid).extra_notification_token_values()) + return tokens diff --git a/changedetectionio/processors/README.md b/changedetectionio/processors/README.md index 547ae4e8..0cc55572 100644 --- a/changedetectionio/processors/README.md +++ b/changedetectionio/processors/README.md @@ -8,4 +8,8 @@ The concept here is to be able to switch between different domain specific probl Some suggestions for the future - `graphical` -- `restock_and_price` - extract price AND stock text \ No newline at end of file + +## Todo + +- Make each processor return a extra list of sub-processed (so you could configure a single processor in different ways) +- move restock_diff to its own pip/github repo diff --git 
a/changedetectionio/processors/__init__.py b/changedetectionio/processors/__init__.py index 8702ee5d..529f57da 100644 --- a/changedetectionio/processors/__init__.py +++ b/changedetectionio/processors/__init__.py @@ -1,10 +1,14 @@ from abc import abstractmethod -import os -import hashlib -import re -from copy import deepcopy from changedetectionio.strtobool import strtobool + +from copy import deepcopy from loguru import logger +import hashlib +import os +import re +import importlib +import pkgutil +import inspect class difference_detection_processor(): @@ -21,6 +25,8 @@ class difference_detection_processor(): self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid)) def call_browser(self): + from requests.structures import CaseInsensitiveDict + from changedetectionio.content_fetchers.exceptions import EmptyReply # Protect against file:// access if re.search(r'^file://', self.watch.get('url', '').strip(), re.IGNORECASE): @@ -93,14 +99,16 @@ class difference_detection_processor(): self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid')) # Tweak the base config with the per-watch ones - request_headers = self.watch.get('headers', []) - request_headers.update(self.datastore.get_all_base_headers()) - request_headers.update(self.datastore.get_all_headers_in_textfile_for_watch(uuid=self.watch.get('uuid'))) + request_headers = CaseInsensitiveDict() ua = self.datastore.data['settings']['requests'].get('default_ua') if ua and ua.get(prefer_fetch_backend): request_headers.update({'User-Agent': ua.get(prefer_fetch_backend)}) + request_headers.update(self.watch.get('headers', {})) + request_headers.update(self.datastore.get_all_base_headers()) + request_headers.update(self.datastore.get_all_headers_in_textfile_for_watch(uuid=self.watch.get('uuid'))) + # https://github.com/psf/requests/issues/4525 # Requests doesnt yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot # do 
this by accident. @@ -127,8 +135,18 @@ class difference_detection_processor(): is_binary = self.watch.is_pdf # And here we go! call the right browser with browser-specific settings - self.fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, self.watch.get('include_filters'), - is_binary=is_binary) + empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False) + + self.fetcher.run(url=url, + timeout=timeout, + request_headers=request_headers, + request_body=request_body, + request_method=request_method, + ignore_status_codes=ignore_status_codes, + current_include_filters=self.watch.get('include_filters'), + is_binary=is_binary, + empty_pages_are_a_change=empty_pages_are_a_change + ) #@todo .quit here could go on close object, so we can run JS if change-detected self.fetcher.quit() @@ -136,7 +154,7 @@ class difference_detection_processor(): # After init, call run_changedetection() which will do the actual change-detection @abstractmethod - def run_changedetection(self, uuid, skip_when_checksum_same=True): + def run_changedetection(self, watch, skip_when_checksum_same=True): update_obj = {'last_notification_error': False, 'last_error': False} some_data = 'xxxxx' update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest() @@ -144,8 +162,83 @@ class difference_detection_processor(): return changed_detected, update_obj, ''.encode('utf-8') +def find_sub_packages(package_name): + """ + Find all sub-packages within the given package. + + :param package_name: The name of the base package to scan for sub-packages. + :return: A list of sub-package names. + """ + package = importlib.import_module(package_name) + return [name for _, name, is_pkg in pkgutil.iter_modules(package.__path__) if is_pkg] + + +def find_processors(): + """ + Find all subclasses of DifferenceDetectionProcessor in the specified package. 
+ + :param package_name: The name of the package to scan for processor modules. + :return: A list of (module, class) tuples. + """ + package_name = "changedetectionio.processors" # Name of the current package/module + + processors = [] + sub_packages = find_sub_packages(package_name) + + for sub_package in sub_packages: + module_name = f"{package_name}.{sub_package}.processor" + try: + module = importlib.import_module(module_name) + + # Iterate through all classes in the module + for name, obj in inspect.getmembers(module, inspect.isclass): + if issubclass(obj, difference_detection_processor) and obj is not difference_detection_processor: + processors.append((module, sub_package)) + except (ModuleNotFoundError, ImportError) as e: + logger.warning(f"Failed to import module {module_name}: {e} (find_processors())") + + return processors + + +def get_parent_module(module): + module_name = module.__name__ + if '.' not in module_name: + return None # Top-level module has no parent + parent_module_name = module_name.rsplit('.', 1)[0] + try: + return importlib.import_module(parent_module_name) + except Exception as e: + pass + + return False + + + +def get_custom_watch_obj_for_processor(processor_name): + from changedetectionio.model import Watch + watch_class = Watch.model + processor_classes = find_processors() + custom_watch_obj = next((tpl for tpl in processor_classes if tpl[1] == processor_name), None) + if custom_watch_obj: + # Parent of .processor.py COULD have its own Watch implementation + parent_module = get_parent_module(custom_watch_obj[0]) + if hasattr(parent_module, 'Watch'): + watch_class = parent_module.Watch + + return watch_class + + def available_processors(): - from . import restock_diff, text_json_diff - x=[('text_json_diff', text_json_diff.name), ('restock_diff', restock_diff.name)] - # @todo Make this smarter with introspection of sorts. 
- return x + """ + Get a list of processors by name and description for the UI elements + :return: A list :) + """ + + processor_classes = find_processors() + + available = [] + for package, processor_class in processor_classes: + available.append((processor_class, package.name)) + + return available + diff --git a/changedetectionio/processors/exceptions.py b/changedetectionio/processors/exceptions.py new file mode 100644 index 00000000..01c99a63 --- /dev/null +++ b/changedetectionio/processors/exceptions.py @@ -0,0 +1,10 @@ +class ProcessorException(Exception): + def __init__(self, message=None, status_code=None, url=None, screenshot=None, has_filters=False, html_content='', xpath_data=None): + self.message = message + self.status_code = status_code + self.url = url + self.screenshot = screenshot + self.has_filters = has_filters + self.html_content = html_content + self.xpath_data = xpath_data + return diff --git a/changedetectionio/processors/restock_diff.py b/changedetectionio/processors/restock_diff.py deleted file mode 100644 index e692e7cb..00000000 --- a/changedetectionio/processors/restock_diff.py +++ /dev/null @@ -1,66 +0,0 @@ - -from . 
import difference_detection_processor -from copy import deepcopy -from loguru import logger -import hashlib -import urllib3 - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -name = 'Re-stock detection for single product pages' -description = 'Detects if the product goes back to in-stock' - -class UnableToExtractRestockData(Exception): - def __init__(self, status_code): - # Set this so we can use it in other parts of the app - self.status_code = status_code - return - -class perform_site_check(difference_detection_processor): - screenshot = None - xpath_data = None - - def run_changedetection(self, uuid, skip_when_checksum_same=True): - - # DeepCopy so we can be sure we don't accidently change anything by reference - watch = deepcopy(self.datastore.data['watching'].get(uuid)) - - if not watch: - raise Exception("Watch no longer exists.") - - # Unset any existing notification error - update_obj = {'last_notification_error': False, 'last_error': False} - - self.screenshot = self.fetcher.screenshot - self.xpath_data = self.fetcher.xpath_data - - # Track the content type - update_obj['content_type'] = self.fetcher.headers.get('Content-Type', '') - update_obj["last_check_status"] = self.fetcher.get_last_status_code() - - # Main detection method - fetched_md5 = None - if self.fetcher.instock_data: - fetched_md5 = hashlib.md5(self.fetcher.instock_data.encode('utf-8')).hexdigest() - # 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold. 
- update_obj["in_stock"] = True if self.fetcher.instock_data == 'Possibly in stock' else False - logger.debug(f"Watch UUID {uuid} restock check returned '{self.fetcher.instock_data}' from JS scraper.") - else: - raise UnableToExtractRestockData(status_code=self.fetcher.status_code) - - # The main thing that all this at the moment comes down to :) - changed_detected = False - logger.debug(f"Watch UUID {uuid} restock check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}") - - if watch.get('previous_md5') and watch.get('previous_md5') != fetched_md5: - # Yes if we only care about it going to instock, AND we are in stock - if watch.get('in_stock_only') and update_obj["in_stock"]: - changed_detected = True - - if not watch.get('in_stock_only'): - # All cases - changed_detected = True - - # Always record the new checksum - update_obj["previous_md5"] = fetched_md5 - return changed_detected, update_obj, self.fetcher.instock_data.encode('utf-8').strip() diff --git a/changedetectionio/processors/restock_diff/__init__.py b/changedetectionio/processors/restock_diff/__init__.py new file mode 100644 index 00000000..1aeca8af --- /dev/null +++ b/changedetectionio/processors/restock_diff/__init__.py @@ -0,0 +1,80 @@ + +from changedetectionio.model.Watch import model as BaseWatch +import re +from babel.numbers import parse_decimal + +class Restock(dict): + + def parse_currency(self, raw_value: str) -> float: + # Clean and standardize the value (ie 1,400.00 should be 1400.00), even better would be store the whole thing as an integer. + standardized_value = raw_value + + if ',' in standardized_value and '.' 
in standardized_value: + # Identify the correct decimal separator + if standardized_value.rfind('.') > standardized_value.rfind(','): + standardized_value = standardized_value.replace(',', '') + else: + standardized_value = standardized_value.replace('.', '').replace(',', '.') + else: + standardized_value = standardized_value.replace(',', '.') + + # Remove any non-numeric characters except for the decimal point + standardized_value = re.sub(r'[^\d.-]', '', standardized_value) + + # Convert to float + return float(parse_decimal(standardized_value, locale='en')) + + def __init__(self, *args, **kwargs): + # Define default values + default_values = { + 'in_stock': None, + 'price': None, + 'currency': None, + 'original_price': None + } + + # Initialize the dictionary with default values + super().__init__(default_values) + + # Update with any provided positional arguments (dictionaries) + if args: + if len(args) == 1 and isinstance(args[0], dict): + self.update(args[0]) + else: + raise ValueError("Only one positional argument of type 'dict' is allowed") + + def __setitem__(self, key, value): + # Custom logic to handle setting price and original_price + if key == 'price' or key == 'original_price': + if isinstance(value, str): + value = self.parse_currency(raw_value=value) + + super().__setitem__(key, value) + +class Watch(BaseWatch): + def __init__(self, *arg, **kw): + super().__init__(*arg, **kw) + self['restock'] = Restock(kw['default']['restock']) if kw.get('default') and kw['default'].get('restock') else Restock() + + self['restock_settings'] = kw['default']['restock_settings'] if kw.get('default',{}).get('restock_settings') else { + 'follow_price_changes': True, + 'in_stock_processing' : 'in_stock_only' + } #@todo update + + def clear_watch(self): + super().clear_watch() + self.update({'restock': Restock()}) + + def extra_notification_token_values(self): + values = super().extra_notification_token_values() + values['restock'] = self.get('restock', {}) + return 
values + + def extra_notification_token_placeholder_info(self): + values = super().extra_notification_token_placeholder_info() + + values.append(('restock.price', "Price detected")) + values.append(('restock.original_price', "Original price at first check")) + + return values + diff --git a/changedetectionio/processors/restock_diff/forms.py b/changedetectionio/processors/restock_diff/forms.py new file mode 100644 index 00000000..39334aa3 --- /dev/null +++ b/changedetectionio/processors/restock_diff/forms.py @@ -0,0 +1,81 @@ +from wtforms import ( + BooleanField, + validators, + FloatField +) +from wtforms.fields.choices import RadioField +from wtforms.fields.form import FormField +from wtforms.form import Form + +from changedetectionio.forms import processor_text_json_diff_form + + +class RestockSettingsForm(Form): + in_stock_processing = RadioField(label='Re-stock detection', choices=[ + ('in_stock_only', "In Stock only (Out Of Stock -> In Stock only)"), + ('all_changes', "Any availability changes"), + ('off', "Off, don't follow availability/restock"), + ], default="in_stock_only") + + price_change_min = FloatField('Below price to trigger notification', [validators.Optional()], + render_kw={"placeholder": "No limit", "size": "10"}) + price_change_max = FloatField('Above price to trigger notification', [validators.Optional()], + render_kw={"placeholder": "No limit", "size": "10"}) + price_change_threshold_percent = FloatField('Threshold in % for price changes since the original price', validators=[ + + validators.Optional(), + validators.NumberRange(min=0, max=100, message="Should be between 0 and 100"), + ], render_kw={"placeholder": "0%", "size": "5"}) + + follow_price_changes = BooleanField('Follow price changes', default=True) + +class processor_settings_form(processor_text_json_diff_form): + restock_settings = FormField(RestockSettingsForm) + + def extra_tab_content(self): + return 'Restock & Price Detection' + + def extra_form_content(self): + output = "" + + 
if getattr(self, 'watch', None) and getattr(self, 'datastore'): + for tag_uuid in self.watch.get('tags'): + tag = self.datastore.data['settings']['application']['tags'].get(tag_uuid, {}) + if tag.get('overrides_watch'): + # @todo - Quick and dirty, cant access 'url_for' here because its out of scope somehow + output = f"""<p><strong>Note! A Group tag overrides the restock and price detection here.</strong></p><style>#restock-fieldset-price-group {{ opacity: 0.6; }}</style>""" + + output += """ + {% from '_helpers.html' import render_field, render_checkbox_field, render_button %} + <script> + $(document).ready(function () { + toggleOpacity('#restock_settings-follow_price_changes', '.price-change-minmax', true); + }); + </script> + + <fieldset id="restock-fieldset-price-group"> + <div class="pure-control-group"> + <fieldset class="pure-group inline-radio"> + {{ render_field(form.restock_settings.in_stock_processing) }} + </fieldset> + <fieldset class="pure-group"> + {{ render_checkbox_field(form.restock_settings.follow_price_changes) }} + <span class="pure-form-message-inline">Changes in price should trigger a notification</span> + </fieldset> + <fieldset class="pure-group price-change-minmax"> + {{ render_field(form.restock_settings.price_change_min, placeholder=watch.get('restock', {}).get('price')) }} + <span class="pure-form-message-inline">Minimum amount, Trigger a change/notification when the price drops <i>below</i> this value.</span> + </fieldset> + <fieldset class="pure-group price-change-minmax"> + {{ render_field(form.restock_settings.price_change_max, placeholder=watch.get('restock', {}).get('price')) }} + <span class="pure-form-message-inline">Maximum amount, Trigger a change/notification when the price rises <i>above</i> this value.</span> + </fieldset> + <fieldset class="pure-group price-change-minmax"> + {{ render_field(form.restock_settings.price_change_threshold_percent) }} + <span class="pure-form-message-inline">Price must change more than this % 
to trigger a change since the first check.</span><br> + <span class="pure-form-message-inline">For example, If the product is $1,000 USD originally, <strong>2%</strong> would mean it has to change more than $20 since the first check.</span><br> + </fieldset> + </div> + </fieldset> + """ + return output \ No newline at end of file diff --git a/changedetectionio/processors/restock_diff/processor.py b/changedetectionio/processors/restock_diff/processor.py new file mode 100644 index 00000000..b2184e35 --- /dev/null +++ b/changedetectionio/processors/restock_diff/processor.py @@ -0,0 +1,263 @@ +from .. import difference_detection_processor +from ..exceptions import ProcessorException +from . import Restock +from loguru import logger +import hashlib +import re +import urllib3 +import time + +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) +name = 'Re-stock & Price detection for single product pages' +description = 'Detects if the product goes back to in-stock' + +class UnableToExtractRestockData(Exception): + def __init__(self, status_code): + # Set this so we can use it in other parts of the app + self.status_code = status_code + return + +class MoreThanOnePriceFound(Exception): + def __init__(self): + return + +def _search_prop_by_value(matches, value): + for properties in matches: + for prop in properties: + if value in prop[0]: + return prop[1] # Yield the desired value and exit the function + +# should return Restock() +# add casting? +def get_itemprop_availability(html_content) -> Restock: + """ + Kind of funny/cool way to find price/availability in one many different possibilities. + Use 'extruct' to find any possible RDFa/microdata/json-ld data, make a JSON string from the output then search it. 
+ """ + from jsonpath_ng import parse + + now = time.time() + import extruct + logger.trace(f"Imported extruct module in {time.time() - now:.3f}s") + + value = {} + now = time.time() + # Extruct is very slow, I'm wondering if some ML is going to be faster (800ms on my i7), 'rdfa' seems to be the heaviest. + + syntaxes = ['dublincore', 'json-ld', 'microdata', 'microformat', 'opengraph'] + + data = extruct.extract(html_content, syntaxes=syntaxes) + logger.trace(f"Extruct basic extract of all metadata done in {time.time() - now:.3f}s") + + # First phase, dead simple scanning of anything that looks useful + value = Restock() + if data: + logger.debug(f"Using jsonpath to find price/availability/etc") + price_parse = parse('$..(price|Price)') + pricecurrency_parse = parse('$..(pricecurrency|currency|priceCurrency )') + availability_parse = parse('$..(availability|Availability)') + + price_result = price_parse.find(data) + if price_result: + # Right now, we just support single product items, maybe we will store the whole actual metadata seperately in teh future and + # parse that for the UI? 
+ prices_found = set(str(item.value).replace('$', '') for item in price_result) + if len(price_result) > 1 and len(prices_found) > 1: + # See of all prices are different, in the case that one product has many embedded data types with the same price + # One might have $121.95 and another 121.95 etc + logger.warning(f"More than one price found {prices_found}, throwing exception, cant use this plugin.") + raise MoreThanOnePriceFound() + + value['price'] = price_result[0].value + + pricecurrency_result = pricecurrency_parse.find(data) + if pricecurrency_result: + value['currency'] = pricecurrency_result[0].value + + availability_result = availability_parse.find(data) + if availability_result: + value['availability'] = availability_result[0].value + + if value.get('availability'): + value['availability'] = re.sub(r'(?i)^(https|http)://schema.org/', '', + value.get('availability').strip(' "\'').lower()) if value.get('availability') else None + + # Second, go dig OpenGraph which is something that jsonpath_ng cant do because of the tuples and double-dots (:) + if not value.get('price') or value.get('availability'): + logger.debug(f"Alternatively digging through OpenGraph properties for restock/price info..") + jsonpath_expr = parse('$..properties') + + for match in jsonpath_expr.find(data): + if not value.get('price'): + value['price'] = _search_prop_by_value([match.value], "price:amount") + if not value.get('availability'): + value['availability'] = _search_prop_by_value([match.value], "product:availability") + if not value.get('currency'): + value['currency'] = _search_prop_by_value([match.value], "price:currency") + logger.trace(f"Processed with Extruct in {time.time()-now:.3f}s") + + return value + + +def is_between(number, lower=None, upper=None): + """ + Check if a number is between two values. + + Parameters: + number (float): The number to check. + lower (float or None): The lower bound (inclusive). If None, no lower bound. 
+ upper (float or None): The upper bound (inclusive). If None, no upper bound. + + Returns: + bool: True if the number is between the lower and upper bounds, False otherwise. + """ + return (lower is None or lower <= number) and (upper is None or number <= upper) + + +class perform_site_check(difference_detection_processor): + screenshot = None + xpath_data = None + + def run_changedetection(self, watch, skip_when_checksum_same=True): + if not watch: + raise Exception("Watch no longer exists.") + + # Unset any existing notification error + update_obj = {'last_notification_error': False, 'last_error': False, 'restock': Restock()} + + self.screenshot = self.fetcher.screenshot + self.xpath_data = self.fetcher.xpath_data + + # Track the content type + update_obj['content_type'] = self.fetcher.headers.get('Content-Type', '') + update_obj["last_check_status"] = self.fetcher.get_last_status_code() + + # Which restock settings to compare against? + restock_settings = watch.get('restock_settings', {}) + + # See if any tags have 'activate for individual watches in this tag/group?' 
enabled and use the first we find + for tag_uuid in watch.get('tags'): + tag = self.datastore.data['settings']['application']['tags'].get(tag_uuid, {}) + if tag.get('overrides_watch'): + restock_settings = tag.get('restock_settings', {}) + logger.info(f"Watch {watch.get('uuid')} - Tag '{tag.get('title')}' selected for restock settings override") + break + + + itemprop_availability = {} + try: + itemprop_availability = get_itemprop_availability(html_content=self.fetcher.content) + except MoreThanOnePriceFound as e: + # Add the real data + raise ProcessorException(message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.", + url=watch.get('url'), + status_code=self.fetcher.get_last_status_code(), + screenshot=self.fetcher.screenshot, + xpath_data=self.fetcher.xpath_data + ) + + # Something valid in get_itemprop_availability() by scraping metadata ? + if itemprop_availability.get('price') or itemprop_availability.get('availability'): + # Store for other usage + update_obj['restock'] = itemprop_availability + + if itemprop_availability.get('availability'): + # @todo: Configurable? 
+ if any(substring.lower() in itemprop_availability['availability'].lower() for substring in [ + 'instock', + 'instoreonly', + 'limitedavailability', + 'onlineonly', + 'presale'] + ): + update_obj['restock']['in_stock'] = True + else: + update_obj['restock']['in_stock'] = False + + # Main detection method + fetched_md5 = None + + # store original price if not set + if itemprop_availability and itemprop_availability.get('price') and not itemprop_availability.get('original_price'): + itemprop_availability['original_price'] = itemprop_availability.get('price') + update_obj['restock']["original_price"] = itemprop_availability.get('price') + + if not self.fetcher.instock_data and not itemprop_availability.get('availability'): + raise ProcessorException( + message=f"Unable to extract restock data for this page unfortunately. (Got code {self.fetcher.get_last_status_code()} from server), no embedded stock information was found and nothing interesting in the text, try using this watch with Chrome.", + url=watch.get('url'), + status_code=self.fetcher.get_last_status_code(), + screenshot=self.fetcher.screenshot, + xpath_data=self.fetcher.xpath_data + ) + + # Nothing automatic in microdata found, revert to scraping the page + if self.fetcher.instock_data and itemprop_availability.get('availability') is None: + # 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold. + # Careful! 
this does not really come from chrome/js when the watch is set to plaintext + update_obj['restock']["in_stock"] = True if self.fetcher.instock_data == 'Possibly in stock' else False + logger.debug(f"Watch UUID {watch.get('uuid')} restock check returned '{self.fetcher.instock_data}' from JS scraper.") + + # What we store in the snapshot + price = update_obj.get('restock').get('price') if update_obj.get('restock').get('price') else "" + snapshot_content = f"In Stock: {update_obj.get('restock').get('in_stock')} - Price: {price}" + + # Main detection method + fetched_md5 = hashlib.md5(snapshot_content.encode('utf-8')).hexdigest() + + # The main thing that all this at the moment comes down to :) + changed_detected = False + logger.debug(f"Watch UUID {watch.get('uuid')} restock check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}") + + # out of stock -> back in stock only? + if watch.get('restock') and watch['restock'].get('in_stock') != update_obj['restock'].get('in_stock'): + # Yes if we only care about it going to instock, AND we are in stock + if restock_settings.get('in_stock_processing') == 'in_stock_only' and update_obj['restock']['in_stock']: + changed_detected = True + + if restock_settings.get('in_stock_processing') == 'all_changes': + # All cases + changed_detected = True + + if restock_settings.get('follow_price_changes') and watch.get('restock') and update_obj.get('restock') and update_obj['restock'].get('price'): + price = float(update_obj['restock'].get('price')) + # Default to current price if no previous price found + if watch['restock'].get('original_price'): + previous_price = float(watch['restock'].get('original_price')) + # It was different, but negate it further down + if price != previous_price: + changed_detected = True + + # Minimum/maximum price limit + if update_obj.get('restock') and update_obj['restock'].get('price'): + logger.debug( + f"{watch.get('uuid')} - Change was detected, 'price_change_max' is 
'{restock_settings.get('price_change_max', '')}' 'price_change_min' is '{restock_settings.get('price_change_min', '')}', price from website is '{update_obj['restock'].get('price', '')}'.") + if update_obj['restock'].get('price'): + min_limit = float(restock_settings.get('price_change_min')) if restock_settings.get('price_change_min') else None + max_limit = float(restock_settings.get('price_change_max')) if restock_settings.get('price_change_max') else None + + price = float(update_obj['restock'].get('price')) + logger.debug(f"{watch.get('uuid')} after float conversion - Min limit: '{min_limit}' Max limit: '{max_limit}' Price: '{price}'") + if min_limit or max_limit: + if is_between(number=price, lower=min_limit, upper=max_limit): + # Price was between min/max limit, so there was nothing todo in any case + logger.trace(f"{watch.get('uuid')} {price} is between {min_limit} and {max_limit}, nothing to check, forcing changed_detected = False (was {changed_detected})") + changed_detected = False + else: + logger.trace(f"{watch.get('uuid')} {price} is between {min_limit} and {max_limit}, continuing normal comparison") + + # Price comparison by % + if watch['restock'].get('original_price') and changed_detected and restock_settings.get('price_change_threshold_percent'): + previous_price = float(watch['restock'].get('original_price')) + pc = float(restock_settings.get('price_change_threshold_percent')) + change = abs((price - previous_price) / previous_price * 100) + if change and change <= pc: + logger.debug(f"{watch.get('uuid')} Override change-detected to FALSE because % threshold ({pc}%) was {change:.3f}%") + changed_detected = False + else: + logger.debug(f"{watch.get('uuid')} Price change was {change:.3f}% , (threshold {pc}%)") + + # Always record the new checksum + update_obj["previous_md5"] = fetched_md5 + + return changed_detected, update_obj, snapshot_content.encode('utf-8').strip() diff --git a/changedetectionio/processors/text_json_diff/__init__.py 
b/changedetectionio/processors/text_json_diff/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/changedetectionio/processors/text_json_diff.py b/changedetectionio/processors/text_json_diff/processor.py similarity index 92% rename from changedetectionio/processors/text_json_diff.py rename to changedetectionio/processors/text_json_diff/processor.py index e89e469d..1de5bafb 100644 --- a/changedetectionio/processors/text_json_diff.py +++ b/changedetectionio/processors/text_json_diff/processor.py @@ -6,22 +6,23 @@ import os import re import urllib3 -from . import difference_detection_processor -from ..html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text +from changedetectionio.processors import difference_detection_processor +from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text from changedetectionio import html_tools, content_fetchers from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT -import changedetectionio.content_fetchers -from copy import deepcopy from loguru import logger urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) name = 'Webpage Text/HTML, JSON and PDF changes' description = 'Detects all text changes where possible' -json_filter_prefixes = ['json:', 'jq:'] + +json_filter_prefixes = ['json:', 'jq:', 'jqraw:'] class FilterNotFoundInResponse(ValueError): - def __init__(self, msg): + def __init__(self, msg, screenshot=None, xpath_data=None): + self.screenshot = screenshot + self.xpath_data = xpath_data ValueError.__init__(self, msg) @@ -34,14 +35,12 @@ class PDFToHTMLToolNotFound(ValueError): # (set_proxy_from_list) class perform_site_check(difference_detection_processor): - def run_changedetection(self, uuid, skip_when_checksum_same=True): + def run_changedetection(self, watch, skip_when_checksum_same=True): changed_detected = False html_content = "" screenshot = False # as bytes stripped_text_from_html = "" - # DeepCopy 
so we can be sure we don't accidently change anything by reference - watch = deepcopy(self.datastore.data['watching'].get(uuid)) if not watch: raise Exception("Watch no longer exists.") @@ -116,12 +115,12 @@ class perform_site_check(difference_detection_processor): # Better would be if Watch.model could access the global data also # and then use getattr https://docs.python.org/3/reference/datamodel.html#object.__getitem__ # https://realpython.com/inherit-python-dict/ instead of doing it procedurely - include_filters_from_tags = self.datastore.get_tag_overrides_for_watch(uuid=uuid, attr='include_filters') + include_filters_from_tags = self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='include_filters') # 1845 - remove duplicated filters in both group and watch include filter include_filters_rule = list(dict.fromkeys(watch.get('include_filters', []) + include_filters_from_tags)) - subtractive_selectors = [*self.datastore.get_tag_overrides_for_watch(uuid=uuid, attr='subtractive_selectors'), + subtractive_selectors = [*self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='subtractive_selectors'), *watch.get("subtractive_selectors", []), *self.datastore.data["settings"]["application"].get("global_subtractive_selectors", []) ] @@ -188,7 +187,7 @@ class perform_site_check(difference_detection_processor): append_pretty_line_formatting=not watch.is_source_type_url) if not html_content.strip(): - raise FilterNotFoundInResponse(include_filters_rule) + raise FilterNotFoundInResponse(msg=include_filters_rule, screenshot=self.fetcher.screenshot, xpath_data=self.fetcher.xpath_data) if has_subtractive_selectors: html_content = html_tools.element_removal(subtractive_selectors, html_content) @@ -219,10 +218,10 @@ class perform_site_check(difference_detection_processor): # Rewrite's the processing text based on only what diff result they want to see if watch.has_special_diff_filter_options_set() and len(watch.history.keys()): # Now the content 
comes from the diff-parser and not the returned HTTP traffic, so could be some differences - from .. import diff + from changedetectionio import diff # needs to not include (added) etc or it may get used twice # Replace the processed text with the preferred result - rendered_diff = diff.render_diff(previous_version_file_contents=watch.get_last_fetched_before_filters(), + rendered_diff = diff.render_diff(previous_version_file_contents=watch.get_last_fetched_text_before_filters(), newest_version_file_contents=stripped_text_from_html, include_equal=False, # not the same lines include_added=watch.get('filter_text_added', True), @@ -231,7 +230,7 @@ class perform_site_check(difference_detection_processor): line_feed_sep="\n", include_change_type_prefix=False) - watch.save_last_fetched_before_filters(text_content_before_ignored_filter) + watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter) if not rendered_diff and stripped_text_from_html: # We had some content, but no differences were found @@ -246,9 +245,10 @@ class perform_site_check(difference_detection_processor): if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0: raise content_fetchers.exceptions.ReplyWithContentButNoText(url=url, status_code=self.fetcher.get_last_status_code(), - screenshot=screenshot, + screenshot=self.fetcher.screenshot, has_filters=has_filter_rule, - html_content=html_content + html_content=html_content, + xpath_data=self.fetcher.xpath_data ) # We rely on the actual text in the html output.. 
many sites have random script vars etc, @@ -338,23 +338,17 @@ class perform_site_check(difference_detection_processor): if blocked: changed_detected = False - # Extract title as title - if is_html: - if self.datastore.data['settings']['application'].get('extract_title_as_title') or watch['extract_title_as_title']: - if not watch['title'] or not len(watch['title']): - update_obj['title'] = html_tools.extract_element(find='title', html_content=self.fetcher.content) - - logger.debug(f"Watch UUID {uuid} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}") + logger.debug(f"Watch UUID {watch.get('uuid')} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}") if changed_detected: if watch.get('check_unique_lines', False): has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines()) # One or more lines? unsure? if not has_unique_lines: - logger.debug(f"check_unique_lines: UUID {uuid} didnt have anything new setting change_detected=False") + logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False") changed_detected = False else: - logger.debug(f"check_unique_lines: UUID {uuid} had unique content") + logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content") # Always record the new checksum update_obj["previous_md5"] = fetched_md5 diff --git a/changedetectionio/run_basic_tests.sh b/changedetectionio/run_basic_tests.sh index d9fa9ff0..38bcd603 100755 --- a/changedetectionio/run_basic_tests.sh +++ b/changedetectionio/run_basic_tests.sh @@ -35,4 +35,8 @@ pytest tests/test_access_control.py pytest tests/test_notification.py pytest tests/test_backend.py pytest tests/test_rss.py -pytest tests/test_unique_lines.py \ No newline at end of file +pytest tests/test_unique_lines.py + +# Check file:// will pickup a file when enabled +echo "Hello world" > /tmp/test-file.txt 
+ALLOW_FILE_URI=yes pytest tests/test_security.py diff --git a/changedetectionio/static/js/browser-steps.js b/changedetectionio/static/js/browser-steps.js index 4e576bd4..5c5fc52a 100644 --- a/changedetectionio/static/js/browser-steps.js +++ b/changedetectionio/static/js/browser-steps.js @@ -1,14 +1,5 @@ $(document).ready(function () { - // duplicate - var csrftoken = $('input[name=csrf_token]').val(); - $.ajaxSetup({ - beforeSend: function (xhr, settings) { - if (!/^(GET|HEAD|OPTIONS|TRACE)$/i.test(settings.type) && !this.crossDomain) { - xhr.setRequestHeader("X-CSRFToken", csrftoken) - } - } - }) var browsersteps_session_id; var browser_interface_seconds_remaining = 0; var apply_buttons_disabled = false; diff --git a/changedetectionio/static/js/csrf.js b/changedetectionio/static/js/csrf.js new file mode 100644 index 00000000..4e2aca53 --- /dev/null +++ b/changedetectionio/static/js/csrf.js @@ -0,0 +1,10 @@ +$(document).ready(function () { + $.ajaxSetup({ + beforeSend: function (xhr, settings) { + if (!/^(GET|HEAD|OPTIONS|TRACE)$/i.test(settings.type) && !this.crossDomain) { + xhr.setRequestHeader("X-CSRFToken", csrftoken) + } + } + }) +}); + diff --git a/changedetectionio/static/js/diff-overview.js b/changedetectionio/static/js/diff-overview.js index 95e6dd7a..1f501529 100644 --- a/changedetectionio/static/js/diff-overview.js +++ b/changedetectionio/static/js/diff-overview.js @@ -1,13 +1,4 @@ $(document).ready(function () { - var csrftoken = $('input[name=csrf_token]').val(); - $.ajaxSetup({ - beforeSend: function (xhr, settings) { - if (!/^(GET|HEAD|OPTIONS|TRACE)$/i.test(settings.type) && !this.crossDomain) { - xhr.setRequestHeader("X-CSRFToken", csrftoken) - } - } - }) - $('.needs-localtime').each(function () { for (var option of this.options) { var dateObject = new Date(option.value * 1000); @@ -48,6 +39,12 @@ $(document).ready(function () { $("#highlightSnippet").remove(); } + // Listen for Escape key press + window.addEventListener('keydown', function (e) { 
+ if (e.key === 'Escape') { + clean(); + } + }, false); function dragTextHandler(event) { console.log('mouseupped'); diff --git a/changedetectionio/static/js/notifications.js b/changedetectionio/static/js/notifications.js index d3a0b81a..95f3eacf 100644 --- a/changedetectionio/static/js/notifications.js +++ b/changedetectionio/static/js/notifications.js @@ -13,16 +13,6 @@ $(document).ready(function() { $('#send-test-notification').click(function (e) { e.preventDefault(); - // this can be global - var csrftoken = $('input[name=csrf_token]').val(); - $.ajaxSetup({ - beforeSend: function(xhr, settings) { - if (!/^(GET|HEAD|OPTIONS|TRACE)$/i.test(settings.type) && !this.crossDomain) { - xhr.setRequestHeader("X-CSRFToken", csrftoken) - } - } - }) - data = { notification_body: $('#notification_body').val(), notification_format: $('#notification_format').val(), diff --git a/changedetectionio/static/js/preview.js b/changedetectionio/static/js/preview.js index a9895cb2..d85dd9fd 100644 --- a/changedetectionio/static/js/preview.js +++ b/changedetectionio/static/js/preview.js @@ -40,9 +40,13 @@ if (selectElement) { if (selectedOption) { if (selectedOption.previousElementSibling) { document.getElementById('btn-previous').href = "?version=" + selectedOption.previousElementSibling.value; + } else { + document.getElementById('btn-previous').remove() } if (selectedOption.nextElementSibling) { document.getElementById('btn-next').href = "?version=" + selectedOption.nextElementSibling.value; + } else { + document.getElementById('btn-next').remove() } } diff --git a/changedetectionio/static/js/visual-selector.js b/changedetectionio/static/js/visual-selector.js index 9432ae9f..7cc54e86 100644 --- a/changedetectionio/static/js/visual-selector.js +++ b/changedetectionio/static/js/visual-selector.js @@ -2,250 +2,258 @@ // All rights reserved. // yes - this is really a hack, if you are a front-ender and want to help, please get in touch! 
-$(document).ready(function () { - - var current_selected_i; - var state_clicked = false; - - var c; - - // greyed out fill context - var xctx; - // redline highlight context - var ctx; - - var current_default_xpath = []; - var x_scale = 1; - var y_scale = 1; - var selector_image; - var selector_image_rect; - var selector_data; - - $('#visualselector-tab').click(function () { - $("img#selector-background").off('load'); - state_clicked = false; - current_selected_i = false; - bootstrap_visualselector(); +let runInClearMode = false; + +$(document).ready(() => { + let currentSelections = []; + let currentSelection = null; + let appendToList = false; + let c, xctx, ctx; + let xScale = 1, yScale = 1; + let selectorImage, selectorImageRect, selectorData; + + + // Global jQuery selectors with "Elem" appended + const $selectorCanvasElem = $('#selector-canvas'); + const $includeFiltersElem = $("#include_filters"); + const $selectorBackgroundElem = $("img#selector-background"); + const $selectorCurrentXpathElem = $("#selector-current-xpath span"); + const $fetchingUpdateNoticeElem = $('.fetching-update-notice'); + const $selectorWrapperElem = $("#selector-wrapper"); + + // Color constants + const FILL_STYLE_HIGHLIGHT = 'rgba(205,0,0,0.35)'; + const FILL_STYLE_GREYED_OUT = 'rgba(205,205,205,0.95)'; + const STROKE_STYLE_HIGHLIGHT = 'rgba(255,0,0, 0.9)'; + const FILL_STYLE_REDLINE = 'rgba(255,0,0, 0.1)'; + const STROKE_STYLE_REDLINE = 'rgba(225,0,0,0.9)'; + + $('#visualselector-tab').click(() => { + $selectorBackgroundElem.off('load'); + currentSelections = []; + bootstrapVisualSelector(); }); - $(document).on('keydown', function (event) { - if ($("img#selector-background").is(":visible")) { - if (event.key == "Escape") { - state_clicked = false; - ctx.clearRect(0, 0, c.width, c.height); - } + function clearReset() { + ctx.clearRect(0, 0, c.width, c.height); + + if ($includeFiltersElem.val().length) { + alert("Existing filters under the 'Filters & Triggers' tab were cleared."); 
} - }); + $includeFiltersElem.val(''); - // For when the page loads - if (!window.location.hash || window.location.hash != '#visualselector') { - $("img#selector-background").attr('src', ''); - return; + currentSelections = []; + + // Means we ignore the xpaths from the scraper marked as sel.highlight_as_custom_filter (it matched a previous selector) + runInClearMode = true; + + highlightCurrentSelected(); } - // Handle clearing button/link - $('#clear-selector').on('click', function (event) { - if (!state_clicked) { - alert('Oops, Nothing selected!'); + function splitToList(v) { + return v.split('\n').map(line => line.trim()).filter(line => line.length > 0); + } + + function sortScrapedElementsBySize() { + // Sort the currentSelections array by area (width * height) in descending order + selectorData['size_pos'].sort((a, b) => { + const areaA = a.width * a.height; + const areaB = b.width * b.height; + return areaB - areaA; + }); + } + + $(document).on('keydown keyup', (event) => { + if (event.code === 'ShiftLeft' || event.code === 'ShiftRight') { + appendToList = event.type === 'keydown'; + } + + if (event.type === 'keydown') { + if ($selectorBackgroundElem.is(":visible") && event.key === "Escape") { + clearReset(); + } } - state_clicked = false; - ctx.clearRect(0, 0, c.width, c.height); - xctx.clearRect(0, 0, c.width, c.height); - $("#include_filters").val(''); }); + $('#clear-selector').on('click', () => { + clearReset(); + }); + // So if they start switching between visualSelector and manual filters, stop it from rendering old filters + $('li.tab a').on('click', () => { + runInClearMode = true; + }); - bootstrap_visualselector(); + if (!window.location.hash || window.location.hash !== '#visualselector') { + $selectorBackgroundElem.attr('src', ''); + return; + } + bootstrapVisualSelector(); - function bootstrap_visualselector() { - if (1) { - // bootstrap it, this will trigger everything else - $("img#selector-background").on("error", function () { - 
$('.fetching-update-notice').html("<strong>Ooops!</strong> The VisualSelector tool needs atleast one fetched page, please unpause the watch and/or wait for the watch to complete fetching and then reload this page."); - $('.fetching-update-notice').css('color','#bb0000'); - $('#selector-current-xpath').hide(); - $('#clear-selector').hide(); - }).bind('load', function () { + function bootstrapVisualSelector() { + $selectorBackgroundElem + .on("error", () => { + $fetchingUpdateNoticeElem.html("<strong>Ooops!</strong> The VisualSelector tool needs at least one fetched page, please unpause the watch and/or wait for the watch to complete fetching and then reload this page.") + .css('color', '#bb0000'); + $('#selector-current-xpath, #clear-selector').hide(); + }) + .on('load', () => { console.log("Loaded background..."); c = document.getElementById("selector-canvas"); - // greyed out fill context xctx = c.getContext("2d"); - // redline highlight context ctx = c.getContext("2d"); - if ($("#include_filters").val().trim().length) { - current_default_xpath = $("#include_filters").val().split(/\r?\n/g); - } else { - current_default_xpath = []; - } - fetch_data(); - $('#selector-canvas').off("mousemove mousedown"); - // screenshot_url defined in the edit.html template - }).attr("src", screenshot_url); + fetchData(); + $selectorCanvasElem.off("mousemove mousedown"); + }) + .attr("src", screenshot_url); + + let s = `${$selectorBackgroundElem.attr('src')}?${new Date().getTime()}`; + $selectorBackgroundElem.attr('src', s); + } + + function alertIfFilterNotFound() { + let existingFilters = splitToList($includeFiltersElem.val()); + let sizePosXpaths = selectorData['size_pos'].map(sel => sel.xpath); + + for (let filter of existingFilters) { + if (!sizePosXpaths.includes(filter)) { + alert(`One or more of your existing filters was not found and will be removed when a new filter is selected.`); + break; + } } - // Tell visualSelector that the image should update - var s = 
$("img#selector-background").attr('src') + "?" + new Date().getTime(); - $("img#selector-background").attr('src', s) } - // This is fired once the img src is loaded in bootstrap_visualselector() - function fetch_data() { - // Image is ready - $('.fetching-update-notice').html("Fetching element data.."); + function fetchData() { + $fetchingUpdateNoticeElem.html("Fetching element data.."); $.ajax({ url: watch_visual_selector_data_url, context: document.body - }).done(function (data) { - $('.fetching-update-notice').html("Rendering.."); - selector_data = data; - console.log("Reported browser width from backend: " + data['browser_width']); - state_clicked = false; - set_scale(); - reflow_selector(); - $('.fetching-update-notice').fadeOut(); + }).done((data) => { + $fetchingUpdateNoticeElem.html("Rendering.."); + selectorData = data; + sortScrapedElementsBySize(); + console.log(`Reported browser width from backend: ${data['browser_width']}`); + + // Little sanity check for the user, alert them if something missing + alertIfFilterNotFound(); + + setScale(); + reflowSelector(); + $fetchingUpdateNoticeElem.fadeOut(); }); + } + function updateFiltersText() { + // Assuming currentSelections is already defined and contains the selections + let uniqueSelections = new Set(currentSelections.map(sel => (sel[0] === '/' ? 
`xpath:${sel.xpath}` : sel.xpath))); + + if (currentSelections.length > 0) { + // Convert the Set back to an array and join with newline characters + let textboxFilterText = Array.from(uniqueSelections).join("\n"); + $includeFiltersElem.val(textboxFilterText); + } } + function setScale() { + $selectorWrapperElem.show(); + selectorImage = $selectorBackgroundElem[0]; + selectorImageRect = selectorImage.getBoundingClientRect(); - function set_scale() { + $selectorCanvasElem.attr({ + 'height': selectorImageRect.height, + 'width': selectorImageRect.width + }); + $selectorWrapperElem.attr('width', selectorImageRect.width); + $('#visual-selector-heading').css('max-width', selectorImageRect.width + "px") - // some things to check if the scaling doesnt work - // - that the widths/sizes really are about the actual screen size cat elements.json |grep -o width......|sort|uniq - $("#selector-wrapper").show(); - selector_image = $("img#selector-background")[0]; - selector_image_rect = selector_image.getBoundingClientRect(); + xScale = selectorImageRect.width / selectorImage.naturalWidth; + yScale = selectorImageRect.height / selectorImage.naturalHeight; - // make the canvas the same size as the image - $('#selector-canvas').attr('height', selector_image_rect.height); - $('#selector-canvas').attr('width', selector_image_rect.width); - $('#selector-wrapper').attr('width', selector_image_rect.width); - x_scale = selector_image_rect.width / selector_data['browser_width']; - y_scale = selector_image_rect.height / selector_image.naturalHeight; - ctx.strokeStyle = 'rgba(255,0,0, 0.9)'; - ctx.fillStyle = 'rgba(255,0,0, 0.1)'; + ctx.strokeStyle = STROKE_STYLE_HIGHLIGHT; + ctx.fillStyle = FILL_STYLE_REDLINE; ctx.lineWidth = 3; - console.log("scaling set x: " + x_scale + " by y:" + y_scale); - $("#selector-current-xpath").css('max-width', selector_image_rect.width); + console.log("Scaling set x: " + xScale + " by y:" + yScale); + $("#selector-current-xpath").css('max-width', 
selectorImageRect.width); } - function reflow_selector() { - $(window).resize(function () { - set_scale(); - highlight_current_selected_i(); + function reflowSelector() { + $(window).resize(() => { + setScale(); + highlightCurrentSelected(); }); - var selector_currnt_xpath_text = $("#selector-current-xpath span"); - - set_scale(); - - console.log(selector_data['size_pos'].length + " selectors found"); - - // highlight the default one if we can find it in the xPath list - // or the xpath matches the default one - found = false; - if (current_default_xpath.length) { - // Find the first one that matches - // @todo In the future paint all that match - for (const c of current_default_xpath) { - for (var i = selector_data['size_pos'].length; i !== 0; i--) { - if (selector_data['size_pos'][i - 1].xpath.trim() === c.trim()) { - console.log("highlighting " + c); - current_selected_i = i - 1; - highlight_current_selected_i(); - found = true; - break; - } - } - if (found) { - break; - } - } - if (!found) { - alert("Unfortunately your existing CSS/xPath Filter was no longer found!"); - } - } + setScale(); - $('#selector-canvas').bind('mousemove', function (e) { - if (state_clicked) { - return; + console.log(selectorData['size_pos'].length + " selectors found"); + + let existingFilters = splitToList($includeFiltersElem.val()); + + selectorData['size_pos'].forEach(sel => { + if ((!runInClearMode && sel.highlight_as_custom_filter) || existingFilters.includes(sel.xpath)) { + console.log("highlighting " + c); + currentSelections.push(sel); } - ctx.clearRect(0, 0, c.width, c.height); - current_selected_i = null; + }); - // Add in offset - if ((typeof e.offsetX === "undefined" || typeof e.offsetY === "undefined") || (e.offsetX === 0 && e.offsetY === 0)) { - var targetOffset = $(e.target).offset(); + + highlightCurrentSelected(); + updateFiltersText(); + + $selectorCanvasElem.bind('mousemove', handleMouseMove.debounce(5)); + $selectorCanvasElem.bind('mousedown', 
handleMouseDown.debounce(5)); + $selectorCanvasElem.bind('mouseleave', highlightCurrentSelected.debounce(5)); + + function handleMouseMove(e) { + if (!e.offsetX && !e.offsetY) { + const targetOffset = $(e.target).offset(); e.offsetX = e.pageX - targetOffset.left; e.offsetY = e.pageY - targetOffset.top; } - // Reverse order - the most specific one should be deeper/"laster" - // Basically, find the most 'deepest' - var found = 0; - ctx.fillStyle = 'rgba(205,0,0,0.35)'; - // Will be sorted by smallest width*height first - for (var i = 0; i <= selector_data['size_pos'].length; i++) { - // draw all of them? let them choose somehow? - var sel = selector_data['size_pos'][i]; - // If we are in a bounding-box - if (e.offsetY > sel.top * y_scale && e.offsetY < sel.top * y_scale + sel.height * y_scale - && - e.offsetX > sel.left * y_scale && e.offsetX < sel.left * y_scale + sel.width * y_scale - - ) { - - // FOUND ONE - set_current_selected_text(sel.xpath); - ctx.strokeRect(sel.left * x_scale, sel.top * y_scale, sel.width * x_scale, sel.height * y_scale); - ctx.fillRect(sel.left * x_scale, sel.top * y_scale, sel.width * x_scale, sel.height * y_scale); - - // no need to keep digging - // @todo or, O to go out/up, I to go in - // or double click to go up/out the selector? 
- current_selected_i = i; - found += 1; - break; + ctx.fillStyle = FILL_STYLE_HIGHLIGHT; + + selectorData['size_pos'].forEach(sel => { + if (e.offsetY > sel.top * yScale && e.offsetY < sel.top * yScale + sel.height * yScale && + e.offsetX > sel.left * yScale && e.offsetX < sel.left * yScale + sel.width * yScale) { + setCurrentSelectedText(sel.xpath); + drawHighlight(sel); + currentSelections.push(sel); + currentSelection = sel; + highlightCurrentSelected(); + currentSelections.pop(); } - } + }) + } - }.debounce(5)); - function set_current_selected_text(s) { - selector_currnt_xpath_text[0].innerHTML = s; + function setCurrentSelectedText(s) { + $selectorCurrentXpathElem[0].innerHTML = s; } - function highlight_current_selected_i() { - if (state_clicked) { - state_clicked = false; - xctx.clearRect(0, 0, c.width, c.height); - return; - } - - var sel = selector_data['size_pos'][current_selected_i]; - if (sel[0] == '/') { - // @todo - not sure just checking / is right - $("#include_filters").val('xpath:' + sel.xpath); - } else { - $("#include_filters").val(sel.xpath); - } - xctx.fillStyle = 'rgba(205,205,205,0.95)'; - xctx.strokeStyle = 'rgba(225,0,0,0.9)'; - xctx.lineWidth = 3; - xctx.fillRect(0, 0, c.width, c.height); - // Clear out what only should be seen (make a clear/clean spot) - xctx.clearRect(sel.left * x_scale, sel.top * y_scale, sel.width * x_scale, sel.height * y_scale); - xctx.strokeRect(sel.left * x_scale, sel.top * y_scale, sel.width * x_scale, sel.height * y_scale); - state_clicked = true; - set_current_selected_text(sel.xpath); + function drawHighlight(sel) { + ctx.strokeRect(sel.left * xScale, sel.top * yScale, sel.width * xScale, sel.height * yScale); + ctx.fillRect(sel.left * xScale, sel.top * yScale, sel.width * xScale, sel.height * yScale); + } + function handleMouseDown() { + // If we are in 'appendToList' mode, grow the list, if not, just 1 + currentSelections = appendToList ? 
[...currentSelections, currentSelection] : [currentSelection]; + highlightCurrentSelected(); + updateFiltersText(); } + } + + function highlightCurrentSelected() { + xctx.fillStyle = FILL_STYLE_GREYED_OUT; + xctx.strokeStyle = STROKE_STYLE_REDLINE; + xctx.lineWidth = 3; + xctx.clearRect(0, 0, c.width, c.height); - $('#selector-canvas').bind('mousedown', function (e) { - highlight_current_selected_i(); + currentSelections.forEach(sel => { + //xctx.clearRect(sel.left * xScale, sel.top * yScale, sel.width * xScale, sel.height * yScale); + xctx.strokeRect(sel.left * xScale, sel.top * yScale, sel.width * xScale, sel.height * yScale); }); } - }); \ No newline at end of file diff --git a/changedetectionio/static/js/watch-settings.js b/changedetectionio/static/js/watch-settings.js index 73c66191..a55d2813 100644 --- a/changedetectionio/static/js/watch-settings.js +++ b/changedetectionio/static/js/watch-settings.js @@ -1,8 +1,8 @@ -function toggleOpacity(checkboxSelector, fieldSelector) { +function toggleOpacity(checkboxSelector, fieldSelector, inverted) { const checkbox = document.querySelector(checkboxSelector); const fields = document.querySelectorAll(fieldSelector); function updateOpacity() { - const opacityValue = checkbox.checked ? 0.6 : 1; + const opacityValue = !checkbox.checked ? (inverted ? 0.6 : 1) : (inverted ? 
1 : 0.6); fields.forEach(field => { field.style.opacity = opacityValue; }); @@ -25,6 +25,7 @@ $(document).ready(function () { $('#notification-tokens-info').toggle(); }); - toggleOpacity('#time_between_check_use_default', '#time_between_check'); + toggleOpacity('#time_between_check_use_default', '#time_between_check', false); + }); diff --git a/changedetectionio/static/styles/scss/parts/_visualselector.scss b/changedetectionio/static/styles/scss/parts/_visualselector.scss index d0608c0c..17e8a659 100644 --- a/changedetectionio/static/styles/scss/parts/_visualselector.scss +++ b/changedetectionio/static/styles/scss/parts/_visualselector.scss @@ -1,6 +1,8 @@ #selector-wrapper { height: 100%; + text-align: center; + max-height: 70vh; overflow-y: scroll; position: relative; diff --git a/changedetectionio/static/styles/scss/styles.scss b/changedetectionio/static/styles/scss/styles.scss index 1c1e8b5b..b720b6d4 100644 --- a/changedetectionio/static/styles/scss/styles.scss +++ b/changedetectionio/static/styles/scss/styles.scss @@ -186,12 +186,17 @@ code { } } -.watch-tag-list { - color: var(--color-white); +.inline-tag { white-space: nowrap; - background: var(--color-text-watch-tag-list); border-radius: 5px; padding: 2px 5px; + margin-right: 4px; +} + +.watch-tag-list { + color: var(--color-white); + background: var(--color-text-watch-tag-list); + @extend .inline-tag; } .box { @@ -671,14 +676,25 @@ footer { and also iPads specifically. 
*/ .watch-table { + /* make headings work on mobile */ + thead { + display: block; + tr { + th { + display: inline-block; + } + } + .empty-cell { + display: none; + } + } /* Force table to not be like tables anymore */ - thead, - tbody, - th, - td, - tr { - display: block; + tbody { + td, + tr { + display: block; + } } .last-checked { @@ -702,13 +718,6 @@ footer { display: inline-block; } - /* Hide table headers (but not display: none;, for accessibility) */ - thead tr { - position: absolute; - top: -9999px; - left: -9999px; - } - .pure-table td, .pure-table th { border: none; @@ -753,6 +762,7 @@ footer { thead { background-color: var(--color-background-table-thead); color: var(--color-text); + border-bottom: 1px solid var(--color-background-table-thead); } td, @@ -1021,6 +1031,11 @@ ul { border-radius: 10px; margin-bottom: 1em; display: none; + button { + /* some space if they wrap the page */ + margin-bottom: 3px; + margin-top: 3px; + } } .checkbox-uuid { @@ -1051,9 +1066,8 @@ ul { .tracking-ldjson-price-data { background-color: var(--color-background-button-green); color: #000; - padding: 3px; - border-radius: 3px; - white-space: nowrap; + opacity: 0.6; + @extend .inline-tag; } .ldjson-price-track-offer { @@ -1099,9 +1113,17 @@ ul { background-color: var(--color-background-button-cancel); color: #777; } - padding: 3px; - border-radius: 3px; - white-space: nowrap; + &.error { + background-color: var(--color-background-button-error); + color: #fff; + opacity: 0.7; + } + + svg { + vertical-align: middle; + } + + @extend .inline-tag; } #chrome-extension-link { diff --git a/changedetectionio/static/styles/styles.css b/changedetectionio/static/styles/styles.css index b09d5599..4f3fec10 100644 --- a/changedetectionio/static/styles/styles.css +++ b/changedetectionio/static/styles/styles.css @@ -531,12 +531,15 @@ code { content: url(); margin: 0 3px 0 5px; } -.watch-tag-list { - color: var(--color-white); +.inline-tag, .watch-tag-list, .tracking-ldjson-price-data, 
.restock-label { white-space: nowrap; - background: var(--color-text-watch-tag-list); border-radius: 5px; - padding: 2px 5px; } + padding: 2px 5px; + margin-right: 4px; } + +.watch-tag-list { + color: var(--color-white); + background: var(--color-text-watch-tag-list); } .box { max-width: 80%; @@ -863,14 +866,17 @@ footer { and also iPads specifically. */ .watch-table { + /* make headings work on mobile */ /* Force table to not be like tables anymore */ - /* Force table to not be like tables anymore */ - /* Hide table headers (but not display: none;, for accessibility) */ } - .watch-table thead, - .watch-table tbody, - .watch-table th, - .watch-table td, - .watch-table tr { + /* Force table to not be like tables anymore */ } + .watch-table thead { + display: block; } + .watch-table thead tr th { + display: inline-block; } + .watch-table thead .empty-cell { + display: none; } + .watch-table tbody td, + .watch-table tbody tr { display: block; } .watch-table .last-checked > span { vertical-align: middle; } @@ -882,10 +888,6 @@ footer { content: "Last Changed "; } .watch-table td.inline { display: inline-block; } - .watch-table thead tr { - position: absolute; - top: -9999px; - left: -9999px; } .watch-table .pure-table td, .watch-table .pure-table th { border: none; } @@ -912,7 +914,8 @@ footer { border-color: var(--color-border-table-cell); } .pure-table thead { background-color: var(--color-background-table-thead); - color: var(--color-text); } + color: var(--color-text); + border-bottom: 1px solid var(--color-background-table-thead); } .pure-table td, .pure-table th { border-left-color: var(--color-border-table-cell); } @@ -1065,6 +1068,7 @@ ul { #selector-wrapper { height: 100%; + text-align: center; max-height: 70vh; overflow-y: scroll; position: relative; } @@ -1127,6 +1131,10 @@ ul { border-radius: 10px; margin-bottom: 1em; display: none; } + #checkbox-operations button { + /* some space if they wrap the page */ + margin-bottom: 3px; + margin-top: 3px; } 
.checkbox-uuid > * { vertical-align: middle; } @@ -1148,9 +1156,7 @@ ul { .tracking-ldjson-price-data { background-color: var(--color-background-button-green); color: #000; - padding: 3px; - border-radius: 3px; - white-space: nowrap; } + opacity: 0.6; } .ldjson-price-track-offer { font-weight: bold; @@ -1175,16 +1181,23 @@ ul { #quick-watch-processor-type ul li > * { display: inline-block; } -.restock-label { - padding: 3px; - border-radius: 3px; - white-space: nowrap; } - .restock-label.in-stock { - background-color: var(--color-background-button-green); - color: #fff; } - .restock-label.not-in-stock { - background-color: var(--color-background-button-cancel); - color: #777; } +.restock-label.in-stock { + background-color: var(--color-background-button-green); + color: #fff; } + +.restock-label.not-in-stock { + background-color: var(--color-background-button-cancel); + color: #777; } + +.restock-label.error { + background-color: var(--color-background-button-error); + color: #fff; + opacity: 0.7; } + + +.restock-label svg { + vertical-align: middle; } + #chrome-extension-link { padding: 9px; diff --git a/changedetectionio/store.py b/changedetectionio/store.py index afa6b2ae..c3772557 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -18,6 +18,9 @@ import time import uuid as uuid_builder from loguru import logger +from .processors import get_custom_watch_obj_for_processor +from .processors.restock_diff import Restock + # Because the server will run as a daemon and wont know the URL for notification links when firing off a notification BASE_URL_NOT_SET_TEXT = '("Base URL" not set - see settings - notifications)' @@ -81,9 +84,13 @@ class ChangeDetectionStore: # Convert each existing watch back to the Watch.model object for uuid, watch in self.__data['watching'].items(): - watch['uuid']=uuid - self.__data['watching'][uuid] = Watch.model(datastore_path=self.datastore_path, default=watch) - logger.info(f"Watching: {uuid} 
{self.__data['watching'][uuid]['url']}") + self.__data['watching'][uuid] = self.rehydrate_entity(uuid, watch) + logger.info(f"Watching: {uuid} {watch['url']}") + + # And for Tags also, should be Restock type because it has extra settings + for uuid, tag in self.__data['settings']['application']['tags'].items(): + self.__data['settings']['application']['tags'][uuid] = self.rehydrate_entity(uuid, tag, processor_override='restock_diff') + logger.info(f"Tag: {uuid} {tag['title']}") # First time ran, Create the datastore. except (FileNotFoundError): @@ -124,12 +131,12 @@ class ChangeDetectionStore: self.__data['app_guid'] = str(uuid_builder.uuid4()) # Generate the URL access token for RSS feeds - if not 'rss_access_token' in self.__data['settings']['application']: + if not self.__data['settings']['application'].get('rss_access_token'): secret = secrets.token_hex(16) self.__data['settings']['application']['rss_access_token'] = secret # Generate the API access token - if not 'api_access_token' in self.__data['settings']['application']: + if not self.__data['settings']['application'].get('api_access_token'): secret = secrets.token_hex(16) self.__data['settings']['application']['api_access_token'] = secret @@ -138,6 +145,22 @@ class ChangeDetectionStore: # Finally start the thread that will manage periodic data saves to JSON save_data_thread = threading.Thread(target=self.save_datastore).start() + def rehydrate_entity(self, uuid, entity, processor_override=None): + """Set the dict back to the dict Watch object""" + entity['uuid'] = uuid + + if processor_override: + watch_class = get_custom_watch_obj_for_processor(processor_override) + entity['processor']=processor_override + else: + watch_class = get_custom_watch_obj_for_processor(entity.get('processor')) + + if entity.get('uuid') != 'text_json_diff': + logger.trace(f"Loading Watch object '{watch_class.__module__}.{watch_class.__name__}' for UUID {uuid}") + + entity = watch_class(datastore_path=self.datastore_path, 
default=entity) + return entity + def set_last_viewed(self, uuid, timestamp): logger.debug(f"Setting watch UUID: {uuid} last viewed to {int(timestamp)}") self.data['watching'][uuid].update({'last_viewed': int(timestamp)}) @@ -163,7 +186,6 @@ class ChangeDetectionStore: del (update_obj[dict_key]) self.__data['watching'][uuid].update(update_obj) - self.needs_write = True @property @@ -177,8 +199,11 @@ class ChangeDetectionStore: @property def has_unviewed(self): + if not self.__data.get('watching'): + return None + for uuid, watch in self.__data['watching'].items(): - if watch.viewed == False: + if watch.history_n >= 2 and watch.viewed == False: return True return False @@ -241,31 +266,7 @@ class ChangeDetectionStore: # Remove a watchs data but keep the entry (URL etc) def clear_watch_history(self, uuid): - import pathlib - - self.__data['watching'][uuid].update({ - 'browser_steps_last_error_step' : None, - 'check_count': 0, - 'fetch_time' : 0.0, - 'has_ldjson_price_data': None, - 'in_stock': None, - 'last_checked': 0, - 'last_error': False, - 'last_notification_error': False, - 'last_viewed': 0, - 'previous_md5': False, - 'previous_md5_before_filters': False, - 'remote_server_reply': None, - 'track_ldjson_price_data': None, - }) - - # JSON Data, Screenshots, Textfiles (history index and snapshots), HTML in the future etc - for item in pathlib.Path(os.path.join(self.datastore_path, uuid)).rglob("*.*"): - unlink(item) - - # Force the attr to recalculate - bump = self.__data['watching'][uuid].history - + self.__data['watching'][uuid].clear_watch() self.needs_write_urgent = True def add_watch(self, url, tag='', extras=None, tag_uuids=None, write_to_disk_now=True): @@ -342,11 +343,13 @@ class ChangeDetectionStore: if apply_extras.get('tags'): apply_extras['tags'] = list(set(apply_extras.get('tags'))) - new_watch = Watch.model(datastore_path=self.datastore_path, url=url) + # If the processor also has its own Watch implementation + watch_class = 
get_custom_watch_obj_for_processor(apply_extras.get('processor')) + new_watch = watch_class(datastore_path=self.datastore_path, url=url) new_uuid = new_watch.get('uuid') - logger.debug(f"Adding URL {url} - {new_uuid}") + logger.debug(f"Adding URL '{url}' - {new_uuid}") for k in ['uuid', 'history', 'last_checked', 'last_changed', 'newest_history_key', 'previous_md5', 'viewed']: if k in apply_extras: @@ -376,46 +379,6 @@ class ChangeDetectionStore: return False - # Save as PNG, PNG is larger but better for doing visual diff in the future - def save_screenshot(self, watch_uuid, screenshot: bytes, as_error=False): - if not self.data['watching'].get(watch_uuid): - return - - if as_error: - target_path = os.path.join(self.datastore_path, watch_uuid, "last-error-screenshot.png") - else: - target_path = os.path.join(self.datastore_path, watch_uuid, "last-screenshot.png") - - self.data['watching'][watch_uuid].ensure_data_dir_exists() - - with open(target_path, 'wb') as f: - f.write(screenshot) - f.close() - - - def save_error_text(self, watch_uuid, contents): - if not self.data['watching'].get(watch_uuid): - return - - self.data['watching'][watch_uuid].ensure_data_dir_exists() - target_path = os.path.join(self.datastore_path, watch_uuid, "last-error.txt") - with open(target_path, 'w') as f: - f.write(contents) - - def save_xpath_data(self, watch_uuid, data, as_error=False): - - if not self.data['watching'].get(watch_uuid): - return - if as_error: - target_path = os.path.join(self.datastore_path, watch_uuid, "elements-error.json") - else: - target_path = os.path.join(self.datastore_path, watch_uuid, "elements.json") - self.data['watching'][watch_uuid].ensure_data_dir_exists() - with open(target_path, 'w') as f: - f.write(json.dumps(data)) - f.close() - - def sync_to_json(self): logger.info("Saving JSON..") try: @@ -622,7 +585,8 @@ class ChangeDetectionStore: # Eventually almost everything todo with a watch will apply as a Tag # So we use the same model as a Watch with 
self.lock: - new_tag = Watch.model(datastore_path=self.datastore_path, default={ + from .model import Tag + new_tag = Tag.model(datastore_path=self.datastore_path, default={ 'title': name.strip(), 'date_created': int(time.time()) }) @@ -661,6 +625,39 @@ class ChangeDetectionStore: return next((v for v in tags if v.get('title', '').lower() == tag_name.lower()), None) + def any_watches_have_processor_by_name(self, processor_name): + for watch in self.data['watching'].values(): + if watch.get('processor') == processor_name: + return True + return False + + def get_unique_notification_tokens_available(self): + # Ask each type of watch if they have any extra notification token to add to the validation + extra_notification_tokens = {} + watch_processors_checked = set() + + for watch_uuid, watch in self.__data['watching'].items(): + processor = watch.get('processor') + if processor not in watch_processors_checked: + extra_notification_tokens.update(watch.extra_notification_token_values()) + watch_processors_checked.add(processor) + + return extra_notification_tokens + + def get_unique_notification_token_placeholders_available(self): + # The actual description of the tokens, could be combined with get_unique_notification_tokens_available instead of doing this twice + extra_notification_tokens = [] + watch_processors_checked = set() + + for watch_uuid, watch in self.__data['watching'].items(): + processor = watch.get('processor') + if processor not in watch_processors_checked: + extra_notification_tokens+=watch.extra_notification_token_placeholder_info() + watch_processors_checked.add(processor) + + return extra_notification_tokens + + def get_updates_available(self): import inspect updates_available = [] @@ -884,3 +881,30 @@ class ChangeDetectionStore: # Something custom here self.__data["watching"][uuid]['time_between_check_use_default'] = False + # Correctly set datatype for older installs where 'tag' was string and update_12 did not catch it + def update_16(self): + for 
uuid, watch in self.data['watching'].items(): + if isinstance(watch.get('tags'), str): + self.data['watching'][uuid]['tags'] = [] + + # Migrate old 'in_stock' values to the new Restock + def update_17(self): + for uuid, watch in self.data['watching'].items(): + if 'in_stock' in watch: + watch['restock'] = Restock({'in_stock': watch.get('in_stock')}) + del watch['in_stock'] + + # Migrate old restock settings + def update_18(self): + for uuid, watch in self.data['watching'].items(): + if not watch.get('restock_settings'): + # So we enable price following by default + self.data['watching'][uuid]['restock_settings'] = {'follow_price_changes': True} + + # Migrate and cleanoff old value + self.data['watching'][uuid]['restock_settings']['in_stock_processing'] = 'in_stock_only' if watch.get( + 'in_stock_only') else 'all_changes' + + if self.data['watching'][uuid].get('in_stock_only'): + del (self.data['watching'][uuid]['in_stock_only']) + diff --git a/changedetectionio/templates/_common_fields.html b/changedetectionio/templates/_common_fields.html index 932f3fb7..14fa9147 100644 --- a/changedetectionio/templates/_common_fields.html +++ b/changedetectionio/templates/_common_fields.html @@ -1,7 +1,7 @@ {% from '_helpers.html' import render_field %} -{% macro render_common_settings_form(form, emailprefix, settings_application) %} +{% macro render_common_settings_form(form, emailprefix, settings_application, extra_notification_token_placeholder_info) %} <div class="pure-control-group"> {{ render_field(form.notification_urls, rows=5, placeholder="Examples: Gitter - gitter://token/room @@ -107,7 +107,15 @@ <tr> <td><code>{{ '{{triggered_text}}' }}</code></td> <td>Text that tripped the trigger from filters</td> - </tr> + + {% if extra_notification_token_placeholder_info %} + {% for token in extra_notification_token_placeholder_info %} + <tr> + <td><code>{{ '{{' }}{{ token[0] }}{{ '}}' }}</code></td> + <td>{{ token[1] }}</td> + </tr> + {% endfor %} + {% endif %} </tbody> </table> 
<div class="pure-form-message-inline"> diff --git a/changedetectionio/templates/base.html b/changedetectionio/templates/base.html index 87018a7d..27ce8419 100644 --- a/changedetectionio/templates/base.html +++ b/changedetectionio/templates/base.html @@ -26,7 +26,11 @@ <meta name="msapplication-TileColor" content="#da532c"> <meta name="msapplication-config" content="favicons/browserconfig.xml"> <meta name="theme-color" content="#ffffff"> + <script> + const csrftoken="{{ csrf_token() }}"; + </script> <script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script> + <script src="{{url_for('static_content', group='js', filename='csrf.js')}}" defer></script> </head> <body> diff --git a/changedetectionio/templates/edit.html b/changedetectionio/templates/edit.html index 6d6d19fc..e81035e8 100644 --- a/changedetectionio/templates/edit.html +++ b/changedetectionio/templates/edit.html @@ -16,7 +16,7 @@ const email_notification_prefix=JSON.parse('{{ emailprefix|tojson }}'); {% endif %} const notification_base_url="{{url_for('ajax_callback_send_notification_test', watch_uuid=uuid)}}"; - const playwright_enabled={% if playwright_enabled %} true {% else %} false {% endif %}; + const playwright_enabled={% if playwright_enabled %}true{% else %}false{% endif %}; const recheck_proxy_start_url="{{url_for('check_proxies.start_check', uuid=uuid)}}"; const proxy_recheck_status_url="{{url_for('check_proxies.get_recheck_status', uuid=uuid)}}"; const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}"; @@ -41,18 +41,16 @@ <ul> <li class="tab" id=""><a href="#general">General</a></li> <li class="tab"><a href="#request">Request</a></li> + {% if extra_tab_content %} + <li class="tab"><a href="#extras_tab">{{ extra_tab_content }}</a></li> + {% endif %} {% if playwright_enabled %} <li class="tab"><a id="browsersteps-tab" href="#browser-steps">Browser Steps</a></li> {% endif %} - {% if watch['processor'] == 'text_json_diff' %} <li 
class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li> <li class="tab"><a href="#filters-and-triggers">Filters & Triggers</a></li> {% endif %} - - {% if watch['processor'] == 'restock_diff' %} - <li class="tab"><a href="#restock">Restock Detection</a></li> - {% endif %} <li class="tab"><a href="#notifications">Notifications</a></li> <li class="tab"><a href="#stats">Stats</a></li> </ul> @@ -69,16 +67,9 @@ {{ render_field(form.url, placeholder="https://...", required=true, class="m-d") }} <span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span><br> <span class="pure-form-message-inline">You can use variables in the URL, perfect for inserting the current date and other logic, <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a></span><br> - <span class="pure-form-message-inline"> - {% if watch['processor'] == 'text_json_diff' %} - Current mode: <strong>Webpage Text/HTML, JSON and PDF changes.</strong><br> - <a href="{{url_for('edit_page', uuid=uuid)}}?switch_processor=restock_diff" class="pure-button button-xsmall">Switch to re-stock detection mode.</a> - {% else %} - Current mode: <strong>Re-stock detection.</strong><br> - <a href="{{url_for('edit_page', uuid=uuid)}}?switch_processor=text_json_diff" class="pure-button button-xsmall">Switch to Webpage Text/HTML, JSON and PDF changes mode.</a> - {% endif %} - </span> - + </div> + <div class="pure-control-group inline-radio"> + {{ render_field(form.processor) }} </div> <div class="pure-control-group"> {{ render_field(form.title, class="m-d") }} @@ -255,7 +246,7 @@ User-Agent: wonderbra 1.0") }} {% endif %} <a href="#notifications" id="notification-setting-reset-to-default" class="pure-button button-xsmall" style="right: 20px; 
top: 20px; position: absolute; background-color: #5f42dd; border-radius: 4px; font-size: 70%; color: #fff">Use system defaults</a> - {{ render_common_settings_form(form, emailprefix, settings_application) }} + {{ render_common_settings_form(form, emailprefix, settings_application, extra_notification_token_placeholder_info) }} </div> </fieldset> </div> @@ -292,7 +283,7 @@ xpath://body/div/span[contains(@class, 'example-class')]", <ul> <li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li> {% if jq_support %} - <li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>.</li> + <li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>. 
Prefix <code>jqraw:</code> outputs the results as text instead of a JSON list.</li> {% else %} <li>jq support not installed</li> {% endif %} @@ -413,18 +404,12 @@ Unavailable") }} </div> </div> {% endif %} - - {% if watch['processor'] == 'restock_diff' %} - <div class="tab-pane-inner" id="restock"> - <fieldset> - <div class="pure-control-group"> - {{ render_checkbox_field(form.in_stock_only) }} - <span class="pure-form-message-inline">Only trigger notifications when page changes from <strong>out of stock</strong> to <strong>back in stock</strong></span> - </div> - </fieldset> + {# rendered sub Template #} + {% if extra_form_content %} + <div class="tab-pane-inner" id="extras_tab"> + {{ extra_form_content|safe }} </div> - {% endif %} - + {% endif %} {% if watch['processor'] == 'text_json_diff' %} <div class="tab-pane-inner visual-selector-ui" id="visualselector"> <img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality"> @@ -432,9 +417,8 @@ Unavailable") }} <fieldset> <div class="pure-control-group"> {% if visualselector_enabled %} - <span class="pure-form-message-inline"> - The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection ‐ after the <i>Browser Steps</i> has completed.<br> - This tool is a helper to manage filters in the "CSS/JSONPath/JQ/XPath Filters" box of the <a href="#filters-and-triggers">Filters & Triggers</a> tab. + <span class="pure-form-message-inline" id="visual-selector-heading"> + The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the <a href="#filters-and-triggers">Filters & Triggers</a> tab. Use <strong>Shift+Click</strong> to select multiple items. 
</span> <div id="selector-header"> @@ -495,6 +479,12 @@ Unavailable") }} </tr> </tbody> </table> + {% if watch.history_n %} + <p> + <a href="{{url_for('watch_get_latest_html', uuid=uuid)}}" class="pure-button button-small">Download latest HTML snapshot</a> + </p> + {% endif %} + </div> </div> <div id="actions"> diff --git a/changedetectionio/templates/preview.html b/changedetectionio/templates/preview.html index 8bc231e1..28431fe9 100644 --- a/changedetectionio/templates/preview.html +++ b/changedetectionio/templates/preview.html @@ -66,7 +66,7 @@ </div> <div class="tab-pane-inner" id="text"> - <div class="snapshot-age">{{ watch.snapshot_text_ctime|format_timestamp_timeago }}</div> + <div class="snapshot-age">{{ current_version|format_timestamp_timeago }}</div> <span class="ignored">Grey lines are ignored</span> <span class="triggered">Blue lines are triggers</span> <span class="tip"><strong>Pro-tip</strong>: Highlight text to add to ignore filters</span> diff --git a/changedetectionio/templates/settings.html b/changedetectionio/templates/settings.html index 0e3cea34..f1131f94 100644 --- a/changedetectionio/templates/settings.html +++ b/changedetectionio/templates/settings.html @@ -62,6 +62,9 @@ <span class="pure-form-message-inline">Allow access to view watch diff page when password is enabled (Good for sharing the diff page) </span> </div> + <div class="pure-control-group"> + {{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }} + </div> <div class="pure-control-group"> {{ render_field(form.application.form.pager_size) }} <span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span> @@ -73,7 +76,7 @@ </div> <div class="pure-control-group"> {{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }} - <span class="pure-form-message-inline">When a page contains HTML, but no renderable text appears (empty page), is this considered a change?</span> + <span 
class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span> </div> {% if form.requests.proxy %} <div class="pure-control-group inline-radio"> @@ -89,7 +92,7 @@ <div class="tab-pane-inner" id="notifications"> <fieldset> <div class="field-group"> - {{ render_common_settings_form(form.application.form, emailprefix, settings_application) }} + {{ render_common_settings_form(form.application.form, emailprefix, settings_application, extra_notification_token_placeholder_info) }} </div> </fieldset> <div class="pure-control-group" id="notification-base-url"> diff --git a/changedetectionio/templates/watch-overview.html b/changedetectionio/templates/watch-overview.html index 15f538fb..736e19da 100644 --- a/changedetectionio/templates/watch-overview.html +++ b/changedetectionio/templates/watch-overview.html @@ -59,6 +59,11 @@ {% set sort_order = sort_order or 'asc' %} {% set sort_attribute = sort_attribute or 'last_changed' %} {% set pagination_page = request.args.get('page', 0) %} + {% set cols_required = 6 %} + {% set any_has_restock_price_processor = datastore.any_watches_have_processor_by_name("restock_diff") %} + {% if any_has_restock_price_processor %} + {% set cols_required = cols_required + 1 %} + {% endif %} <div id="watch-table-wrapper"> @@ -68,17 +73,20 @@ {% set link_order = "desc" if sort_order == 'asc' else "asc" %} {% set arrow_span = "" %} <th><input style="vertical-align: middle" type="checkbox" id="check-all" > <a class="{{ 'active '+link_order if sort_attribute == 'date_created' else 'inactive' }}" href="{{url_for('index', sort='date_created', order=link_order, tag=active_tag_uuid)}}"># <span class='arrow {{link_order}}'></span></a></th> - <th></th> + <th class="empty-cell"></th> <th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('index', sort='label', order=link_order, tag=active_tag_uuid)}}">Website <span class='arrow 
{{link_order}}'></span></a></th> + {% if any_has_restock_price_processor %} + <th>Restock & Price</th> + {% endif %} <th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('index', sort='last_checked', order=link_order, tag=active_tag_uuid)}}">Last Checked <span class='arrow {{link_order}}'></span></a></th> <th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('index', sort='last_changed', order=link_order, tag=active_tag_uuid)}}">Last Changed <span class='arrow {{link_order}}'></span></a></th> - <th></th> + <th class="empty-cell"></th> </tr> </thead> <tbody> {% if not watches|length %} <tr> - <td colspan="6">No website watches configured, please add a URL in the box above, or <a href="{{ url_for('import_page')}}" >import a list</a>.</td> + <td colspan="{{ cols_required }}" style="text-wrap: wrap;">No website watches configured, please add a URL in the box above, or <a href="{{ url_for('import_page')}}" >import a list</a>.</td> </tr> {% endif %} {% for watch in (watches|sort(attribute=sort_attribute, reverse=sort_order == 'asc'))|pagination_slice(skip=pagination.skip) %} @@ -91,6 +99,7 @@ {% if watch.last_notification_error is defined and watch.last_notification_error != False %}error{% endif %} {% if watch.paused is defined and watch.paused != False %}paused{% endif %} {% if is_unviewed %}unviewed{% endif %} + {% if watch.has_restock_info %} has-restock-info {% if watch['restock']['in_stock'] %}in-stock{% else %}not-in-stock{% endif %} {% else %}no-restock-info{% endif %} {% if watch.uuid in queued_uuids %}queued{% endif %}"> <td class="inline checkbox-uuid" ><input name="uuids" type="checkbox" value="{{ watch.uuid}} " > <span>{{ loop.index+pagination.skip }}</span></td> <td class="inline watch-controls"> @@ -135,30 +144,39 @@ {% if watch['processor'] == 'text_json_diff' %} {% if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] %} - 
<div class="ldjson-price-track-offer">Embedded price data detected, follow only price data? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div> - {% endif %} - {% if watch['track_ldjson_price_data'] == 'accepted' %} - <span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}" class="status-icon price-follow-tag-icon" > Price</span> + <div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div> {% endif %} {% endif %} - - {% if watch['processor'] == 'restock_diff' %} - <span class="restock-label {{'in-stock' if watch['in_stock'] else 'not-in-stock' }}" title="detecting restock conditions"> - <!-- maybe some object watch['processor'][restock_diff] or.. 
--> - {% if watch['last_checked'] and watch['in_stock'] != None %} - {% if watch['in_stock'] %} In stock {% else %} Not in stock {% endif %} - {% else %} - Not yet checked - {% endif %} - </span> + {% if watch['processor'] == 'restock_diff' %} + <span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}" class="status-icon price-follow-tag-icon" > Price</span> {% endif %} - - {% for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() %} <span class="watch-tag-list">{{ watch_tag.title }}</span> {% endfor %} + </td> + <!-- @todo make it so any watch handler obj can expose this ---> +{% if any_has_restock_price_processor %} + <td class="restock-and-price"> + {% if watch['processor'] == 'restock_diff' %} + {% if watch.has_restock_info %} + <span class="restock-label {{'in-stock' if watch['restock']['in_stock'] else 'not-in-stock' }}" title="Detecting restock and price"> + <!-- maybe some object watch['processor'][restock_diff] or.. 
--> + {% if watch['restock']['in_stock'] %} In stock {% else %} Not in stock {% endif %} + </span> + {% endif %} + {% if watch.get('restock') and watch['restock']['price'] != None %} + {% if watch['restock']['price'] != None %} + <span class="restock-label price" title="Price"> + {{ watch['restock']['price']|format_number_locale }} {{ watch['restock']['currency'] }} + </span> + {% endif %} + {% elif not watch.has_restock_info %} + <span class="restock-label error">No information</span> + {% endif %} + {% endif %} </td> +{% endif %} <td class="last-checked" data-timestamp="{{ watch.last_checked }}">{{watch|format_last_checked_time|safe}}</td> <td class="last-changed" data-timestamp="{{ watch.last_changed }}">{% if watch.history_n >=2 and watch.last_changed >0 %} {{watch.last_changed|format_timestamp_timeago}} diff --git a/changedetectionio/tests/conftest.py b/changedetectionio/tests/conftest.py index 7a328823..50f7104b 100644 --- a/changedetectionio/tests/conftest.py +++ b/changedetectionio/tests/conftest.py @@ -1,4 +1,7 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 +import resource +import time +from threading import Thread import pytest from changedetectionio import changedetection_app @@ -23,6 +26,36 @@ def reportlog(pytestconfig): yield logger.remove(handler_id) + +def track_memory(memory_usage, ): + while not memory_usage["stop"]: + max_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + memory_usage["peak"] = max(memory_usage["peak"], max_rss) + time.sleep(0.01) # Adjust the sleep time as needed + +@pytest.fixture(scope='function') +def measure_memory_usage(request): + memory_usage = {"peak": 0, "stop": False} + tracker_thread = Thread(target=track_memory, args=(memory_usage,)) + tracker_thread.start() + + yield + + memory_usage["stop"] = True + tracker_thread.join() + + # Note: ru_maxrss is in kilobytes on Unix-based systems + max_memory_used = memory_usage["peak"] / 1024 # Convert to MB + s = f"Peak memory used by the test {request.node.fspath} - 
'{request.node.name}': {max_memory_used:.2f} MB" + logger.debug(s) + + with open("test-memory.log", 'a') as f: + f.write(f"{s}\n") + + # Assert that the memory usage is less than 200MB +# assert max_memory_used < 150, f"Memory usage exceeded 200MB: {max_memory_used:.2f} MB" + + def cleanup(datastore_path): import glob # Unlink test output files diff --git a/changedetectionio/tests/custom_browser_url/test_custom_browser_url.py b/changedetectionio/tests/custom_browser_url/test_custom_browser_url.py index 212d2e27..87490a77 100644 --- a/changedetectionio/tests/custom_browser_url/test_custom_browser_url.py +++ b/changedetectionio/tests/custom_browser_url/test_custom_browser_url.py @@ -1,4 +1,4 @@ -# !/usr/bin/python3 +#!/usr/bin/env python3 import os from flask import url_for @@ -77,13 +77,13 @@ def do_test(client, live_server, make_test_use_extra_browser=False): # Requires playwright to be installed -def test_request_via_custom_browser_url(client, live_server): +def test_request_via_custom_browser_url(client, live_server, measure_memory_usage): live_server_setup(live_server) # We do this so we can grep the logs of the custom container and see if the request actually went through that container do_test(client, live_server, make_test_use_extra_browser=True) -def test_request_not_via_custom_browser_url(client, live_server): +def test_request_not_via_custom_browser_url(client, live_server, measure_memory_usage): live_server_setup(live_server) # We do this so we can grep the logs of the custom container and see if the request actually went through that container do_test(client, live_server, make_test_use_extra_browser=False) diff --git a/changedetectionio/tests/fetchers/conftest.py b/changedetectionio/tests/fetchers/conftest.py index 430513d4..9a4bf56b 100644 --- a/changedetectionio/tests/fetchers/conftest.py +++ b/changedetectionio/tests/fetchers/conftest.py @@ -1,3 +1,3 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 from .. 
import conftest diff --git a/changedetectionio/tests/fetchers/test_content.py b/changedetectionio/tests/fetchers/test_content.py index bbabe5f1..8d468cd4 100644 --- a/changedetectionio/tests/fetchers/test_content.py +++ b/changedetectionio/tests/fetchers/test_content.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import time from flask import url_for @@ -6,7 +6,7 @@ from ..util import live_server_setup, wait_for_all_checks import logging # Requires playwright to be installed -def test_fetch_webdriver_content(client, live_server): +def test_fetch_webdriver_content(client, live_server, measure_memory_usage): live_server_setup(live_server) ##################### diff --git a/changedetectionio/tests/fetchers/test_custom_js_before_content.py b/changedetectionio/tests/fetchers/test_custom_js_before_content.py index bec4334a..24d715b3 100644 --- a/changedetectionio/tests/fetchers/test_custom_js_before_content.py +++ b/changedetectionio/tests/fetchers/test_custom_js_before_content.py @@ -3,7 +3,7 @@ from flask import url_for from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client -def test_execute_custom_js(client, live_server): +def test_execute_custom_js(client, live_server, measure_memory_usage): live_server_setup(live_server) assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test" diff --git a/changedetectionio/tests/proxy_list/conftest.py b/changedetectionio/tests/proxy_list/conftest.py index 95812e2e..0adb1c43 100644 --- a/changedetectionio/tests/proxy_list/conftest.py +++ b/changedetectionio/tests/proxy_list/conftest.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 from .. 
import conftest diff --git a/changedetectionio/tests/proxy_list/test_multiple_proxy.py b/changedetectionio/tests/proxy_list/test_multiple_proxy.py index f2e0d1a4..e312df5a 100644 --- a/changedetectionio/tests/proxy_list/test_multiple_proxy.py +++ b/changedetectionio/tests/proxy_list/test_multiple_proxy.py @@ -1,11 +1,11 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import os from flask import url_for from ..util import live_server_setup, wait_for_all_checks -def test_preferred_proxy(client, live_server): +def test_preferred_proxy(client, live_server, measure_memory_usage): live_server_setup(live_server) url = "http://chosen.changedetection.io" diff --git a/changedetectionio/tests/proxy_list/test_noproxy.py b/changedetectionio/tests/proxy_list/test_noproxy.py index 5a3cbce2..976fce4f 100644 --- a/changedetectionio/tests/proxy_list/test_noproxy.py +++ b/changedetectionio/tests/proxy_list/test_noproxy.py @@ -1,11 +1,11 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import time from flask import url_for from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client -def test_noproxy_option(client, live_server): +def test_noproxy_option(client, live_server, measure_memory_usage): live_server_setup(live_server) # Run by run_proxy_tests.sh # Call this URL then scan the containers that it never went through them diff --git a/changedetectionio/tests/proxy_list/test_proxy.py b/changedetectionio/tests/proxy_list/test_proxy.py index 1f4c5ff4..cefdce30 100644 --- a/changedetectionio/tests/proxy_list/test_proxy.py +++ b/changedetectionio/tests/proxy_list/test_proxy.py @@ -1,11 +1,11 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import time from flask import url_for from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client # just make a request, we will grep in the docker logs to see it actually got called -def test_check_basic_change_detection_functionality(client, live_server): +def 
test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage): live_server_setup(live_server) res = client.post( url_for("import_page"), diff --git a/changedetectionio/tests/proxy_list/test_select_custom_proxy.py b/changedetectionio/tests/proxy_list/test_select_custom_proxy.py index eb8990d3..1ae7ac69 100644 --- a/changedetectionio/tests/proxy_list/test_select_custom_proxy.py +++ b/changedetectionio/tests/proxy_list/test_select_custom_proxy.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import time from flask import url_for @@ -6,7 +6,7 @@ from ..util import live_server_setup, wait_for_all_checks import os # just make a request, we will grep in the docker logs to see it actually got called -def test_select_custom(client, live_server): +def test_select_custom(client, live_server, measure_memory_usage): live_server_setup(live_server) # Goto settings, add our custom one diff --git a/changedetectionio/tests/proxy_socks5/test_socks5_proxy.py b/changedetectionio/tests/proxy_socks5/test_socks5_proxy.py index 8fb52bf8..f9ce166d 100644 --- a/changedetectionio/tests/proxy_socks5/test_socks5_proxy.py +++ b/changedetectionio/tests/proxy_socks5/test_socks5_proxy.py @@ -1,11 +1,11 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import os import time from flask import url_for from changedetectionio.tests.util import live_server_setup, wait_for_all_checks -def test_socks5(client, live_server): +def test_socks5(client, live_server, measure_memory_usage): live_server_setup(live_server) # Setup a proxy diff --git a/changedetectionio/tests/proxy_socks5/test_socks5_proxy_sources.py b/changedetectionio/tests/proxy_socks5/test_socks5_proxy_sources.py index 2562249b..0da50acc 100644 --- a/changedetectionio/tests/proxy_socks5/test_socks5_proxy_sources.py +++ b/changedetectionio/tests/proxy_socks5/test_socks5_proxy_sources.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import os import time from flask import url_for @@ -7,7 +7,7 @@ 
from changedetectionio.tests.util import live_server_setup, wait_for_all_checks # should be proxies.json mounted from run_proxy_tests.sh already # -v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json -def test_socks5_from_proxiesjson_file(client, live_server): +def test_socks5_from_proxiesjson_file(client, live_server, measure_memory_usage): live_server_setup(live_server) test_url = "https://changedetection.io/CHANGELOG.txt?socks-test-tag=" + os.getenv('SOCKSTEST', '') diff --git a/changedetectionio/tests/restock/conftest.py b/changedetectionio/tests/restock/conftest.py index 430513d4..9a4bf56b 100644 --- a/changedetectionio/tests/restock/conftest.py +++ b/changedetectionio/tests/restock/conftest.py @@ -1,3 +1,3 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 from .. import conftest diff --git a/changedetectionio/tests/restock/test_restock.py b/changedetectionio/tests/restock/test_restock.py index 30528cd1..38cbc98e 100644 --- a/changedetectionio/tests/restock/test_restock.py +++ b/changedetectionio/tests/restock/test_restock.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import os import time from flask import url_for @@ -48,7 +48,7 @@ def set_back_in_stock_response(): return None # Add a site in paused mode, add an invalid filter, we should still have visual selector data ready -def test_restock_detection(client, live_server): +def test_restock_detection(client, live_server, measure_memory_usage): set_original_response() #assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test" diff --git a/changedetectionio/tests/smtp/smtp-test-server.py b/changedetectionio/tests/smtp/smtp-test-server.py index 3481ce7e..a6e3df66 100755 --- a/changedetectionio/tests/smtp/smtp-test-server.py +++ b/changedetectionio/tests/smtp/smtp-test-server.py @@ -1,42 +1,51 @@ -#!/usr/bin/python3 -import smtpd -import asyncore +#!/usr/bin/env python3 +import asyncio +from aiosmtpd.controller import 
Controller +from aiosmtpd.smtp import SMTP # Accept a SMTP message and offer a way to retrieve the last message via TCP Socket last_received_message = b"Nothing" -class CustomSMTPServer(smtpd.SMTPServer): - - def process_message(self, peer, mailfrom, rcpttos, data, **kwargs): +class CustomSMTPHandler: + async def handle_DATA(self, server, session, envelope): global last_received_message - last_received_message = data - print('Receiving message from:', peer) - print('Message addressed from:', mailfrom) - print('Message addressed to :', rcpttos) - print('Message length :', len(data)) - print(data.decode('utf8')) - return - - -# Just print out the last message received on plain TCP socket server -class EchoServer(asyncore.dispatcher): - - def __init__(self, host, port): - asyncore.dispatcher.__init__(self) - self.create_socket() - self.set_reuse_addr() - self.bind((host, port)) - self.listen(5) - - def handle_accepted(self, sock, addr): + last_received_message = envelope.content + print('Receiving message from:', session.peer) + print('Message addressed from:', envelope.mail_from) + print('Message addressed to :', envelope.rcpt_tos) + print('Message length :', len(envelope.content)) + print(envelope.content.decode('utf8')) + return '250 Message accepted for delivery' + + +class EchoServerProtocol(asyncio.Protocol): + def connection_made(self, transport): global last_received_message - print('Incoming connection from %s' % repr(addr)) - sock.send(last_received_message) + self.transport = transport + peername = transport.get_extra_info('peername') + print('Incoming connection from {}'.format(peername)) + self.transport.write(last_received_message) + last_received_message = b'' + self.transport.close() + + +async def main(): + # Start the SMTP server + controller = Controller(CustomSMTPHandler(), hostname='0.0.0.0', port=11025) + controller.start() + + # Start the TCP Echo server + loop = asyncio.get_running_loop() + server = await loop.create_server( + lambda: 
EchoServerProtocol(), + '0.0.0.0', 11080 + ) + async with server: + await server.serve_forever() -server = CustomSMTPServer(('0.0.0.0', 11025), None) # SMTP mail goes here -server2 = EchoServer('0.0.0.0', 11080) # Echo back last message received -asyncore.loop() +if __name__ == "__main__": + asyncio.run(main()) diff --git a/changedetectionio/tests/smtp/test_notification_smtp.py b/changedetectionio/tests/smtp/test_notification_smtp.py index 92a2fa82..47080a9a 100644 --- a/changedetectionio/tests/smtp/test_notification_smtp.py +++ b/changedetectionio/tests/smtp/test_notification_smtp.py @@ -32,13 +32,15 @@ def get_last_message_from_smtp_server(): client_socket.connect((smtp_test_server, port)) # connect to the server data = client_socket.recv(50024).decode() # receive response + logging.info("get_last_message_from_smtp_server..") + logging.info(data) client_socket.close() # close the connection return data # Requires running the test SMTP server -def test_check_notification_email_formats_default_HTML(client, live_server): +def test_check_notification_email_formats_default_HTML(client, live_server, measure_memory_usage): # live_server_setup(live_server) set_original_response() @@ -71,6 +73,8 @@ def test_check_notification_email_formats_default_HTML(client, live_server): wait_for_all_checks(client) set_longer_modified_response() + time.sleep(2) + client.get(url_for("form_watch_checknow"), follow_redirects=True) wait_for_all_checks(client) @@ -81,14 +85,14 @@ def test_check_notification_email_formats_default_HTML(client, live_server): # The email should have two bodies, and the text/html part should be <br> assert 'Content-Type: text/plain' in msg - assert '(added) So let\'s see what happens.\n' in msg # The plaintext part with \n + assert '(added) So let\'s see what happens.\r\n' in msg # The plaintext part with \r\n assert 'Content-Type: text/html' in msg assert '(added) So let\'s see what happens.<br>' in msg # the html part res = client.get(url_for("form_delete", 
uuid="all"), follow_redirects=True) assert b'Deleted' in res.data -def test_check_notification_email_formats_default_Text_override_HTML(client, live_server): +def test_check_notification_email_formats_default_Text_override_HTML(client, live_server, measure_memory_usage): # live_server_setup(live_server) # HTML problems? see this @@ -135,6 +139,7 @@ def test_check_notification_email_formats_default_Text_override_HTML(client, liv wait_for_all_checks(client) set_longer_modified_response() + time.sleep(2) client.get(url_for("form_watch_checknow"), follow_redirects=True) wait_for_all_checks(client) @@ -147,7 +152,7 @@ def test_check_notification_email_formats_default_Text_override_HTML(client, liv # The email should not have two bodies, should be TEXT only assert 'Content-Type: text/plain' in msg - assert '(added) So let\'s see what happens.\n' in msg # The plaintext part with \n + assert '(added) So let\'s see what happens.\r\n' in msg # The plaintext part with \r\n set_original_response() # Now override as HTML format @@ -168,7 +173,7 @@ def test_check_notification_email_formats_default_Text_override_HTML(client, liv # The email should have two bodies, and the text/html part should be <br> assert 'Content-Type: text/plain' in msg - assert '(removed) So let\'s see what happens.\n' in msg # The plaintext part with \n + assert '(removed) So let\'s see what happens.\r\n' in msg # The plaintext part with \n assert 'Content-Type: text/html' in msg assert '(removed) So let\'s see what happens.<br>' in msg # the html part diff --git a/changedetectionio/tests/test_add_replace_remove_filter.py b/changedetectionio/tests/test_add_replace_remove_filter.py index f64d877b..72c19c37 100644 --- a/changedetectionio/tests/test_add_replace_remove_filter.py +++ b/changedetectionio/tests/test_add_replace_remove_filter.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import os.path import time from flask import url_for @@ -35,10 +35,10 @@ def set_original(excluding=None, 
add_line=None): with open("test-datastore/endpoint-content.txt", "w") as f: f.write(test_return_data) -def test_setup(client, live_server): +def test_setup(client, live_server, measure_memory_usage): live_server_setup(live_server) -def test_check_removed_line_contains_trigger(client, live_server): +def test_check_removed_line_contains_trigger(client, live_server, measure_memory_usage): # Give the endpoint time to spin up time.sleep(1) @@ -103,7 +103,7 @@ def test_check_removed_line_contains_trigger(client, live_server): assert b'Deleted' in res.data -def test_check_add_line_contains_trigger(client, live_server): +def test_check_add_line_contains_trigger(client, live_server, measure_memory_usage): #live_server_setup(live_server) # Give the endpoint time to spin up @@ -112,7 +112,7 @@ def test_check_add_line_contains_trigger(client, live_server): res = client.post( url_for("settings_page"), data={"application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}", - "application-notification_body": 'triggered text was -{{triggered_text}}-', + "application-notification_body": 'triggered text was -{{triggered_text}}- 网站监测 内容更新了', # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation "application-notification_urls": test_notification_url, "application-minutes_between_check": 180, @@ -140,6 +140,7 @@ def test_check_add_line_contains_trigger(client, live_server): url_for("edit_page", uuid="first"), data={"trigger_text": 'Oh yes please', "url": test_url, + 'processor': 'text_json_diff', 'fetch_backend': "html_requests", 'filter_text_removed': '', 'filter_text_added': 'y'}, @@ -166,9 +167,10 @@ def test_check_add_line_contains_trigger(client, live_server): # Takes a moment for apprise to fire time.sleep(3) assert os.path.isfile("test-datastore/notification.txt"), "Notification fired because I can see the output file" - with open("test-datastore/notification.txt", 'r') as f: - response= f.read() - assert '-Oh yes 
please-' in response + with open("test-datastore/notification.txt", 'rb') as f: + response = f.read() + assert b'-Oh yes please-' in response + assert '网站监测 内容更新了'.encode('utf-8') in response res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) diff --git a/changedetectionio/tests/test_api.py b/changedetectionio/tests/test_api.py index 5be55ec2..55ea4635 100644 --- a/changedetectionio/tests/test_api.py +++ b/changedetectionio/tests/test_api.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import time from flask import url_for @@ -53,10 +53,10 @@ def is_valid_uuid(val): return False -def test_setup(client, live_server): +def test_setup(client, live_server, measure_memory_usage): live_server_setup(live_server) -def test_api_simple(client, live_server): +def test_api_simple(client, live_server, measure_memory_usage): #live_server_setup(live_server) api_key = extract_api_key_from_UI(client) @@ -149,6 +149,15 @@ def test_api_simple(client, live_server): headers={'x-api-key': api_key}, ) assert b'which has this one new line' in res.data + assert b'<div id' not in res.data + + # Fetch the HTML of the latest one + res = client.get( + url_for("watchsinglehistory", uuid=watch_uuid, timestamp='latest')+"?html=1", + headers={'x-api-key': api_key}, + ) + assert b'which has this one new line' in res.data + assert b'<div id' in res.data # Fetch the whole watch res = client.get( @@ -232,7 +241,7 @@ def test_api_simple(client, live_server): ) assert len(res.json) == 0, "Watch list should be empty" -def test_access_denied(client, live_server): +def test_access_denied(client, live_server, measure_memory_usage): # `config_api_token_enabled` Should be On by default res = client.get( url_for("createwatch") @@ -278,7 +287,7 @@ def test_access_denied(client, live_server): ) assert b"Settings updated." 
in res.data -def test_api_watch_PUT_update(client, live_server): +def test_api_watch_PUT_update(client, live_server, measure_memory_usage): #live_server_setup(live_server) api_key = extract_api_key_from_UI(client) @@ -360,7 +369,7 @@ def test_api_watch_PUT_update(client, live_server): assert b'Deleted' in res.data -def test_api_import(client, live_server): +def test_api_import(client, live_server, measure_memory_usage): api_key = extract_api_key_from_UI(client) res = client.post( diff --git a/changedetectionio/tests/test_auth.py b/changedetectionio/tests/test_auth.py index b84f8cf7..20fa96ba 100644 --- a/changedetectionio/tests/test_auth.py +++ b/changedetectionio/tests/test_auth.py @@ -1,14 +1,13 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import time from flask import url_for -from . util import live_server_setup +from .util import live_server_setup, wait_for_all_checks -def test_basic_auth(client, live_server): + +def test_basic_auth(client, live_server, measure_memory_usage): live_server_setup(live_server) - # Give the endpoint time to spin up - time.sleep(1) # Add our URL to the import page test_url = url_for('test_basicauth_method', _external=True).replace("//","//myuser:mypass@") @@ -19,8 +18,8 @@ def test_basic_auth(client, live_server): follow_redirects=True ) assert b"1 Imported" in res.data + wait_for_all_checks(client) time.sleep(1) - # Check form validation res = client.post( url_for("edit_page", uuid="first"), @@ -29,7 +28,7 @@ def test_basic_auth(client, live_server): ) assert b"Updated watch." 
in res.data - time.sleep(1) + wait_for_all_checks(client) res = client.get( url_for("preview_page", uuid="first"), follow_redirects=True diff --git a/changedetectionio/tests/test_automatic_follow_ldjson_price.py b/changedetectionio/tests/test_automatic_follow_ldjson_price.py index ff1e6330..e09661e3 100644 --- a/changedetectionio/tests/test_automatic_follow_ldjson_price.py +++ b/changedetectionio/tests/test_automatic_follow_ldjson_price.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import time from flask import url_for @@ -76,12 +76,12 @@ def set_response_without_ldjson(): f.write(test_return_data) return None -def test_setup(client, live_server): +def test_setup(client, live_server, measure_memory_usage): live_server_setup(live_server) # actually only really used by the distll.io importer, but could be handy too -def test_check_ldjson_price_autodetect(client, live_server): - +def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage): + #live_server_setup(live_server) set_response_with_ldjson() # Add our URL to the import page @@ -100,11 +100,8 @@ def test_check_ldjson_price_autodetect(client, live_server): # Accept it uuid = extract_UUID_from_client(client) - + #time.sleep(1) client.get(url_for('price_data_follower.accept', uuid=uuid, follow_redirects=True)) - wait_for_all_checks(client) - - # Trigger a check client.get(url_for("form_watch_checknow"), follow_redirects=True) wait_for_all_checks(client) # Offer should be gone @@ -119,8 +116,8 @@ def test_check_ldjson_price_autodetect(client, live_server): headers={'x-api-key': api_key}, ) - # Should see this (dont know where the whitespace came from) - assert b'"highPrice": 8099900' in res.data + assert b'8097000' in res.data + # And not this cause its not the ld-json assert b"So let's see what happens" not in res.data @@ -159,14 +156,14 @@ def _test_runner_check_bad_format_ignored(live_server, client, has_ldjson_price_ for k,v in 
client.application.config.get('DATASTORE').data['watching'].items(): assert v.get('last_error') == False - assert v.get('has_ldjson_price_data') == has_ldjson_price_data + assert v.get('has_ldjson_price_data') == has_ldjson_price_data, f"Detected LDJSON data? should be {has_ldjson_price_data}" ########################################################################################## client.get(url_for("form_delete", uuid="all"), follow_redirects=True) -def test_bad_ldjson_is_correctly_ignored(client, live_server): +def test_bad_ldjson_is_correctly_ignored(client, live_server, measure_memory_usage): #live_server_setup(live_server) test_return_data = """ <html> @@ -200,35 +197,37 @@ def test_bad_ldjson_is_correctly_ignored(client, live_server): f.write(test_return_data) _test_runner_check_bad_format_ignored(live_server=live_server, client=client, has_ldjson_price_data=True) - test_return_data = """ - <html> - <head> - <script type="application/ld+json"> - { - "@context": "http://schema.org", - "@type": ["Product", "SubType"], - "name": "My test product", - "description": "", - "BrokenOffers": { - "@type": "Offer", - "offeredBy": { - "@type": "Organization", - "name":"Person", - "telephone":"+1 999 999 999" - }, - "price": "1", - "priceCurrency": "EUR", - "url": "/some/url" - } - } - </script> - </head> - <body> - <div class="yes">Some extra stuff</div> - </body></html> - """ - with open("test-datastore/endpoint-content.txt", "w") as f: - f.write(test_return_data) - - _test_runner_check_bad_format_ignored(live_server=live_server, client=client, has_ldjson_price_data=False) + # This is OK that it offers a suggestion in this case, the processor will let them know more about something wrong + + # test_return_data = """ + # <html> + # <head> + # <script type="application/ld+json"> + # { + # "@context": "http://schema.org", + # "@type": ["Product", "SubType"], + # "name": "My test product", + # "description": "", + # "BrokenOffers": { + # "@type": "Offer", + # "offeredBy": 
{ + # "@type": "Organization", + # "name":"Person", + # "telephone":"+1 999 999 999" + # }, + # "price": "1", + # "priceCurrency": "EUR", + # "url": "/some/url" + # } + # } + # </script> + # </head> + # <body> + # <div class="yes">Some extra stuff</div> + # </body></html> + # """ + # with open("test-datastore/endpoint-content.txt", "w") as f: + # f.write(test_return_data) + # + # _test_runner_check_bad_format_ignored(live_server=live_server, client=client, has_ldjson_price_data=False) diff --git a/changedetectionio/tests/test_backend.py b/changedetectionio/tests/test_backend.py index 78c3b6fa..f7c259eb 100644 --- a/changedetectionio/tests/test_backend.py +++ b/changedetectionio/tests/test_backend.py @@ -1,9 +1,10 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import time from flask import url_for from urllib.request import urlopen -from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI +from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, \ + extract_UUID_from_client sleep_time_for_fetch_thread = 3 @@ -16,7 +17,7 @@ def test_inscriptus(): assert stripped_text_from_html == 'test!\nok man' -def test_check_basic_change_detection_functionality(client, live_server): +def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage): set_original_response() live_server_setup(live_server) @@ -62,9 +63,6 @@ def test_check_basic_change_detection_functionality(client, live_server): # Make a change set_modified_response() - res = urlopen(url_for('test_endpoint', _external=True)) - assert b'which has this one new line' in res.read() - # Force recheck res = client.get(url_for("form_watch_checknow"), follow_redirects=True) assert b'1 watches queued for rechecking.' 
in res.data @@ -135,12 +133,28 @@ def test_check_basic_change_detection_functionality(client, live_server): # It should have picked up the <title> assert b'head title' in res.data + # Be sure the last_viewed is going to be greater than the last snapshot + time.sleep(1) + # hit the mark all viewed link res = client.get(url_for("mark_all_viewed"), follow_redirects=True) assert b'Mark all viewed' not in res.data assert b'unviewed' not in res.data + # #2458 "clear history" should make the Watch object update its status correctly when the first snapshot lands again + uuid = extract_UUID_from_client(client) + client.get(url_for("clear_watch_history", uuid=uuid)) + client.get(url_for("form_watch_checknow"), follow_redirects=True) + wait_for_all_checks(client) + res = client.get(url_for("index")) + assert b'preview/' in res.data + + + # Check the 'get latest snapshot works' + res = client.get(url_for("watch_get_latest_html", uuid=uuid)) + assert b'<head><title>head title' in res.data + # # Cleanup everything res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) diff --git a/changedetectionio/tests/test_backup.py b/changedetectionio/tests/test_backup.py index 9f381755..c6dfd956 100644 --- a/changedetectionio/tests/test_backup.py +++ b/changedetectionio/tests/test_backup.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 from .util import set_original_response, live_server_setup, wait_for_all_checks from flask import url_for @@ -8,7 +8,7 @@ import re import time -def test_backup(client, live_server): +def test_backup(client, live_server, measure_memory_usage): live_server_setup(live_server) set_original_response() diff --git a/changedetectionio/tests/test_block_while_text_present.py b/changedetectionio/tests/test_block_while_text_present.py index 2669b52a..62ecb598 100644 --- a/changedetectionio/tests/test_block_while_text_present.py +++ b/changedetectionio/tests/test_block_while_text_present.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 
+#!/usr/bin/env python3 import time from flask import url_for @@ -60,7 +60,7 @@ def set_modified_response_minus_block_text(): f.write(test_return_data) -def test_check_block_changedetection_text_NOT_present(client, live_server): +def test_check_block_changedetection_text_NOT_present(client, live_server, measure_memory_usage): live_server_setup(live_server) # Use a mix of case in ZzZ to prove it works case-insensitive. diff --git a/changedetectionio/tests/test_clone.py b/changedetectionio/tests/test_clone.py index 7f502be1..2e97c77e 100644 --- a/changedetectionio/tests/test_clone.py +++ b/changedetectionio/tests/test_clone.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import time from flask import url_for @@ -6,7 +6,7 @@ from . util import live_server_setup -def test_trigger_functionality(client, live_server): +def test_trigger_functionality(client, live_server, measure_memory_usage): live_server_setup(live_server) diff --git a/changedetectionio/tests/test_css_selector.py b/changedetectionio/tests/test_css_selector.py index dcc10331..7b7d0ee8 100644 --- a/changedetectionio/tests/test_css_selector.py +++ b/changedetectionio/tests/test_css_selector.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import time from flask import url_for @@ -70,7 +70,7 @@ def test_include_filters_output(): # Tests the whole stack works with the CSS Filter -def test_check_markup_include_filters_restriction(client, live_server): +def test_check_markup_include_filters_restriction(client, live_server, measure_memory_usage): sleep_time_for_fetch_thread = 3 include_filters = "#sametext" @@ -124,7 +124,7 @@ def test_check_markup_include_filters_restriction(client, live_server): # Tests the whole stack works with the CSS Filter -def test_check_multiple_filters(client, live_server): +def test_check_multiple_filters(client, live_server, measure_memory_usage): sleep_time_for_fetch_thread = 3 include_filters = "#blob-a\r\nxpath://*[contains(@id,'blob-b')]" @@ -180,7 
+180,7 @@ def test_check_multiple_filters(client, live_server): # The filter exists, but did not contain anything useful # Mainly used when the filter contains just an IMG, this can happen when someone selects an image in the visual-selector # Tests fetcher can throw a "ReplyWithContentButNoText" exception after applying filter and extracting text -def test_filter_is_empty_help_suggestion(client, live_server): +def test_filter_is_empty_help_suggestion(client, live_server, measure_memory_usage): #live_server_setup(live_server) include_filters = "#blob-a" diff --git a/changedetectionio/tests/test_element_removal.py b/changedetectionio/tests/test_element_removal.py index 3c280d22..121d2ab9 100644 --- a/changedetectionio/tests/test_element_removal.py +++ b/changedetectionio/tests/test_element_removal.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import time @@ -106,7 +106,7 @@ across multiple lines ) -def test_element_removal_full(client, live_server): +def test_element_removal_full(client, live_server, measure_memory_usage): sleep_time_for_fetch_thread = 3 set_original_response() diff --git a/changedetectionio/tests/test_encoding.py b/changedetectionio/tests/test_encoding.py index 4b273edf..08bdf170 100644 --- a/changedetectionio/tests/test_encoding.py +++ b/changedetectionio/tests/test_encoding.py @@ -1,9 +1,9 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # coding=utf-8 import time from flask import url_for -from .util import live_server_setup +from .util import live_server_setup, wait_for_all_checks import pytest @@ -24,12 +24,9 @@ def set_html_response(): # In the case the server does not issue a charset= or doesnt have content_type header set -def test_check_encoding_detection(client, live_server): +def test_check_encoding_detection(client, live_server, measure_memory_usage): set_html_response() - # Give the endpoint time to spin up - time.sleep(1) - # Add our URL to the import page test_url = url_for('test_endpoint', content_type="text/html", 
_external=True) client.post( @@ -39,7 +36,7 @@ def test_check_encoding_detection(client, live_server): ) # Give the thread time to pick it up - time.sleep(2) + wait_for_all_checks(client) res = client.get( url_for("preview_page", uuid="first"), @@ -53,12 +50,9 @@ def test_check_encoding_detection(client, live_server): # In the case the server does not issue a charset= or doesnt have content_type header set -def test_check_encoding_detection_missing_content_type_header(client, live_server): +def test_check_encoding_detection_missing_content_type_header(client, live_server, measure_memory_usage): set_html_response() - # Give the endpoint time to spin up - time.sleep(1) - # Add our URL to the import page test_url = url_for('test_endpoint', _external=True) client.post( @@ -67,8 +61,7 @@ def test_check_encoding_detection_missing_content_type_header(client, live_serve follow_redirects=True ) - # Give the thread time to pick it up - time.sleep(2) + wait_for_all_checks(client) res = client.get( url_for("preview_page", uuid="first"), diff --git a/changedetectionio/tests/test_errorhandling.py b/changedetectionio/tests/test_errorhandling.py index d439fee1..0cc159b8 100644 --- a/changedetectionio/tests/test_errorhandling.py +++ b/changedetectionio/tests/test_errorhandling.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import time @@ -54,7 +54,7 @@ def _runner_test_http_errors(client, live_server, http_code, expected_text): assert b'Deleted' in res.data -def test_http_error_handler(client, live_server): +def test_http_error_handler(client, live_server, measure_memory_usage): _runner_test_http_errors(client, live_server, 403, 'Access denied') _runner_test_http_errors(client, live_server, 404, 'Page not found') _runner_test_http_errors(client, live_server, 500, '(Internal server error) received') @@ -63,7 +63,7 @@ def test_http_error_handler(client, live_server): assert b'Deleted' in res.data # Just to be sure error text is properly handled -def 
test_DNS_errors(client, live_server): +def test_DNS_errors(client, live_server, measure_memory_usage): # Give the endpoint time to spin up time.sleep(1) @@ -87,7 +87,7 @@ def test_DNS_errors(client, live_server): assert b'Deleted' in res.data # Re 1513 -def test_low_level_errors_clear_correctly(client, live_server): +def test_low_level_errors_clear_correctly(client, live_server, measure_memory_usage): #live_server_setup(live_server) # Give the endpoint time to spin up time.sleep(1) diff --git a/changedetectionio/tests/test_extract_csv.py b/changedetectionio/tests/test_extract_csv.py index 52596a9e..4616679d 100644 --- a/changedetectionio/tests/test_extract_csv.py +++ b/changedetectionio/tests/test_extract_csv.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import time from flask import url_for @@ -9,7 +9,7 @@ sleep_time_for_fetch_thread = 3 -def test_check_extract_text_from_diff(client, live_server): +def test_check_extract_text_from_diff(client, live_server, measure_memory_usage): import time with open("test-datastore/endpoint-content.txt", "w") as f: f.write("Now it's {} seconds since epoch, time flies!".format(str(time.time()))) @@ -29,6 +29,7 @@ def test_check_extract_text_from_diff(client, live_server): # Load in 5 different numbers/changes last_date="" for n in range(5): + time.sleep(1) # Give the thread time to pick it up print("Bumping snapshot and checking.. 
", n) last_date = str(time.time()) diff --git a/changedetectionio/tests/test_extract_regex.py b/changedetectionio/tests/test_extract_regex.py index 45a84800..da52da33 100644 --- a/changedetectionio/tests/test_extract_regex.py +++ b/changedetectionio/tests/test_extract_regex.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import time from flask import url_for @@ -67,10 +67,10 @@ def set_multiline_response(): return None -def test_setup(client, live_server): +def test_setup(client, live_server, measure_memory_usage): live_server_setup(live_server) -def test_check_filter_multiline(client, live_server): +def test_check_filter_multiline(client, live_server, measure_memory_usage): #live_server_setup(live_server) set_multiline_response() @@ -122,7 +122,7 @@ def test_check_filter_multiline(client, live_server): # but the last one, which also says 'lines' shouldnt be here (non-greedy match checking) assert b'aaand something lines' not in res.data -def test_check_filter_and_regex_extract(client, live_server): +def test_check_filter_and_regex_extract(client, live_server, measure_memory_usage): include_filters = ".changetext" @@ -205,7 +205,7 @@ def test_check_filter_and_regex_extract(client, live_server): -def test_regex_error_handling(client, live_server): +def test_regex_error_handling(client, live_server, measure_memory_usage): #live_server_setup(live_server) diff --git a/changedetectionio/tests/test_filter_exist_changes.py b/changedetectionio/tests/test_filter_exist_changes.py index e57db579..dabb58af 100644 --- a/changedetectionio/tests/test_filter_exist_changes.py +++ b/changedetectionio/tests/test_filter_exist_changes.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # https://www.reddit.com/r/selfhosted/comments/wa89kp/comment/ii3a4g7/?context=3 import os @@ -41,7 +41,7 @@ def set_response_with_filter(): f.write(test_return_data) return None -def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_server): +def 
test_filter_doesnt_exist_then_exists_should_get_notification(client, live_server, measure_memory_usage): # Filter knowingly doesn't exist, like someone setting up a known filter to see if some cinema tickets are on sale again # And the page has that filter available # Then I should get a notification diff --git a/changedetectionio/tests/test_filter_failure_notification.py b/changedetectionio/tests/test_filter_failure_notification.py index b25931ea..0cc8711d 100644 --- a/changedetectionio/tests/test_filter_failure_notification.py +++ b/changedetectionio/tests/test_filter_failure_notification.py @@ -21,10 +21,11 @@ def set_response_with_filter(): f.write(test_return_data) return None -def run_filter_test(client, content_filter): +def run_filter_test(client, live_server, content_filter): + + # Response WITHOUT the filter ID element + set_original_response() - # Give the endpoint time to spin up - time.sleep(1) # cleanup for the next client.get( url_for("form_delete", uuid="all"), @@ -79,6 +80,7 @@ def run_filter_test(client, content_filter): "include_filters": content_filter, "fetch_backend": "html_requests"}) + # A POST here will also reset the filter failure counter (filter_failure_notification_threshold_attempts) res = client.post( url_for("edit_page", uuid="first"), data=notification_form_data, @@ -91,20 +93,21 @@ def run_filter_test(client, content_filter): # Now the notification should not exist, because we didnt reach the threshold assert not os.path.isfile("test-datastore/notification.txt") - # -2 because we would have checked twice above (on adding and on edit) + # recheck it up to just before the threshold, including the fact that in the previous POST it would have rechecked (and incremented) for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT-2): - res = client.get(url_for("form_watch_checknow"), follow_redirects=True) + client.get(url_for("form_watch_checknow"), follow_redirects=True) wait_for_all_checks(client) - assert not 
os.path.isfile("test-datastore/notification.txt"), f"test-datastore/notification.txt should not exist - Attempt {i}" + time.sleep(2) # delay for apprise to fire + assert not os.path.isfile("test-datastore/notification.txt"), f"test-datastore/notification.txt should not exist - Attempt {i} when threshold is {App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT}" # We should see something in the frontend + res = client.get(url_for("index")) assert b'Warning, no filters were found' in res.data - # One more check should trigger it (see -2 above) - client.get(url_for("form_watch_checknow"), follow_redirects=True) - wait_for_all_checks(client) + # One more check should trigger the _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT threshold client.get(url_for("form_watch_checknow"), follow_redirects=True) wait_for_all_checks(client) + time.sleep(2) # delay for apprise to fire # Now it should exist and contain our "filter not found" alert assert os.path.isfile("test-datastore/notification.txt") @@ -148,14 +151,10 @@ def run_filter_test(client, content_filter): def test_setup(live_server): live_server_setup(live_server) -def test_check_include_filters_failure_notification(client, live_server): - set_original_response() - wait_for_all_checks(client) - run_filter_test(client, '#nope-doesnt-exist') +def test_check_include_filters_failure_notification(client, live_server, measure_memory_usage): + run_filter_test(client, live_server,'#nope-doesnt-exist') -def test_check_xpath_filter_failure_notification(client, live_server): - set_original_response() - time.sleep(1) - run_filter_test(client, '//*[@id="nope-doesnt-exist"]') +def test_check_xpath_filter_failure_notification(client, live_server, measure_memory_usage): + run_filter_test(client, live_server, '//*[@id="nope-doesnt-exist"]') # Test that notification is never sent diff --git a/changedetectionio/tests/test_group.py b/changedetectionio/tests/test_group.py index 8904097d..82f994fc 100644 --- a/changedetectionio/tests/test_group.py +++ 
b/changedetectionio/tests/test_group.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import time from flask import url_for @@ -6,7 +6,7 @@ from .util import live_server_setup, wait_for_all_checks, extract_rss_token_from import os -def test_setup(client, live_server): +def test_setup(client, live_server, measure_memory_usage): live_server_setup(live_server) def set_original_response(): @@ -39,7 +39,7 @@ def set_modified_response(): f.write(test_return_data) return None -def test_setup_group_tag(client, live_server): +def test_setup_group_tag(client, live_server, measure_memory_usage): #live_server_setup(live_server) set_original_response() @@ -130,7 +130,7 @@ def test_setup_group_tag(client, live_server): res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) assert b'Deleted' in res.data -def test_tag_import_singular(client, live_server): +def test_tag_import_singular(client, live_server, measure_memory_usage): #live_server_setup(live_server) test_url = url_for('test_endpoint', _external=True) @@ -150,7 +150,7 @@ def test_tag_import_singular(client, live_server): res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) assert b'Deleted' in res.data -def test_tag_add_in_ui(client, live_server): +def test_tag_add_in_ui(client, live_server, measure_memory_usage): #live_server_setup(live_server) # res = client.post( @@ -167,7 +167,7 @@ def test_tag_add_in_ui(client, live_server): res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) assert b'Deleted' in res.data -def test_group_tag_notification(client, live_server): +def test_group_tag_notification(client, live_server, measure_memory_usage): #live_server_setup(live_server) set_original_response() @@ -235,7 +235,7 @@ def test_group_tag_notification(client, live_server): res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) assert b'Deleted' in res.data -def test_limit_tag_ui(client, live_server): +def 
test_limit_tag_ui(client, live_server, measure_memory_usage): #live_server_setup(live_server) test_url = url_for('test_endpoint', _external=True) @@ -273,7 +273,7 @@ def test_limit_tag_ui(client, live_server): assert b'Deleted' in res.data res = client.get(url_for("tags.delete_all"), follow_redirects=True) assert b'All tags deleted' in res.data -def test_clone_tag_on_import(client, live_server): +def test_clone_tag_on_import(client, live_server, measure_memory_usage): #live_server_setup(live_server) test_url = url_for('test_endpoint', _external=True) res = client.post( @@ -298,7 +298,7 @@ def test_clone_tag_on_import(client, live_server): res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) assert b'Deleted' in res.data -def test_clone_tag_on_quickwatchform_add(client, live_server): +def test_clone_tag_on_quickwatchform_add(client, live_server, measure_memory_usage): #live_server_setup(live_server) test_url = url_for('test_endpoint', _external=True) @@ -328,7 +328,7 @@ def test_clone_tag_on_quickwatchform_add(client, live_server): res = client.get(url_for("tags.delete_all"), follow_redirects=True) assert b'All tags deleted' in res.data -def test_order_of_filters_tag_filter_and_watch_filter(client, live_server): +def test_order_of_filters_tag_filter_and_watch_filter(client, live_server, measure_memory_usage): # Add a tag with some config, import a tag and it should roughly work res = client.post( diff --git a/changedetectionio/tests/test_history_consistency.py b/changedetectionio/tests/test_history_consistency.py index a7e46a57..7f171c44 100644 --- a/changedetectionio/tests/test_history_consistency.py +++ b/changedetectionio/tests/test_history_consistency.py @@ -1,19 +1,17 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import time import os import json import logging from flask import url_for -from .util import live_server_setup +from .util import live_server_setup, wait_for_all_checks from urllib.parse import urlparse, parse_qs -def 
test_consistent_history(client, live_server): +def test_consistent_history(client, live_server, measure_memory_usage): live_server_setup(live_server) - # Give the endpoint time to spin up - time.sleep(1) - r = range(1, 50) + r = range(1, 30) for one in r: test_url = url_for('test_endpoint', content_type="text/html", content=str(one), _external=True) @@ -25,15 +23,8 @@ def test_consistent_history(client, live_server): assert b"1 Imported" in res.data - time.sleep(3) - while True: - res = client.get(url_for("index")) - logging.debug("Waiting for 'Checking now' to go away..") - if b'Checking now' not in res.data: - break - time.sleep(0.5) + wait_for_all_checks(client) - time.sleep(3) # Essentially just triggers the DB write/update res = client.post( url_for("settings_page"), @@ -44,8 +35,9 @@ def test_consistent_history(client, live_server): ) assert b"Settings updated." in res.data - # Give it time to write it out - time.sleep(3) + + time.sleep(2) + json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json') json_obj = None @@ -58,7 +50,7 @@ def test_consistent_history(client, live_server): # each one should have a history.txt containing just one line for w in json_obj['watching'].keys(): history_txt_index_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, w, 'history.txt') - assert os.path.isfile(history_txt_index_file), "History.txt should exist where I expect it - {}".format(history_txt_index_file) + assert os.path.isfile(history_txt_index_file), f"History.txt should exist where I expect it at {history_txt_index_file}" # Same like in model.Watch with open(history_txt_index_file, "r") as f: @@ -70,15 +62,20 @@ def test_consistent_history(client, live_server): w)) # Find the snapshot one for fname in files_in_watch_dir: - if fname != 'history.txt': + if fname != 'history.txt' and 'html' not in fname: # contents should match what we requested as content returned from the test url with 
open(os.path.join(live_server.app.config['DATASTORE'].datastore_path, w, fname), 'r') as snapshot_f: contents = snapshot_f.read() watch_url = json_obj['watching'][w]['url'] u = urlparse(watch_url) q = parse_qs(u[4]) - assert q['content'][0] == contents.strip(), "Snapshot file {} should contain {}".format(fname, q['content'][0]) + assert q['content'][0] == contents.strip(), f"Snapshot file {fname} should contain {q['content'][0]}" + + assert len(files_in_watch_dir) == 3, "Should be just three files in the dir, html.br snapshot, history.txt and the extracted text snapshot" - assert len(files_in_watch_dir) == 2, "Should be just two files in the dir, history.txt and the snapshot" + + json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json') + with open(json_db_file, 'r') as f: + assert '"default"' not in f.read(), "'default' probably shouldnt be here, it came from when the 'default' Watch vars were accidently being saved" diff --git a/changedetectionio/tests/test_html_to_text.py b/changedetectionio/tests/test_html_to_text.py index 766a74f0..b1bc3ca3 100644 --- a/changedetectionio/tests/test_html_to_text.py +++ b/changedetectionio/tests/test_html_to_text.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 """Test suite for the method to extract text from an html string""" from ..html_tools import html_to_text diff --git a/changedetectionio/tests/test_ignore_regex_text.py b/changedetectionio/tests/test_ignore_regex_text.py index 45f73392..06c60ea4 100644 --- a/changedetectionio/tests/test_ignore_regex_text.py +++ b/changedetectionio/tests/test_ignore_regex_text.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 from . 
util import live_server_setup from changedetectionio import html_tools @@ -9,8 +9,6 @@ def test_setup(live_server): # Unit test of the stripper # Always we are dealing in utf-8 def test_strip_regex_text_func(): - from ..processors import text_json_diff as fetch_site_status - test_content = """ but sometimes we want to remove the lines. diff --git a/changedetectionio/tests/test_ignore_text.py b/changedetectionio/tests/test_ignore_text.py index 5d6d7149..60a2f3a2 100644 --- a/changedetectionio/tests/test_ignore_text.py +++ b/changedetectionio/tests/test_ignore_text.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import time from flask import url_for @@ -11,9 +11,6 @@ def test_setup(live_server): # Unit test of the stripper # Always we are dealing in utf-8 def test_strip_text_func(): - from ..processors import text_json_diff as fetch_site_status - - test_content = """ Some content is listed here @@ -82,7 +79,7 @@ def set_modified_ignore_response(): f.write(test_return_data) -def test_check_ignore_text_functionality(client, live_server): +def test_check_ignore_text_functionality(client, live_server, measure_memory_usage): # Use a mix of case in ZzZ to prove it works case-insensitive. 
ignore_text = "XXXXX\r\nYYYYY\r\nzZzZZ\r\nnew ignore stuff" @@ -164,7 +161,7 @@ def test_check_ignore_text_functionality(client, live_server): res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) assert b'Deleted' in res.data -def test_check_global_ignore_text_functionality(client, live_server): +def test_check_global_ignore_text_functionality(client, live_server, measure_memory_usage): # Give the endpoint time to spin up time.sleep(1) diff --git a/changedetectionio/tests/test_ignorehighlighter.py b/changedetectionio/tests/test_ignorehighlighter.py index 88bd0af6..58ecfeb4 100644 --- a/changedetectionio/tests/test_ignorehighlighter.py +++ b/changedetectionio/tests/test_ignorehighlighter.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import time from flask import url_for @@ -23,7 +23,7 @@ def set_original_ignore_response(): f.write(test_return_data) -def test_highlight_ignore(client, live_server): +def test_highlight_ignore(client, live_server, measure_memory_usage): live_server_setup(live_server) set_original_ignore_response() test_url = url_for('test_endpoint', _external=True) @@ -45,7 +45,6 @@ def test_highlight_ignore(client, live_server): ) res = client.get(url_for("edit_page", uuid=uuid)) - # should be a regex now assert b'/oh\ yeah\ \d+/' in res.data @@ -55,3 +54,7 @@ def test_highlight_ignore(client, live_server): # And it should register in the preview page res = client.get(url_for("preview_page", uuid=uuid)) assert b'
    oh yeah 456' in res.data + + # Should be in base.html + assert b'csrftoken' in res.data + diff --git a/changedetectionio/tests/test_ignorehyperlinks.py b/changedetectionio/tests/test_ignorehyperlinks.py index f917125f..d739eb58 100644 --- a/changedetectionio/tests/test_ignorehyperlinks.py +++ b/changedetectionio/tests/test_ignorehyperlinks.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 """Test suite for the render/not render anchor tag content functionality""" import time @@ -40,7 +40,7 @@ def set_modified_ignore_response(): with open("test-datastore/endpoint-content.txt", "w") as f: f.write(test_return_data) -def test_render_anchor_tag_content_true(client, live_server): +def test_render_anchor_tag_content_true(client, live_server, measure_memory_usage): """Testing that the link changes are detected when render_anchor_tag_content setting is set to true""" sleep_time_for_fetch_thread = 3 diff --git a/changedetectionio/tests/test_ignorestatuscode.py b/changedetectionio/tests/test_ignorestatuscode.py index 74999b24..9ec8086a 100644 --- a/changedetectionio/tests/test_ignorestatuscode.py +++ b/changedetectionio/tests/test_ignorestatuscode.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import time from flask import url_for @@ -39,7 +39,7 @@ def set_some_changed_response(): f.write(test_return_data) -def test_normal_page_check_works_with_ignore_status_code(client, live_server): +def test_normal_page_check_works_with_ignore_status_code(client, live_server, measure_memory_usage): # Give the endpoint time to spin up @@ -85,7 +85,7 @@ def test_normal_page_check_works_with_ignore_status_code(client, live_server): # Tests the whole stack works with staus codes ignored -def test_403_page_check_works_with_ignore_status_code(client, live_server): +def test_403_page_check_works_with_ignore_status_code(client, live_server, measure_memory_usage): sleep_time_for_fetch_thread = 3 set_original_response() diff --git 
a/changedetectionio/tests/test_ignorewhitespace.py b/changedetectionio/tests/test_ignorewhitespace.py index a0db9ead..25d16244 100644 --- a/changedetectionio/tests/test_ignorewhitespace.py +++ b/changedetectionio/tests/test_ignorewhitespace.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import time from flask import url_for @@ -49,7 +49,7 @@ def set_original_ignore_response(): # If there was only a change in the whitespacing, then we shouldnt have a change detected -def test_check_ignore_whitespace(client, live_server): +def test_check_ignore_whitespace(client, live_server, measure_memory_usage): sleep_time_for_fetch_thread = 3 # Give the endpoint time to spin up diff --git a/changedetectionio/tests/test_import.py b/changedetectionio/tests/test_import.py index ed080e0e..4b25d654 100644 --- a/changedetectionio/tests/test_import.py +++ b/changedetectionio/tests/test_import.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import io import os import time @@ -8,10 +8,10 @@ from flask import url_for from .util import live_server_setup, wait_for_all_checks -def test_setup(client, live_server): +def test_setup(client, live_server, measure_memory_usage): live_server_setup(live_server) -def test_import(client, live_server): +def test_import(client, live_server, measure_memory_usage): # Give the endpoint time to spin up wait_for_all_checks(client) @@ -34,7 +34,7 @@ https://example.com tag1, other tag""" res = client.get( url_for("index")) res = client.get( url_for("index")) -def xtest_import_skip_url(client, live_server): +def xtest_import_skip_url(client, live_server, measure_memory_usage): # Give the endpoint time to spin up @@ -57,7 +57,7 @@ def xtest_import_skip_url(client, live_server): # Clear flask alerts res = client.get( url_for("index")) -def test_import_distillio(client, live_server): +def test_import_distillio(client, live_server, measure_memory_usage): distill_data=''' { @@ -123,7 +123,7 @@ def test_import_distillio(client, 
live_server): # Clear flask alerts res = client.get(url_for("index")) -def test_import_custom_xlsx(client, live_server): +def test_import_custom_xlsx(client, live_server, measure_memory_usage): """Test can upload a excel spreadsheet and the watches are created correctly""" #live_server_setup(live_server) @@ -172,7 +172,7 @@ def test_import_custom_xlsx(client, live_server): res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) assert b'Deleted' in res.data -def test_import_watchete_xlsx(client, live_server): +def test_import_watchete_xlsx(client, live_server, measure_memory_usage): """Test can upload a excel spreadsheet and the watches are created correctly""" #live_server_setup(live_server) diff --git a/changedetectionio/tests/test_jinja2.py b/changedetectionio/tests/test_jinja2.py index 1e08691b..fba9f227 100644 --- a/changedetectionio/tests/test_jinja2.py +++ b/changedetectionio/tests/test_jinja2.py @@ -1,15 +1,15 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import time from flask import url_for from .util import live_server_setup, wait_for_all_checks -def test_setup(client, live_server): +def test_setup(client, live_server, measure_memory_usage): live_server_setup(live_server) # If there was only a change in the whitespacing, then we shouldnt have a change detected -def test_jinja2_in_url_query(client, live_server): +def test_jinja2_in_url_query(client, live_server, measure_memory_usage): #live_server_setup(live_server) # Add our URL to the import page @@ -34,7 +34,7 @@ def test_jinja2_in_url_query(client, live_server): assert b'date=2' in res.data # https://techtonics.medium.com/secure-templating-with-jinja2-understanding-ssti-and-jinja2-sandbox-environment-b956edd60456 -def test_jinja2_security_url_query(client, live_server): +def test_jinja2_security_url_query(client, live_server, measure_memory_usage): #live_server_setup(live_server) # Add our URL to the import page diff --git a/changedetectionio/tests/test_jsonpath_jq_selector.py 
b/changedetectionio/tests/test_jsonpath_jq_selector.py index 1202849f..d139e1cf 100644 --- a/changedetectionio/tests/test_jsonpath_jq_selector.py +++ b/changedetectionio/tests/test_jsonpath_jq_selector.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # coding=utf-8 import time @@ -41,19 +41,26 @@ and it can also be repeated from .. import html_tools # See that we can find the second ', + # LD JSON + '', + '' + # Microdata + '

    Example Product

    This is a sample product description.

    Price: $$$PRICE$$

    ' +] + +out_of_stock_props = [ + # out of stock AND contains multiples + '' +] + +def set_original_response(props_markup='', price="121.95"): + + props_markup=props_markup.replace('$$PRICE$$', price) + test_return_data = f""" + + Some initial text
    +

    Which is across multiple lines

    +
    + So let's see what happens.
    +
    price: ${price}
    + {props_markup} + + + """ + + with open("test-datastore/endpoint-content.txt", "w") as f: + f.write(test_return_data) + time.sleep(1) + return None + + + + +def test_setup(client, live_server): + + live_server_setup(live_server) + +def test_restock_itemprop_basic(client, live_server): + + #live_server_setup(live_server) + + test_url = url_for('test_endpoint', _external=True) + + # By default it should enable ('in_stock_processing') == 'all_changes' + + for p in instock_props: + set_original_response(props_markup=p) + client.post( + url_for("form_quick_watch_add"), + data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'}, + follow_redirects=True + ) + wait_for_all_checks(client) + res = client.get(url_for("index")) + assert b'more than one price detected' not in res.data + assert b'has-restock-info' in res.data + assert b' in-stock' in res.data + assert b' not-in-stock' not in res.data + res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) + assert b'Deleted' in res.data + + + for p in out_of_stock_props: + set_original_response(props_markup=p) + client.post( + url_for("form_quick_watch_add"), + data={"url": test_url, "tags": '', 'processor': 'restock_diff'}, + follow_redirects=True + ) + wait_for_all_checks(client) + res = client.get(url_for("index")) + + assert b'has-restock-info not-in-stock' in res.data + + res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) + assert b'Deleted' in res.data + +def test_itemprop_price_change(client, live_server): + #live_server_setup(live_server) + + # Out of the box 'Follow price changes' should be ON + test_url = url_for('test_endpoint', _external=True) + + set_original_response(props_markup=instock_props[0], price="190.95") + client.post( + url_for("form_quick_watch_add"), + data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'}, + follow_redirects=True + ) + + # A change in price, should trigger a change by default + 
wait_for_all_checks(client) + res = client.get(url_for("index")) + assert b'190.95' in res.data + + # basic price change, look for notification + set_original_response(props_markup=instock_props[0], price='180.45') + client.get(url_for("form_watch_checknow"), follow_redirects=True) + wait_for_all_checks(client) + res = client.get(url_for("index")) + assert b'180.45' in res.data + assert b'unviewed' in res.data + client.get(url_for("mark_all_viewed"), follow_redirects=True) + + # turning off price change trigger, but it should show the new price, with no change notification + set_original_response(props_markup=instock_props[0], price='120.45') + res = client.post( + url_for("edit_page", uuid="first"), + data={"restock_settings-follow_price_changes": "", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests"}, + follow_redirects=True + ) + assert b"Updated watch." in res.data + client.get(url_for("form_watch_checknow"), follow_redirects=True) + wait_for_all_checks(client) + res = client.get(url_for("index")) + assert b'120.45' in res.data + assert b'unviewed' not in res.data + + + res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) + assert b'Deleted' in res.data + +def _run_test_minmax_limit(client, extra_watch_edit_form): + + res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) + assert b'Deleted' in res.data + + test_url = url_for('test_endpoint', _external=True) + + set_original_response(props_markup=instock_props[0], price="950.95") + client.post( + url_for("form_quick_watch_add"), + data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'}, + follow_redirects=True + ) + + # A change in price, should trigger a change by default + wait_for_all_checks(client) + + data = { + "tags": "", + "url": test_url, + "headers": "", + 'fetch_backend': "html_requests" + } + data.update(extra_watch_edit_form) + res = client.post( + url_for("edit_page", uuid="first"), + data=data, + 
follow_redirects=True + ) + assert b"Updated watch." in res.data + wait_for_all_checks(client) + + client.get(url_for("mark_all_viewed")) + + # price changed to something greater than min (900), BUT less than max (1100).. should be no change + set_original_response(props_markup=instock_props[0], price='1000.45') + client.get(url_for("form_watch_checknow")) + wait_for_all_checks(client) + res = client.get(url_for("index")) + + assert b'more than one price detected' not in res.data + # BUT the new price should show, even tho its within limits + # NOTE: each alternative needs its own `in res.data`; `b'x' or b'y' in data` is always true because a non-empty bytes literal is truthy + assert b'1,000.45' in res.data or b'1000.45' in res.data # depending on locale + assert b'unviewed' not in res.data + + + # price changed to something LESS than min (900), SHOULD be a change + set_original_response(props_markup=instock_props[0], price='890.45') + # let previous runs wait + time.sleep(1) + res = client.get(url_for("form_watch_checknow"), follow_redirects=True) + assert b'1 watches queued for rechecking.' in res.data + wait_for_all_checks(client) + res = client.get(url_for("index")) + assert b'890.45' in res.data + assert b'unviewed' in res.data + + client.get(url_for("mark_all_viewed")) + + # price changed to something MORE than max (1100.10), SHOULD be a change + set_original_response(props_markup=instock_props[0], price='1890.45') + client.get(url_for("form_watch_checknow"), follow_redirects=True) + wait_for_all_checks(client) + res = client.get(url_for("index")) + assert b'1,890.45' in res.data or b'1890.45' in res.data # depending on locale + assert b'unviewed' in res.data + + res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) + assert b'Deleted' in res.data + + +def test_restock_itemprop_minmax(client, live_server): +# live_server_setup(live_server) + extras = { + "restock_settings-follow_price_changes": "y", + "restock_settings-price_change_min": 900.0, + "restock_settings-price_change_max": 1100.10 + } + _run_test_minmax_limit(client, extra_watch_edit_form=extras) + +def test_restock_itemprop_with_tag(client, live_server): + 
#live_server_setup(live_server) + + res = client.post( + url_for("tags.form_tag_add"), + data={"name": "test-tag"}, + follow_redirects=True + ) + assert b"Tag added" in res.data + + res = client.post( + url_for("tags.form_tag_edit_submit", uuid="first"), + data={"name": "test-tag", + "restock_settings-follow_price_changes": "y", + "restock_settings-price_change_min": 900.0, + "restock_settings-price_change_max": 1100.10, + "overrides_watch": "y", #overrides_watch should be restock_overrides_watch + }, + follow_redirects=True + ) + + extras = { + "tags": "test-tag" + } + + _run_test_minmax_limit(client, extra_watch_edit_form=extras) + + + +def test_itemprop_percent_threshold(client, live_server): + #live_server_setup(live_server) + + res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) + assert b'Deleted' in res.data + + test_url = url_for('test_endpoint', _external=True) + + set_original_response(props_markup=instock_props[0], price="950.95") + client.post( + url_for("form_quick_watch_add"), + data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'}, + follow_redirects=True + ) + + # A change in price, should trigger a change by default + wait_for_all_checks(client) + + res = client.post( + url_for("edit_page", uuid="first"), + data={"restock_settings-follow_price_changes": "y", + "restock_settings-price_change_threshold_percent": 5.0, + "url": test_url, + "tags": "", + "headers": "", + 'fetch_backend': "html_requests" + }, + follow_redirects=True + ) + assert b"Updated watch." 
in res.data + wait_for_all_checks(client) + + + # Basic change should not trigger + set_original_response(props_markup=instock_props[0], price='960.45') + client.get(url_for("form_watch_checknow")) + wait_for_all_checks(client) + res = client.get(url_for("index")) + assert b'960.45' in res.data + assert b'unviewed' not in res.data + + # Bigger INCREASE change than the threshold should trigger + set_original_response(props_markup=instock_props[0], price='1960.45') + client.get(url_for("form_watch_checknow")) + wait_for_all_checks(client) + res = client.get(url_for("index")) + assert b'1,960.45' or b'1960.45' in res.data #depending on locale + assert b'unviewed' in res.data + + + # Small decrease should NOT trigger + client.get(url_for("mark_all_viewed")) + set_original_response(props_markup=instock_props[0], price='1950.45') + client.get(url_for("form_watch_checknow")) + wait_for_all_checks(client) + res = client.get(url_for("index")) + assert b'1,950.45' or b'1950.45' in res.data #depending on locale + assert b'unviewed' not in res.data + + + + + res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) + assert b'Deleted' in res.data + + + +def test_change_with_notification_values(client, live_server): + #live_server_setup(live_server) + + if os.path.isfile("test-datastore/notification.txt"): + os.unlink("test-datastore/notification.txt") + + test_url = url_for('test_endpoint', _external=True) + set_original_response(props_markup=instock_props[0], price='960.45') + + notification_url = url_for('test_notification_endpoint', _external=True).replace('http', 'json') + + ###################### + # You must add a type of 'restock_diff' for its tokens to register as valid in the global settings + client.post( + url_for("form_quick_watch_add"), + data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'}, + follow_redirects=True + ) + + # A change in price, should trigger a change by default + wait_for_all_checks(client) + + # Should 
see new tokens register + res = client.get(url_for("settings_page")) + assert b'{{restock.original_price}}' in res.data + assert b'Original price at first check' in res.data + + ##################### + # Set this up for when we remove the notification from the watch, it should fallback with these details + res = client.post( + url_for("settings_page"), + data={"application-notification_urls": notification_url, + "application-notification_title": "title new price {{restock.price}}", + "application-notification_body": "new price {{restock.price}}", + "application-notification_format": default_notification_format, + "requests-time_between_check-minutes": 180, + 'application-fetch_backend': "html_requests"}, + follow_redirects=True + ) + + # check tag accepts without error + + # Check the watches in these modes add the tokens for validating + assert b"A variable or function is not defined" not in res.data + + assert b"Settings updated." in res.data + + + set_original_response(props_markup=instock_props[0], price='960.45') + # A change in price, should trigger a change by default + set_original_response(props_markup=instock_props[0], price='1950.45') + client.get(url_for("form_watch_checknow")) + wait_for_all_checks(client) + time.sleep(3) + assert os.path.isfile("test-datastore/notification.txt"), "Notification received" + with open("test-datastore/notification.txt", 'r') as f: + notification = f.read() + assert "new price 1950.45" in notification + assert "title new price 1950.45" in notification + + + +def test_data_sanity(client, live_server): + #live_server_setup(live_server) + + res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) + assert b'Deleted' in res.data + + test_url = url_for('test_endpoint', _external=True) + test_url2 = url_for('test_endpoint2', _external=True) + set_original_response(props_markup=instock_props[0], price="950.95") + client.post( + url_for("form_quick_watch_add"), + data={"url": test_url, "tags": 'restock tests', 
'processor': 'restock_diff'}, + follow_redirects=True + ) + + + wait_for_all_checks(client) + res = client.get(url_for("index")) + assert b'950.95' in res.data + + # Check the restock model object doesnt store the value by mistake and used in a new one + client.post( + url_for("form_quick_watch_add"), + data={"url": test_url2, "tags": 'restock tests', 'processor': 'restock_diff'}, + follow_redirects=True + ) + wait_for_all_checks(client) + res = client.get(url_for("index")) + assert str(res.data.decode()).count("950.95") == 1, "Price should only show once (for the watch added, no other watches yet)" + + ## different test, check the edit page works on an empty request result + res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) + assert b'Deleted' in res.data + + client.post( + url_for("form_quick_watch_add"), + data={"url": test_url2, "tags": 'restock tests', 'processor': 'restock_diff'}, + follow_redirects=True + ) + wait_for_all_checks(client) + + res = client.get( + url_for("edit_page", uuid="first")) + assert test_url2.encode('utf-8') in res.data diff --git a/changedetectionio/tests/test_rss.py b/changedetectionio/tests/test_rss.py index 7f030482..e249e0ad 100644 --- a/changedetectionio/tests/test_rss.py +++ b/changedetectionio/tests/test_rss.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import time from flask import url_for @@ -49,10 +49,10 @@ def set_original_cdata_xml(): f.write(test_return_data) -def test_setup(client, live_server): +def test_setup(client, live_server, measure_memory_usage): live_server_setup(live_server) -def test_rss_and_token(client, live_server): +def test_rss_and_token(client, live_server, measure_memory_usage): # live_server_setup(live_server) set_original_response() @@ -69,6 +69,7 @@ def test_rss_and_token(client, live_server): wait_for_all_checks(client) set_modified_response() + time.sleep(1) client.get(url_for("form_watch_checknow"), follow_redirects=True) wait_for_all_checks(client) @@ 
-87,9 +88,9 @@ def test_rss_and_token(client, live_server): assert b"Access denied, bad token" not in res.data assert b"Random content" in res.data - res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) + client.get(url_for("form_delete", uuid="all"), follow_redirects=True) -def test_basic_cdata_rss_markup(client, live_server): +def test_basic_cdata_rss_markup(client, live_server, measure_memory_usage): #live_server_setup(live_server) set_original_cdata_xml() @@ -117,7 +118,7 @@ def test_basic_cdata_rss_markup(client, live_server): assert b'The days of Terminator' in res.data res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) -def test_rss_xpath_filtering(client, live_server): +def test_rss_xpath_filtering(client, live_server, measure_memory_usage): #live_server_setup(live_server) set_original_cdata_xml() diff --git a/changedetectionio/tests/test_search.py b/changedetectionio/tests/test_search.py index 453a0b8a..70dab62f 100644 --- a/changedetectionio/tests/test_search.py +++ b/changedetectionio/tests/test_search.py @@ -5,7 +5,7 @@ import time def test_setup(live_server): live_server_setup(live_server) -def test_basic_search(client, live_server): +def test_basic_search(client, live_server, measure_memory_usage): #live_server_setup(live_server) urls = ['https://localhost:12300?first-result=1', @@ -38,7 +38,7 @@ def test_basic_search(client, live_server): assert urls[1].encode('utf-8') not in res.data -def test_search_in_tag_limit(client, live_server): +def test_search_in_tag_limit(client, live_server, measure_memory_usage): #live_server_setup(live_server) urls = ['https://localhost:12300?first-result=1 tag-one', diff --git a/changedetectionio/tests/test_security.py b/changedetectionio/tests/test_security.py index c56c84492..af55efb2 100644 --- a/changedetectionio/tests/test_security.py +++ b/changedetectionio/tests/test_security.py @@ -1,11 +1,16 @@ +import os + from flask import url_for from .util import 
set_original_response, set_modified_response, live_server_setup, wait_for_all_checks import time -def test_setup(client, live_server): +from .. import strtobool + + +def test_setup(client, live_server, measure_memory_usage): live_server_setup(live_server) -def test_bad_access(client, live_server): +def test_bad_access(client, live_server, measure_memory_usage): #live_server_setup(live_server) res = client.post( url_for("import_page"), @@ -55,19 +60,35 @@ def test_bad_access(client, live_server): assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data - # file:// is permitted by default, but it will be caught by ALLOW_FILE_URI +def test_file_access(client, live_server, measure_memory_usage): + #live_server_setup(live_server) + + test_file_path = "/tmp/test-file.txt" + + # file:// is permitted by default, but it will be caught by ALLOW_FILE_URI client.post( url_for("form_quick_watch_add"), - data={"url": 'file:///tasty/disk/drive', "tags": ''}, + data={"url": f"file://{test_file_path}", "tags": ''}, follow_redirects=True ) wait_for_all_checks(client) res = client.get(url_for("index")) - assert b'file:// type access is denied for security reasons.' in res.data + # If it is enabled at test time + if strtobool(os.getenv('ALLOW_FILE_URI', 'false')): + res = client.get( + url_for("preview_page", uuid="first"), + follow_redirects=True + ) + + # Should see something (this file added by run_basic_tests.sh) + assert b"Hello world" in res.data + else: + # Default should be here + assert b'file:// type access is denied for security reasons.' 
in res.data -def test_xss(client, live_server): +def test_xss(client, live_server, measure_memory_usage): #live_server_setup(live_server) from changedetectionio.notification import ( default_notification_format diff --git a/changedetectionio/tests/test_share_watch.py b/changedetectionio/tests/test_share_watch.py index bf76fabc..a8e53601 100644 --- a/changedetectionio/tests/test_share_watch.py +++ b/changedetectionio/tests/test_share_watch.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import time from flask import url_for @@ -9,7 +9,7 @@ import re sleep_time_for_fetch_thread = 3 -def test_share_watch(client, live_server): +def test_share_watch(client, live_server, measure_memory_usage): set_original_response() live_server_setup(live_server) diff --git a/changedetectionio/tests/test_source.py b/changedetectionio/tests/test_source.py index f46e8ad8..cffd1bd6 100644 --- a/changedetectionio/tests/test_source.py +++ b/changedetectionio/tests/test_source.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import time from flask import url_for @@ -10,7 +10,7 @@ sleep_time_for_fetch_thread = 3 def test_setup(live_server): live_server_setup(live_server) -def test_check_basic_change_detection_functionality_source(client, live_server): +def test_check_basic_change_detection_functionality_source(client, live_server, measure_memory_usage): set_original_response() test_url = 'source:'+url_for('test_endpoint', _external=True) # Add our URL to the import page @@ -58,7 +58,7 @@ def test_check_basic_change_detection_functionality_source(client, live_server): # `subtractive_selectors` should still work in `source:` type requests -def test_check_ignore_elements(client, live_server): +def test_check_ignore_elements(client, live_server, measure_memory_usage): set_original_response() time.sleep(1) test_url = 'source:'+url_for('test_endpoint', _external=True) diff --git a/changedetectionio/tests/test_trigger.py b/changedetectionio/tests/test_trigger.py index 
24e9fabf..d13fe79e 100644 --- a/changedetectionio/tests/test_trigger.py +++ b/changedetectionio/tests/test_trigger.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import time from flask import url_for @@ -55,7 +55,7 @@ def set_modified_with_trigger_text_response(): f.write(test_return_data) -def test_trigger_functionality(client, live_server): +def test_trigger_functionality(client, live_server, measure_memory_usage): live_server_setup(live_server) diff --git a/changedetectionio/tests/test_trigger_regex.py b/changedetectionio/tests/test_trigger_regex.py index 7f070e89..ef465a35 100644 --- a/changedetectionio/tests/test_trigger_regex.py +++ b/changedetectionio/tests/test_trigger_regex.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import time from flask import url_for @@ -22,7 +22,7 @@ def set_original_ignore_response(): -def test_trigger_regex_functionality(client, live_server): +def test_trigger_regex_functionality(client, live_server, measure_memory_usage): live_server_setup(live_server) diff --git a/changedetectionio/tests/test_trigger_regex_with_filter.py b/changedetectionio/tests/test_trigger_regex_with_filter.py index 4f3328fc..4e3eba72 100644 --- a/changedetectionio/tests/test_trigger_regex_with_filter.py +++ b/changedetectionio/tests/test_trigger_regex_with_filter.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import time from flask import url_for @@ -22,7 +22,7 @@ def set_original_ignore_response(): -def test_trigger_regex_functionality_with_filter(client, live_server): +def test_trigger_regex_functionality_with_filter(client, live_server, measure_memory_usage): live_server_setup(live_server) sleep_time_for_fetch_thread = 3 diff --git a/changedetectionio/tests/test_unique_lines.py b/changedetectionio/tests/test_unique_lines.py index 4eff11fd..65840073 100644 --- a/changedetectionio/tests/test_unique_lines.py +++ b/changedetectionio/tests/test_unique_lines.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env 
python3 import time from flask import url_for @@ -66,10 +66,10 @@ def set_modified_with_trigger_text_response(): with open("test-datastore/endpoint-content.txt", "w") as f: f.write(test_return_data) -def test_setup(client, live_server): +def test_setup(client, live_server, measure_memory_usage): live_server_setup(live_server) -def test_unique_lines_functionality(client, live_server): +def test_unique_lines_functionality(client, live_server, measure_memory_usage): #live_server_setup(live_server) @@ -118,7 +118,7 @@ def test_unique_lines_functionality(client, live_server): res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) assert b'Deleted' in res.data -def test_sort_lines_functionality(client, live_server): +def test_sort_lines_functionality(client, live_server, measure_memory_usage): #live_server_setup(live_server) set_modified_swapped_lines_with_extra_text_for_sorting() diff --git a/changedetectionio/tests/test_watch_fields_storage.py b/changedetectionio/tests/test_watch_fields_storage.py index 7dc3f748..5737c9c7 100644 --- a/changedetectionio/tests/test_watch_fields_storage.py +++ b/changedetectionio/tests/test_watch_fields_storage.py @@ -4,7 +4,7 @@ from urllib.request import urlopen from . 
util import set_original_response, set_modified_response, live_server_setup -def test_check_watch_field_storage(client, live_server): +def test_check_watch_field_storage(client, live_server, measure_memory_usage): set_original_response() live_server_setup(live_server) diff --git a/changedetectionio/tests/test_xpath_selector.py b/changedetectionio/tests/test_xpath_selector.py index 1a9c5afa..e8b5d855 100644 --- a/changedetectionio/tests/test_xpath_selector.py +++ b/changedetectionio/tests/test_xpath_selector.py @@ -49,7 +49,7 @@ def set_modified_response(): # Handle utf-8 charset replies https://github.com/dgtlmoon/changedetection.io/pull/613 -def test_check_xpath_filter_utf8(client, live_server): +def test_check_xpath_filter_utf8(client, live_server, measure_memory_usage): filter = '//item/*[self::description]' d = ''' @@ -105,7 +105,7 @@ def test_check_xpath_filter_utf8(client, live_server): # Handle utf-8 charset replies https://github.com/dgtlmoon/changedetection.io/pull/613 -def test_check_xpath_text_function_utf8(client, live_server): +def test_check_xpath_text_function_utf8(client, live_server, measure_memory_usage): filter = '//item/title/text()' d = ''' @@ -168,7 +168,7 @@ def test_check_xpath_text_function_utf8(client, live_server): assert b'Deleted' in res.data -def test_check_markup_xpath_filter_restriction(client, live_server): +def test_check_markup_xpath_filter_restriction(client, live_server, measure_memory_usage): xpath_filter = "//*[contains(@class, 'sametext')]" set_original_response() @@ -214,7 +214,7 @@ def test_check_markup_xpath_filter_restriction(client, live_server): assert b'Deleted' in res.data -def test_xpath_validation(client, live_server): +def test_xpath_validation(client, live_server, measure_memory_usage): # Add our URL to the import page test_url = url_for('test_endpoint', _external=True) res = client.post( @@ -235,7 +235,7 @@ def test_xpath_validation(client, live_server): assert b'Deleted' in res.data -def 
test_xpath23_prefix_validation(client, live_server): +def test_xpath23_prefix_validation(client, live_server, measure_memory_usage): # Add our URL to the import page test_url = url_for('test_endpoint', _external=True) res = client.post( @@ -255,7 +255,7 @@ def test_xpath23_prefix_validation(client, live_server): res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) assert b'Deleted' in res.data -def test_xpath1_lxml(client, live_server): +def test_xpath1_lxml(client, live_server, measure_memory_usage): #live_server_setup(live_server) d = ''' @@ -319,7 +319,7 @@ def test_xpath1_lxml(client, live_server): ##### -def test_xpath1_validation(client, live_server): +def test_xpath1_validation(client, live_server, measure_memory_usage): # Add our URL to the import page test_url = url_for('test_endpoint', _external=True) res = client.post( @@ -341,7 +341,7 @@ def test_xpath1_validation(client, live_server): # actually only really used by the distll.io importer, but could be handy too -def test_check_with_prefix_include_filters(client, live_server): +def test_check_with_prefix_include_filters(client, live_server, measure_memory_usage): res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) assert b'Deleted' in res.data @@ -378,7 +378,7 @@ def test_check_with_prefix_include_filters(client, live_server): client.get(url_for("form_delete", uuid="all"), follow_redirects=True) -def test_various_rules(client, live_server): +def test_various_rules(client, live_server, measure_memory_usage): # Just check these don't error # live_server_setup(live_server) with open("test-datastore/endpoint-content.txt", "w") as f: @@ -426,7 +426,7 @@ def test_various_rules(client, live_server): assert b'Deleted' in res.data -def test_xpath_20(client, live_server): +def test_xpath_20(client, live_server, measure_memory_usage): test_url = url_for('test_endpoint', _external=True) res = client.post( url_for("import_page"), @@ -463,7 +463,7 @@ def 
test_xpath_20(client, live_server): client.get(url_for("form_delete", uuid="all"), follow_redirects=True) -def test_xpath_20_function_count(client, live_server): +def test_xpath_20_function_count(client, live_server, measure_memory_usage): set_original_response() # Add our URL to the import page @@ -499,7 +499,7 @@ def test_xpath_20_function_count(client, live_server): client.get(url_for("form_delete", uuid="all"), follow_redirects=True) -def test_xpath_20_function_count2(client, live_server): +def test_xpath_20_function_count2(client, live_server, measure_memory_usage): set_original_response() # Add our URL to the import page @@ -535,7 +535,7 @@ def test_xpath_20_function_count2(client, live_server): client.get(url_for("form_delete", uuid="all"), follow_redirects=True) -def test_xpath_20_function_string_join_matches(client, live_server): +def test_xpath_20_function_string_join_matches(client, live_server, measure_memory_usage): set_original_response() # Add our URL to the import page diff --git a/changedetectionio/tests/unit/test_jinja2_security.py b/changedetectionio/tests/unit/test_jinja2_security.py index eb43db9d..7616bb65 100644 --- a/changedetectionio/tests/unit/test_jinja2_security.py +++ b/changedetectionio/tests/unit/test_jinja2_security.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # run from dir above changedetectionio/ dir # python3 -m unittest changedetectionio.tests.unit.test_jinja2_security diff --git a/changedetectionio/tests/unit/test_notification_diff.py b/changedetectionio/tests/unit/test_notification_diff.py index c7befc20..b106f6a1 100755 --- a/changedetectionio/tests/unit/test_notification_diff.py +++ b/changedetectionio/tests/unit/test_notification_diff.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # run from dir above changedetectionio/ dir # python3 -m unittest changedetectionio.tests.unit.test_notification_diff diff --git a/changedetectionio/tests/unit/test_restock_logic.py 
b/changedetectionio/tests/unit/test_restock_logic.py new file mode 100644 index 00000000..46fff2c7 --- /dev/null +++ b/changedetectionio/tests/unit/test_restock_logic.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 + +# run from dir above changedetectionio/ dir +# python3 -m unittest changedetectionio.tests.unit.test_restock_logic + +import unittest +import os + +from changedetectionio.processors import restock_diff + +# mostly +class TestDiffBuilder(unittest.TestCase): + + def test_logic(self): + assert restock_diff.is_between(number=10, lower=9, upper=11) == True, "Between 9 and 11" + assert restock_diff.is_between(number=10, lower=0, upper=11) == True, "Between 9 and 11" + assert restock_diff.is_between(number=10, lower=None, upper=11) == True, "Between None and 11" + assert not restock_diff.is_between(number=12, lower=None, upper=11) == True, "12 is not between None and 11" + +if __name__ == '__main__': + unittest.main() diff --git a/changedetectionio/tests/unit/test_watch_model.py b/changedetectionio/tests/unit/test_watch_model.py index 78ca60b2..a9e3df3c 100644 --- a/changedetectionio/tests/unit/test_watch_model.py +++ b/changedetectionio/tests/unit/test_watch_model.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # run from dir above changedetectionio/ dir # python3 -m unittest changedetectionio.tests.unit.test_notification_diff diff --git a/changedetectionio/tests/util.py b/changedetectionio/tests/util.py index 186fc736..1b0b3e5b 100644 --- a/changedetectionio/tests/util.py +++ b/changedetectionio/tests/util.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 from flask import make_response, request from flask import url_for @@ -121,18 +121,21 @@ def extract_UUID_from_client(client): return uuid.strip() def wait_for_all_checks(client): + # actually this is not entirely true, it can still be 'processing' but not in the queue # Loop waiting until done.. 
attempt=0 - time.sleep(0.1) + # because sub-second rechecks are problematic in testing, use lots of delays + time.sleep(1) while attempt < 60: - time.sleep(1) res = client.get(url_for("index")) if not b'Checking now' in res.data: break logging.getLogger().info("Waiting for watch-list to not say 'Checking now'.. {}".format(attempt)) - + time.sleep(1) attempt += 1 + time.sleep(1) + def live_server_setup(live_server): @live_server.app.route('/test-random-content-endpoint') @@ -140,6 +143,9 @@ def live_server_setup(live_server): import secrets return "Random content - {}\n".format(secrets.token_hex(64)) + @live_server.app.route('/test-endpoint2') + def test_endpoint2(): + return "some basic content" @live_server.app.route('/test-endpoint') def test_endpoint(): diff --git a/changedetectionio/tests/visualselector/conftest.py b/changedetectionio/tests/visualselector/conftest.py index 430513d4..9a4bf56b 100644 --- a/changedetectionio/tests/visualselector/conftest.py +++ b/changedetectionio/tests/visualselector/conftest.py @@ -1,3 +1,3 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 from .. 
import conftest diff --git a/changedetectionio/tests/visualselector/test_fetch_data.py b/changedetectionio/tests/visualselector/test_fetch_data.py index 15677f31..de3b9030 100644 --- a/changedetectionio/tests/visualselector/test_fetch_data.py +++ b/changedetectionio/tests/visualselector/test_fetch_data.py @@ -1,15 +1,15 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import os from flask import url_for from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client -def test_setup(client, live_server): +def test_setup(client, live_server, measure_memory_usage): live_server_setup(live_server) # Add a site in paused mode, add an invalid filter, we should still have visual selector data ready -def test_visual_selector_content_ready(client, live_server): +def test_visual_selector_content_ready(client, live_server, measure_memory_usage): import os import json @@ -79,7 +79,7 @@ def test_visual_selector_content_ready(client, live_server): follow_redirects=True ) -def test_basic_browserstep(client, live_server): +def test_basic_browserstep(client, live_server, measure_memory_usage): #live_server_setup(live_server) assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test" diff --git a/changedetectionio/update_worker.py b/changedetectionio/update_worker.py index dcb9dbe7..ba183848 100644 --- a/changedetectionio/update_worker.py +++ b/changedetectionio/update_worker.py @@ -1,18 +1,19 @@ +from .processors.exceptions import ProcessorException +import changedetectionio.content_fetchers.exceptions as content_fetchers_exceptions +from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse +from changedetectionio import html_tools + +import importlib import os -import threading import queue +import threading import time -from . 
import content_fetchers -from changedetectionio import html_tools -from .processors.text_json_diff import FilterNotFoundInResponse -from .processors.restock_diff import UnableToExtractRestockData # A single update worker # # Requests for checking on a single site(watch) from a queue of watches # (another process inserts watches into the queue that are time-ready for checking) -import sys from loguru import logger class update_worker(threading.Thread): @@ -26,7 +27,6 @@ class update_worker(threading.Thread): super().__init__(*args, **kwargs) def queue_notification_for_watch(self, notification_q, n_object, watch): - from changedetectionio import diff dates = [] trigger_text = '' @@ -80,6 +80,9 @@ class update_worker(threading.Thread): 'uuid': watch.get('uuid') if watch else None, 'watch_url': watch.get('url') if watch else None, }) + + n_object.update(watch.extra_notification_token_values()) + logger.trace(f"Main rendered notification placeholders (diff_added etc) calculated in {time.time()-now:.3f}s") logger.debug("Queued notification for sending") notification_q.put(n_object) @@ -225,8 +228,6 @@ class update_worker(threading.Thread): os.unlink(full_path) def run(self): - - from .processors import text_json_diff, restock_diff now = time.time() while not self.app.config.exit.is_set(): @@ -245,41 +246,40 @@ class update_worker(threading.Thread): contents = b'' process_changedetection_results = True update_obj = {} - logger.info(f"Processing watch UUID {uuid} " - f"Priority {queued_item_data.priority} " - f"URL {self.datastore.data['watching'][uuid]['url']}") + + # Clear last errors (move to preflight func?) 
+ self.datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None + + watch = self.datastore.data['watching'].get(uuid) + + logger.info(f"Processing watch UUID {uuid} Priority {queued_item_data.priority} URL {watch['url']}") now = time.time() try: # Processor is what we are using for detecting the "Change" - processor = self.datastore.data['watching'][uuid].get('processor', 'text_json_diff') - # if system... - + processor = watch.get('processor', 'text_json_diff') # Abort processing when the content was the same as the last fetch skip_when_same_checksum = queued_item_data.item.get('skip_when_checksum_same') - # @todo some way to switch by name - # Init a new 'difference_detection_processor' + # Init a new 'difference_detection_processor', first look in processors + processor_module_name = f"changedetectionio.processors.{processor}.processor" + try: + processor_module = importlib.import_module(processor_module_name) + except ModuleNotFoundError as e: + print(f"Processor module '{processor}' not found.") + raise e - if processor == 'restock_diff': - update_handler = restock_diff.perform_site_check(datastore=self.datastore, + update_handler = processor_module.perform_site_check(datastore=self.datastore, watch_uuid=uuid ) - else: - # Used as a default and also by some tests - update_handler = text_json_diff.perform_site_check(datastore=self.datastore, - watch_uuid=uuid - ) - - # Clear last errors (move to preflight func?) - self.datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None update_handler.call_browser() - changed_detected, update_obj, contents = update_handler.run_changedetection(uuid, - skip_when_checksum_same=skip_when_same_checksum, - ) + changed_detected, update_obj, contents = update_handler.run_changedetection( + watch=watch, + skip_when_checksum_same=skip_when_same_checksum, + ) # Re #342 # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes. 
@@ -290,7 +290,17 @@ class update_worker(threading.Thread): logger.critical(f"File permission error updating file, watch: {uuid}") logger.critical(str(e)) process_changedetection_results = False - except content_fetchers.exceptions.ReplyWithContentButNoText as e: + + # A generic other-exception thrown by processors + except ProcessorException as e: + if e.screenshot: + watch.save_screenshot(screenshot=e.screenshot) + if e.xpath_data: + watch.save_xpath_data(data=e.xpath_data) + self.datastore.update_watch(uuid=uuid, update_obj={'last_error': e.message}) + process_changedetection_results = False + + except content_fetchers_exceptions.ReplyWithContentButNoText as e: # Totally fine, it's by choice - just continue on, nothing more to care about # Page had elements/content but no renderable text # Backend (not filters) gave zero output @@ -309,10 +319,14 @@ class update_worker(threading.Thread): }) if e.screenshot: - self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot) + watch.save_screenshot(screenshot=e.screenshot, as_error=True) + + if e.xpath_data: + watch.save_xpath_data(data=e.xpath_data) + process_changedetection_results = False - except content_fetchers.exceptions.Non200ErrorCodeReceived as e: + except content_fetchers_exceptions.Non200ErrorCodeReceived as e: if e.status_code == 403: err_text = "Error - 403 (Access denied) received" elif e.status_code == 404: @@ -325,11 +339,11 @@ class update_worker(threading.Thread): err_text = "Error - Request returned a HTTP error code {}".format(str(e.status_code)) if e.screenshot: - self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True) + watch.save_screenshot(screenshot=e.screenshot, as_error=True) if e.xpath_data: - self.datastore.save_xpath_data(watch_uuid=uuid, data=e.xpath_data, as_error=True) + watch.save_xpath_data(data=e.xpath_data, as_error=True) if e.page_text: - self.datastore.save_error_text(watch_uuid=uuid, contents=e.page_text) + 
watch.save_error_text(contents=e.page_text) self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text}) process_changedetection_results = False @@ -341,16 +355,23 @@ class update_worker(threading.Thread): err_text = "Warning, no filters were found, no change detection ran - Did the page change layout? update your Visual Filter if necessary." self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text}) + # Filter wasnt found, but we should still update the visual selector so that they can have a chance to set it up again + if e.screenshot: + watch.save_screenshot(screenshot=e.screenshot) + + if e.xpath_data: + watch.save_xpath_data(data=e.xpath_data) + # Only when enabled, send the notification - if self.datastore.data['watching'][uuid].get('filter_failure_notification_send', False): - c = self.datastore.data['watching'][uuid].get('consecutive_filter_failures', 5) + if watch.get('filter_failure_notification_send', False): + c = watch.get('consecutive_filter_failures', 5) c += 1 # Send notification if we reached the threshold? threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0) - logger.error(f"Filter for {uuid} not found, consecutive_filter_failures: {c}") + logger.warning(f"Filter for {uuid} not found, consecutive_filter_failures: {c}") if threshold > 0 and c >= threshold: - if not self.datastore.data['watching'][uuid].get('notification_muted'): + if not watch.get('notification_muted'): self.send_filter_failure_notification(uuid) c = 0 @@ -358,24 +379,23 @@ class update_worker(threading.Thread): process_changedetection_results = False - except content_fetchers.exceptions.checksumFromPreviousCheckWasTheSame as e: + except content_fetchers_exceptions.checksumFromPreviousCheckWasTheSame as e: # Yes fine, so nothing todo, don't continue to process. 
process_changedetection_results = False changed_detected = False - self.datastore.update_watch(uuid=uuid, update_obj={'last_error': False}) - except content_fetchers.exceptions.BrowserConnectError as e: + except content_fetchers_exceptions.BrowserConnectError as e: self.datastore.update_watch(uuid=uuid, update_obj={'last_error': e.msg } ) process_changedetection_results = False - except content_fetchers.exceptions.BrowserFetchTimedOut as e: + except content_fetchers_exceptions.BrowserFetchTimedOut as e: self.datastore.update_watch(uuid=uuid, update_obj={'last_error': e.msg } ) process_changedetection_results = False - except content_fetchers.exceptions.BrowserStepsStepException as e: + except content_fetchers_exceptions.BrowserStepsStepException as e: if not self.datastore.data['watching'].get(uuid): continue @@ -401,15 +421,15 @@ class update_worker(threading.Thread): } ) - if self.datastore.data['watching'][uuid].get('filter_failure_notification_send', False): - c = self.datastore.data['watching'][uuid].get('consecutive_filter_failures', 5) + if watch.get('filter_failure_notification_send', False): + c = watch.get('consecutive_filter_failures', 5) c += 1 # Send notification if we reached the threshold? threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0) logger.error(f"Step for {uuid} not found, consecutive_filter_failures: {c}") if threshold > 0 and c >= threshold: - if not self.datastore.data['watching'][uuid].get('notification_muted'): + if not watch.get('notification_muted'): self.send_step_failure_notification(watch_uuid=uuid, step_n=e.step_n) c = 0 @@ -417,56 +437,48 @@ class update_worker(threading.Thread): process_changedetection_results = False - except content_fetchers.exceptions.EmptyReply as e: + except content_fetchers_exceptions.EmptyReply as e: # Some kind of custom to-str handler in the exception handler that does this? 
err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code) self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, 'last_check_status': e.status_code}) process_changedetection_results = False - except content_fetchers.exceptions.ScreenshotUnavailable as e: + except content_fetchers_exceptions.ScreenshotUnavailable as e: err_text = "Screenshot unavailable, page did not render fully in the expected time or page was too long - try increasing 'Wait seconds before extracting text'" self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, 'last_check_status': e.status_code}) process_changedetection_results = False - except content_fetchers.exceptions.JSActionExceptions as e: + except content_fetchers_exceptions.JSActionExceptions as e: err_text = "Error running JS Actions - Page request - "+e.message if e.screenshot: - self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True) + watch.save_screenshot(screenshot=e.screenshot, as_error=True) self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, 'last_check_status': e.status_code}) process_changedetection_results = False - except content_fetchers.exceptions.PageUnloadable as e: + except content_fetchers_exceptions.PageUnloadable as e: err_text = "Page request from server didnt respond correctly" if e.message: err_text = "{} - {}".format(err_text, e.message) if e.screenshot: - self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True) + watch.save_screenshot(screenshot=e.screenshot, as_error=True) self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, 'last_check_status': e.status_code, 'has_ldjson_price_data': None}) process_changedetection_results = False - except content_fetchers.exceptions.BrowserStepsInUnsupportedFetcher as e: + except content_fetchers_exceptions.BrowserStepsInUnsupportedFetcher as e: err_text = "This watch has 
Browser Steps configured and so it cannot run with the 'Basic fast Plaintext/HTTP Client', either remove the Browser Steps or select a Chrome fetcher." self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text}) process_changedetection_results = False logger.error(f"Exception (BrowserStepsInUnsupportedFetcher) reached processing watch UUID: {uuid}") - except UnableToExtractRestockData as e: - # Usually when fetcher.instock_data returns empty - logger.error(f"Exception (UnableToExtractRestockData) reached processing watch UUID: {uuid}") - logger.error(str(e)) - self.datastore.update_watch(uuid=uuid, update_obj={'last_error': f"Unable to extract restock data for this page unfortunately. (Got code {e.status_code} from server)"}) - process_changedetection_results = False except Exception as e: logger.error(f"Exception reached processing watch UUID: {uuid}") logger.error(str(e)) self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Exception: " + str(e)}) # Other serious error process_changedetection_results = False -# import traceback -# print(traceback.format_exc()) else: # Crash protection, the watch entry could have been removed by this point (during a slow chrome fetch etc) @@ -474,7 +486,7 @@ class update_worker(threading.Thread): continue # Mark that we never had any failures - if not self.datastore.data['watching'][uuid].get('ignore_status_codes'): + if not watch.get('ignore_status_codes'): update_obj['consecutive_filter_failures'] = 0 # Everything ran OK, clean off any previous error @@ -482,28 +494,57 @@ class update_worker(threading.Thread): self.cleanup_error_artifacts(uuid) + if not self.datastore.data['watching'].get(uuid): + continue # # Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc if process_changedetection_results: + + # Extract as title if possible/requested. 
+ if self.datastore.data['settings']['application'].get('extract_title_as_title') or watch['extract_title_as_title']: + if not watch['title'] or not len(watch['title']): + try: + update_obj['title'] = html_tools.extract_element(find='title', html_content=update_handler.fetcher.content) + logger.info(f"UUID: {uuid} Extract <title> updated title to '{update_obj['title']}") + except Exception as e: + logger.warning(f"UUID: {uuid} Extract <title> as watch title was enabled, but couldn't find a <title>.") + + # Now update after running everything + timestamp = round(time.time()) try: - watch = self.datastore.data['watching'].get(uuid) self.datastore.update_watch(uuid=uuid, update_obj=update_obj) - # Also save the snapshot on the first time checked - if changed_detected or not watch['last_checked']: + + # Also save the snapshot on the first time checked, "last checked" will always be updated, so we just check history length. + if changed_detected or not watch.history_n: + + if update_handler.screenshot: + watch.save_screenshot(screenshot=update_handler.screenshot) + + if update_handler.xpath_data: + watch.save_xpath_data(data=update_handler.xpath_data) + + # Small hack so that we sleep just enough to allow 1 second between history snapshots + # this is because history.txt indexes/keys snapshots by epoch seconds and we dont want dupe keys + + if watch.newest_history_key and int(timestamp) == int(watch.newest_history_key): + logger.warning( + f"Timestamp {timestamp} already exists, waiting 1 seconds so we have a unique key in history.txt") + timestamp = str(int(timestamp) + 1) + time.sleep(1) + watch.save_history_text(contents=contents, - timestamp=str(round(time.time())), + timestamp=timestamp, snapshot_id=update_obj.get('previous_md5', 'none')) - # A change was detected - if changed_detected: + if update_handler.fetcher.content: + watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=timestamp) + # Notifications should only trigger on the second 
time (first time, we gather the initial snapshot) if watch.history_n >= 2: logger.info(f"Change detected in UUID {uuid} - {watch['url']}") - if not self.datastore.data['watching'][uuid].get('notification_muted'): + if not watch.get('notification_muted'): self.send_content_changed_notification(watch_uuid=uuid) - else: - logger.info(f"Change triggered in UUID {uuid} due to first history saving (no notifications sent) - {watch['url']}") except Exception as e: # Catch everything possible here, so that if a worker crashes, we don't lose it until restart! @@ -511,29 +552,23 @@ class update_worker(threading.Thread): logger.critical(str(e)) self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)}) - if self.datastore.data['watching'].get(uuid): - # Always record that we atleast tried - count = self.datastore.data['watching'][uuid].get('check_count', 0) + 1 - # Record the 'server' header reply, can be used for actions in the future like cloudflare/akamai workarounds - try: - server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255] - self.datastore.update_watch(uuid=uuid, - update_obj={'remote_server_reply': server_header} - ) - except Exception as e: - pass - - self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3), - 'last_checked': round(time.time()), - 'check_count': count - }) - - # Always save the screenshot if it's available - if update_handler.screenshot: - self.datastore.save_screenshot(watch_uuid=uuid, screenshot=update_handler.screenshot) - if update_handler.xpath_data: - self.datastore.save_xpath_data(watch_uuid=uuid, data=update_handler.xpath_data) + # Always record that we atleast tried + count = watch.get('check_count', 0) + 1 + + # Record the 'server' header reply, can be used for actions in the future like cloudflare/akamai workarounds + try: + server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255] + self.datastore.update_watch(uuid=uuid, + 
update_obj={'remote_server_reply': server_header} + ) + except Exception as e: + pass + + self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3), + 'last_checked': round(time.time()), + 'check_count': count + }) self.current_uuid = None # Done diff --git a/docker-compose.yml b/docker-compose.yml index 4cf17605..2480a339 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -68,9 +68,10 @@ services: # If WEBDRIVER or PLAYWRIGHT are enabled, changedetection container depends on that # and must wait before starting (substitute "browser-chrome" with "playwright-chrome" if last one is used) -# depends_on: -# browser-chrome: -# condition: service_started +# depends_on: +# playwright-chrome: +# condition: service_started + # Used for fetching pages via Playwright+Chrome where you need Javascript support. # RECOMMENDED FOR FETCHING PAGES WITH CHROME diff --git a/docs/restock-overview.png b/docs/restock-overview.png new file mode 100644 index 00000000..c4a4e78f Binary files /dev/null and b/docs/restock-overview.png differ diff --git a/docs/restock-settings.png b/docs/restock-settings.png new file mode 100644 index 00000000..1e468632 Binary files /dev/null and b/docs/restock-settings.png differ diff --git a/requirements.txt b/requirements.txt index 8dc317eb..49a3762e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ # Used by Pyppeteer pyee -eventlet==0.33.3 # related to dnspython fixes +eventlet>=0.36.1 # fixes SSL error on Python 3.12 feedgen~=0.9 flask-compress # 0.6.3 included compatibility fix for werkzeug 3.x (2.x had deprecation of url handlers) @@ -22,31 +22,28 @@ validators~=0.21 # >= 2.26 also adds Brotli support if brotli is installed brotli~=1.0 requests[socks] +requests-file -urllib3==1.26.18 +urllib3==1.26.19 chardet>2.3.0 wtforms~=3.0 jsonpath-ng~=1.5.3 -# Pinned: module 'eventlet.green.select' has no attribute 'epoll' -# https://github.com/eventlet/eventlet/issues/805#issuecomment-1640463482 
-dnspython==2.3.0 # related to eventlet fixes +dnspython==2.6.1 # related to eventlet fixes # jq not available on Windows so must be installed manually # Notification library -apprise~=1.8.0 +apprise~=1.8.1 # apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315 # and 2.0.0 https://github.com/dgtlmoon/changedetection.io/issues/2241 not yet compatible # use v1.x due to https://github.com/eclipse/paho.mqtt.python/issues/814 paho-mqtt>=1.6.1,<2.0.0 -# This mainly affects some ARM builds, which unlike the other builds ignores "ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1" -# so without this pinning, the newer versions on ARM will forcefully try to build rust, which results in "rust compiler not found" -# (introduced once apprise became a dep) -cryptography~=3.4 +# Requires extra wheel for rPi +cryptography~=42.0.8 # Used for CSS filtering beautifulsoup4 @@ -73,8 +70,6 @@ openpyxl jq~=1.3; python_version >= "3.8" and sys_platform == "darwin" jq~=1.3; python_version >= "3.8" and sys_platform == "linux" -# Any current modern version, required so far for screenshot PNG->JPEG conversion but will be used more in the future -pillow # playwright is installed at Dockerfile build time because it's not available on all platforms pyppeteer-ng==2.0.0rc5 @@ -88,3 +83,12 @@ pytest-flask ~=1.2 jsonschema==4.17.3 loguru + +# For scraping all possible metadata relating to products so we can do better restock detection +extruct + +# For cleaning up unknown currency formats +babel + +# Needed for > 3.10, https://github.com/microsoft/playwright-python/issues/2096 +greenlet >= 3.0.3