From fff32cef0d76acf5e9f7f08c427e6b7bfcb4594a Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Tue, 11 Oct 2022 14:40:36 +0200 Subject: [PATCH 1/3] Adding test - Test the 'execute JS before changedetection' (#1006) --- .../tests/visualselector/test_fetch_data.py | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/changedetectionio/tests/visualselector/test_fetch_data.py b/changedetectionio/tests/visualselector/test_fetch_data.py index 04cd4644..17dcfd9f 100644 --- a/changedetectionio/tests/visualselector/test_fetch_data.py +++ b/changedetectionio/tests/visualselector/test_fetch_data.py @@ -13,9 +13,9 @@ def test_visual_selector_content_ready(client, live_server): live_server_setup(live_server) time.sleep(1) - # Add our URL to the import page, maybe better to use something we control? - # We use an external URL because the docker container is too difficult to setup to connect back to the pytest socket - test_url = 'https://news.ycombinator.com' + # Add our URL to the import page, because the docker container (playwright/selenium) wont be able to connect to our usual test url + test_url = "https://changedetection.io/ci-test/test-runjs.html" + res = client.post( url_for("form_quick_watch_add"), data={"url": test_url, "tag": '', 'edit_and_watch_submit_button': 'Edit > Watch'}, @@ -25,13 +25,27 @@ def test_visual_selector_content_ready(client, live_server): res = client.post( url_for("edit_page", uuid="first", unpause_on_save=1), - data={"css_filter": ".does-not-exist", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_webdriver"}, + data={ + "url": test_url, + "tag": "", + "headers": "", + 'fetch_backend': "html_webdriver", + 'webdriver_js_execute_code': 'document.querySelector("button[name=test-button]").click();' + }, follow_redirects=True ) assert b"unpaused" in res.data time.sleep(1) wait_for_all_checks(client) uuid = extract_UUID_from_client(client) + + # Check the JS execute code before extract worked + res = 
client.get( + url_for("preview_page", uuid="first"), + follow_redirects=True + ) + assert b'I smell JavaScript' in res.data + assert os.path.isfile(os.path.join('test-datastore', uuid, 'last-screenshot.png')), "last-screenshot.png should exist" assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.json')), "xpath elements.json data should exist" From 32ea1a8721e4ddbd69967a7e4753448f1e2ea6ad Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Wed, 12 Oct 2022 09:53:16 +0200 Subject: [PATCH 2/3] Windows - JQ - Make library optional so it doesnt break Windows pip installs (#1009) --- Dockerfile | 5 ++++ README.md | 8 +++-- changedetectionio/__init__.py | 21 ++++++++----- changedetectionio/forms.py | 10 +++++-- changedetectionio/html_tools.py | 18 +++++++---- changedetectionio/run_all_tests.sh | 7 +++++ changedetectionio/templates/edit.html | 8 +++-- .../tests/test_jsonpath_jq_selector.py | 30 ++++++++++++------- requirements.txt | 3 +- 9 files changed, 80 insertions(+), 30 deletions(-) diff --git a/Dockerfile b/Dockerfile index 24d3490e..d422918e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,6 +26,11 @@ RUN pip install --target=/dependencies -r /requirements.txt RUN pip install --target=/dependencies playwright~=1.26 \ || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled." + +RUN pip install --target=/dependencies jq~=1.3 \ + || echo "WARN: Failed to install JQ. The application can still run, but the Jq: filter option will be disabled." + + # Final image stage FROM python:3.8-slim diff --git a/README.md b/README.md index 797f8c56..03b12463 100644 --- a/README.md +++ b/README.md @@ -121,8 +121,8 @@ See the wiki for more information https://github.com/dgtlmoon/changedetection.io ## Filters -XPath, JSONPath, jq, and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools. 
+XPath, JSONPath, jq, and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools. (We support LXML `re:test`, `re:math` and `re:replace`.) ## Notifications @@ -163,7 +163,11 @@ This will re-parse the JSON and apply formatting to the text, making it super ea For more complex parsing, filtering, and modifying of JSON data, jq is recommended due to the built-in operators and functions. Refer to the [documentation](https://stedolan.github.io/jq/manual/) for more information on jq. -The example below adds the price in dollars to each item in the JSON data, and then filters to only show items that are greater than 10. +Notes: +- `jq` is not installed together with changedetection.io and must be added separately (simply run `pip3 install jq`) +- `jq` is not available on Windows unless it is compiled manually (no "wheel" package is available on PyPI) + +The example below adds the price in dollars to each item in the JSON data, and then filters to only show items that are greater than 10. 
#### Sample input data from API ``` diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index 8f6d5a55..e766f78b 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -636,20 +636,27 @@ def changedetection_app(config=None, datastore_o=None): # Only works reliably with Playwright visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and default['fetch_backend'] == 'html_webdriver' + # JQ is difficult to install on windows and must be manually added (outside requirements.txt) + jq_support = True + try: + import jq + except ModuleNotFoundError: + jq_support = False output = render_template("edit.html", - uuid=uuid, - watch=datastore.data['watching'][uuid], - form=form, - has_empty_checktime=using_default_check_time, - has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False, - using_global_webdriver_wait=default['webdriver_delay'] is None, current_base_url=datastore.data['settings']['application']['base_url'], emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False), + form=form, + has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False, + has_empty_checktime=using_default_check_time, + jq_support=jq_support, + playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False), settings_application=datastore.data['settings']['application'], + using_global_webdriver_wait=default['webdriver_delay'] is None, + uuid=uuid, visualselector_data_is_ready=visualselector_data_is_ready, visualselector_enabled=visualselector_enabled, - playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False) + watch=datastore.data['watching'][uuid], ) return output diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py index 7fa17f90..51e02884 100644 --- a/changedetectionio/forms.py +++ b/changedetectionio/forms.py @@ -303,12 +303,16 @@ class ValidateCSSJSONXPATHInput(object): # Re #265 - maybe 
in the future fetch the page and offer a # warning/notice that its possible the rule doesnt yet match anything? - - if 'jq:' in line: if not self.allow_json: raise ValidationError("jq not permitted in this field!") - import jq + if 'jq:' in line: + try: + import jq + except ModuleNotFoundError: + # `jq` requires full compilation in windows and so isn't generally available + raise ValidationError("jq not support not found") + input = line.replace('jq:', '') try: diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py index 6cc8e20a..167d0f77 100644 --- a/changedetectionio/html_tools.py +++ b/changedetectionio/html_tools.py @@ -1,12 +1,11 @@ -import json -from typing import List from bs4 import BeautifulSoup -from jsonpath_ng.ext import parse -import jq -import re from inscriptis import get_text from inscriptis.model.config import ParserConfig +from jsonpath_ng.ext import parse +from typing import List +import json +import re class FilterNotFoundInResponse(ValueError): def __init__(self, msg): @@ -85,9 +84,18 @@ def _parse_json(json_data, json_filter): jsonpath_expression = parse(json_filter.replace('json:', '')) match = jsonpath_expression.find(json_data) return _get_stripped_text_from_json_match(match) + if 'jq:' in json_filter: + + try: + import jq + except ModuleNotFoundError: + # `jq` requires full compilation in windows and so isn't generally available + raise Exception("jq not support not found") + jq_expression = jq.compile(json_filter.replace('jq:', '')) match = jq_expression.input(json_data).all() + return _get_stripped_text_from_json_match(match) def _get_stripped_text_from_json_match(match): diff --git a/changedetectionio/run_all_tests.sh b/changedetectionio/run_all_tests.sh index e4ea3bac..28dd85c6 100755 --- a/changedetectionio/run_all_tests.sh +++ b/changedetectionio/run_all_tests.sh @@ -23,6 +23,13 @@ export BASE_URL="https://really-unique-domain.io" pytest tests/test_notification.py +## JQ + JSON: filter test +# jq is not 
available on Windows, so it is only tested once the package has been installed +# install it here and re-run the selector tests with jq support +pip3 install jq~=1.3 +pytest tests/test_jsonpath_jq_selector.py + + # Now for the selenium and playwright/browserless fetchers # Note - this is not UI functional tests - just checking that each one can fetch the content diff --git a/changedetectionio/templates/edit.html b/changedetectionio/templates/edit.html index 907894e1..59d95317 100644 --- a/changedetectionio/templates/edit.html +++ b/changedetectionio/templates/edit.html @@ -184,10 +184,14 @@ User-Agent: wonderbra 1.0") }}
  • CSS - Limit text to this CSS rule, only text matching this CSS rule is included.
  • -
  • JSON - Limit text to this JSON rule, using either JSONPath or jq. +
  • JSON - Limit text to this JSON rule, using either JSONPath or jq (if installed).
    • JSONPath: Prefix with json:, use json:$ to force re-formatting if required, test your JSONPath here.
    • + {% if jq_support %}
    • jq: Prefix with jq: and test your jq here. Using jq allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation here.
    • + {% else %} +
    • jq support not installed
    • + {% endif %}
  • XPath - Limit text to this XPath rule, simply start with a forward-slash, @@ -198,7 +202,7 @@ User-Agent: wonderbra 1.0") }}
- Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath, or jq selector rules before filing an issue on GitHub! here for more CSS selector help.
diff --git a/changedetectionio/tests/test_jsonpath_jq_selector.py b/changedetectionio/tests/test_jsonpath_jq_selector.py index d0082122..f6da84db 100644 --- a/changedetectionio/tests/test_jsonpath_jq_selector.py +++ b/changedetectionio/tests/test_jsonpath_jq_selector.py @@ -5,7 +5,12 @@ import time from flask import url_for, escape from . util import live_server_setup import pytest +jq_support = True +try: + import jq +except ModuleNotFoundError: + jq_support = False def test_setup(live_server): live_server_setup(live_server) @@ -40,13 +45,14 @@ and it can also be repeated assert text == "23.5" # also check for jq - text = html_tools.extract_json_as_string(content, "jq:.offers.price") - assert text == "23.5" + if jq_support: + text = html_tools.extract_json_as_string(content, "jq:.offers.price") + assert text == "23.5" - text = html_tools.extract_json_as_string('{"id":5}', "json:$.id") - assert text == "5" + text = html_tools.extract_json_as_string('{"id":5}', "jq:.id") + assert text == "5" - text = html_tools.extract_json_as_string('{"id":5}', "jq:.id") + text = html_tools.extract_json_as_string('{"id":5}', "json:$.id") assert text == "5" # When nothing at all is found, it should throw JSONNOTFound @@ -54,8 +60,9 @@ and it can also be repeated with pytest.raises(html_tools.JSONNotFound) as e_info: html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "json:$.id") - with pytest.raises(html_tools.JSONNotFound) as e_info: - html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jq:.id") + if jq_support: + with pytest.raises(html_tools.JSONNotFound) as e_info: + html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jq:.id") def set_original_ext_response(): data = """ @@ -271,7 +278,8 @@ def test_check_jsonpath_filter(client, live_server): check_json_filter('json:boss.name', client, live_server) def test_check_jq_filter(client, live_server): - check_json_filter('jq:.boss.name', client, live_server) + if jq_support: + 
check_json_filter('jq:.boss.name', client, live_server) def check_json_filter_bool_val(json_filter, client, live_server): set_original_response() @@ -329,7 +337,8 @@ def test_check_jsonpath_filter_bool_val(client, live_server): check_json_filter_bool_val("json:$['available']", client, live_server) def test_check_jq_filter_bool_val(client, live_server): - check_json_filter_bool_val("jq:.available", client, live_server) + if jq_support: + check_json_filter_bool_val("jq:.available", client, live_server) # Re #265 - Extended JSON selector test # Stuff to consider here @@ -408,4 +417,5 @@ def test_check_jsonpath_ext_filter(client, live_server): check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server) def test_check_jq_ext_filter(client, live_server): - check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server) \ No newline at end of file + if jq_support: + check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 68aabe9a..bffc2a7f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,7 +19,8 @@ chardet > 2.3.0 wtforms ~= 3.0 jsonpath-ng ~= 1.5.3 -jq ~= 1.3.0 + +# jq not available on Windows so must be installed manually # Notification library apprise ~= 1.1.0 From 1b077abd93e7ea9d6ecb7c6c868d9c80d2eed2c9 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Wed, 12 Oct 2022 09:53:59 +0200 Subject: [PATCH 3/3] 0.39.20.2 --- changedetectionio/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index e766f78b..c8d8c52f 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -33,7 +33,7 @@ from flask_wtf import CSRFProtect from changedetectionio import html_tools from changedetectionio.api import api_v1 -__version__ = '0.39.20.1' +__version__ = '0.39.20.2' datastore = None