Merge branch 'master' into diff-filters

diff-proposed-for-bwees
dgtlmoon 2 years ago
commit 802daa6296

@ -26,6 +26,11 @@ RUN pip install --target=/dependencies -r /requirements.txt
RUN pip install --target=/dependencies playwright~=1.26 \ RUN pip install --target=/dependencies playwright~=1.26 \
|| echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled." || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
RUN pip install --target=/dependencies jq~=1.3 \
|| echo "WARN: Failed to install JQ. The application can still run, but the Jq: filter option will be disabled."
# Final image stage # Final image stage
FROM python:3.8-slim FROM python:3.8-slim

@ -121,8 +121,8 @@ See the wiki for more information https://github.com/dgtlmoon/changedetection.io
## Filters ## Filters
XPath, JSONPath, jq, and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools.
XPath, JSONPath, jq, and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools.
(We support LXML `re:test`, `re:math` and `re:replace`.) (We support LXML `re:test`, `re:math` and `re:replace`.)
## Notifications ## Notifications
@ -163,7 +163,11 @@ This will re-parse the JSON and apply formatting to the text, making it super ea
For more complex parsing, filtering, and modifying of JSON data, jq is recommended due to the built-in operators and functions. Refer to the [documentation](https://stedolan.github.io/jq/manual/) for more information on jq. For more complex parsing, filtering, and modifying of JSON data, jq is recommended due to the built-in operators and functions. Refer to the [documentation](https://stedolan.github.io/jq/manual/) for more information on jq.
The example below adds the price in dollars to each item in the JSON data, and then filters to only show items that are greater than 10. Notes:
- `jq` must be added manually separately from the installation of changedetection.io (simply run `pip3 install jq`)
- `jq` is not available on Windows or must be manually compiled (No "wheel" package available on pypi)
- The example below adds the price in dollars to each item in the JSON data, and then filters to only show items that are greater than 10.
#### Sample input data from API #### Sample input data from API
``` ```

@ -33,7 +33,7 @@ from flask_wtf import CSRFProtect
from changedetectionio import html_tools from changedetectionio import html_tools
from changedetectionio.api import api_v1 from changedetectionio.api import api_v1
__version__ = '0.39.20.1' __version__ = '0.39.20.2'
datastore = None datastore = None
@ -636,20 +636,27 @@ def changedetection_app(config=None, datastore_o=None):
# Only works reliably with Playwright # Only works reliably with Playwright
visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and default['fetch_backend'] == 'html_webdriver' visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and default['fetch_backend'] == 'html_webdriver'
# JQ is difficult to install on windows and must be manually added (outside requirements.txt)
jq_support = True
try:
import jq
except ModuleNotFoundError:
jq_support = False
output = render_template("edit.html", output = render_template("edit.html",
uuid=uuid,
watch=datastore.data['watching'][uuid],
form=form,
has_empty_checktime=using_default_check_time,
has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
using_global_webdriver_wait=default['webdriver_delay'] is None,
current_base_url=datastore.data['settings']['application']['base_url'], current_base_url=datastore.data['settings']['application']['base_url'],
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False), emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
form=form,
has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
has_empty_checktime=using_default_check_time,
jq_support=jq_support,
playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False),
settings_application=datastore.data['settings']['application'], settings_application=datastore.data['settings']['application'],
using_global_webdriver_wait=default['webdriver_delay'] is None,
uuid=uuid,
visualselector_data_is_ready=visualselector_data_is_ready, visualselector_data_is_ready=visualselector_data_is_ready,
visualselector_enabled=visualselector_enabled, visualselector_enabled=visualselector_enabled,
playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False) watch=datastore.data['watching'][uuid],
) )
return output return output

@ -303,12 +303,16 @@ class ValidateCSSJSONXPATHInput(object):
# Re #265 - maybe in the future fetch the page and offer a # Re #265 - maybe in the future fetch the page and offer a
# warning/notice that its possible the rule doesnt yet match anything? # warning/notice that its possible the rule doesnt yet match anything?
if 'jq:' in line:
if not self.allow_json: if not self.allow_json:
raise ValidationError("jq not permitted in this field!") raise ValidationError("jq not permitted in this field!")
if 'jq:' in line:
try:
import jq import jq
except ModuleNotFoundError:
# `jq` requires full compilation in windows and so isn't generally available
raise ValidationError("jq not support not found")
input = line.replace('jq:', '') input = line.replace('jq:', '')
try: try:

@ -1,12 +1,11 @@
import json
from typing import List
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from jsonpath_ng.ext import parse
import jq
import re
from inscriptis import get_text from inscriptis import get_text
from inscriptis.model.config import ParserConfig from inscriptis.model.config import ParserConfig
from jsonpath_ng.ext import parse
from typing import List
import json
import re
class FilterNotFoundInResponse(ValueError): class FilterNotFoundInResponse(ValueError):
def __init__(self, msg): def __init__(self, msg):
@ -85,9 +84,18 @@ def _parse_json(json_data, json_filter):
jsonpath_expression = parse(json_filter.replace('json:', '')) jsonpath_expression = parse(json_filter.replace('json:', ''))
match = jsonpath_expression.find(json_data) match = jsonpath_expression.find(json_data)
return _get_stripped_text_from_json_match(match) return _get_stripped_text_from_json_match(match)
if 'jq:' in json_filter: if 'jq:' in json_filter:
try:
import jq
except ModuleNotFoundError:
# `jq` requires full compilation in windows and so isn't generally available
raise Exception("jq not support not found")
jq_expression = jq.compile(json_filter.replace('jq:', '')) jq_expression = jq.compile(json_filter.replace('jq:', ''))
match = jq_expression.input(json_data).all() match = jq_expression.input(json_data).all()
return _get_stripped_text_from_json_match(match) return _get_stripped_text_from_json_match(match)
def _get_stripped_text_from_json_match(match): def _get_stripped_text_from_json_match(match):

@ -23,6 +23,13 @@ export BASE_URL="https://really-unique-domain.io"
pytest tests/test_notification.py pytest tests/test_notification.py
## JQ + JSON: filter test
# jq is not available on windows and we should just test it when the package is installed
# this will re-test with jq support
pip3 install jq~=1.3
pytest tests/test_jsonpath_jq_selector.py
# Now for the selenium and playwright/browserless fetchers # Now for the selenium and playwright/browserless fetchers
# Note - this is not UI functional tests - just checking that each one can fetch the content # Note - this is not UI functional tests - just checking that each one can fetch the content

@ -194,10 +194,14 @@ User-Agent: wonderbra 1.0") }}
<span class="pure-form-message-inline"> <span class="pure-form-message-inline">
<ul> <ul>
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li> <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a>. <li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
<ul> <ul>
<li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li> <li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li>
{% if jq_support %}
<li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>.</li> <li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>.</li>
{% else %}
<li>jq support not installed</li>
{% endif %}
</ul> </ul>
</li> </li>
<li>XPath - Limit text to this XPath rule, simply start with a forward-slash, <li>XPath - Limit text to this XPath rule, simply start with a forward-slash,
@ -208,7 +212,7 @@ User-Agent: wonderbra 1.0") }}
</ul> </ul>
</li> </li>
</ul> </ul>
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath, or jq selector rules before filing an issue on GitHub! <a Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/> href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/>
</span> </span>
</div> </div>

@ -5,7 +5,12 @@ import time
from flask import url_for, escape from flask import url_for, escape
from . util import live_server_setup from . util import live_server_setup
import pytest import pytest
jq_support = True
try:
import jq
except ModuleNotFoundError:
jq_support = False
def test_setup(live_server): def test_setup(live_server):
live_server_setup(live_server) live_server_setup(live_server)
@ -40,13 +45,14 @@ and it can also be repeated
assert text == "23.5" assert text == "23.5"
# also check for jq # also check for jq
if jq_support:
text = html_tools.extract_json_as_string(content, "jq:.offers.price") text = html_tools.extract_json_as_string(content, "jq:.offers.price")
assert text == "23.5" assert text == "23.5"
text = html_tools.extract_json_as_string('{"id":5}', "json:$.id") text = html_tools.extract_json_as_string('{"id":5}', "jq:.id")
assert text == "5" assert text == "5"
text = html_tools.extract_json_as_string('{"id":5}', "jq:.id") text = html_tools.extract_json_as_string('{"id":5}', "json:$.id")
assert text == "5" assert text == "5"
# When nothing at all is found, it should throw JSONNOTFound # When nothing at all is found, it should throw JSONNOTFound
@ -54,6 +60,7 @@ and it can also be repeated
with pytest.raises(html_tools.JSONNotFound) as e_info: with pytest.raises(html_tools.JSONNotFound) as e_info:
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "json:$.id") html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "json:$.id")
if jq_support:
with pytest.raises(html_tools.JSONNotFound) as e_info: with pytest.raises(html_tools.JSONNotFound) as e_info:
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jq:.id") html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jq:.id")
@ -271,6 +278,7 @@ def test_check_jsonpath_filter(client, live_server):
check_json_filter('json:boss.name', client, live_server) check_json_filter('json:boss.name', client, live_server)
def test_check_jq_filter(client, live_server): def test_check_jq_filter(client, live_server):
if jq_support:
check_json_filter('jq:.boss.name', client, live_server) check_json_filter('jq:.boss.name', client, live_server)
def check_json_filter_bool_val(json_filter, client, live_server): def check_json_filter_bool_val(json_filter, client, live_server):
@ -329,6 +337,7 @@ def test_check_jsonpath_filter_bool_val(client, live_server):
check_json_filter_bool_val("json:$['available']", client, live_server) check_json_filter_bool_val("json:$['available']", client, live_server)
def test_check_jq_filter_bool_val(client, live_server): def test_check_jq_filter_bool_val(client, live_server):
if jq_support:
check_json_filter_bool_val("jq:.available", client, live_server) check_json_filter_bool_val("jq:.available", client, live_server)
# Re #265 - Extended JSON selector test # Re #265 - Extended JSON selector test
@ -408,4 +417,5 @@ def test_check_jsonpath_ext_filter(client, live_server):
check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server) check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server)
def test_check_jq_ext_filter(client, live_server): def test_check_jq_ext_filter(client, live_server):
if jq_support:
check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server) check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)

@ -13,9 +13,9 @@ def test_visual_selector_content_ready(client, live_server):
live_server_setup(live_server) live_server_setup(live_server)
time.sleep(1) time.sleep(1)
# Add our URL to the import page, maybe better to use something we control? # Add our URL to the import page, because the docker container (playwright/selenium) wont be able to connect to our usual test url
# We use an external URL because the docker container is too difficult to setup to connect back to the pytest socket test_url = "https://changedetection.io/ci-test/test-runjs.html"
test_url = 'https://news.ycombinator.com'
res = client.post( res = client.post(
url_for("form_quick_watch_add"), url_for("form_quick_watch_add"),
data={"url": test_url, "tag": '', 'edit_and_watch_submit_button': 'Edit > Watch'}, data={"url": test_url, "tag": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
@ -25,13 +25,27 @@ def test_visual_selector_content_ready(client, live_server):
res = client.post( res = client.post(
url_for("edit_page", uuid="first", unpause_on_save=1), url_for("edit_page", uuid="first", unpause_on_save=1),
data={"css_filter": ".does-not-exist", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_webdriver"}, data={
"url": test_url,
"tag": "",
"headers": "",
'fetch_backend': "html_webdriver",
'webdriver_js_execute_code': 'document.querySelector("button[name=test-button]").click();'
},
follow_redirects=True follow_redirects=True
) )
assert b"unpaused" in res.data assert b"unpaused" in res.data
time.sleep(1) time.sleep(1)
wait_for_all_checks(client) wait_for_all_checks(client)
uuid = extract_UUID_from_client(client) uuid = extract_UUID_from_client(client)
# Check the JS execute code before extract worked
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
assert b'I smell JavaScript' in res.data
assert os.path.isfile(os.path.join('test-datastore', uuid, 'last-screenshot.png')), "last-screenshot.png should exist" assert os.path.isfile(os.path.join('test-datastore', uuid, 'last-screenshot.png')), "last-screenshot.png should exist"
assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.json')), "xpath elements.json data should exist" assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.json')), "xpath elements.json data should exist"

@ -19,7 +19,8 @@ chardet > 2.3.0
wtforms ~= 3.0 wtforms ~= 3.0
jsonpath-ng ~= 1.5.3 jsonpath-ng ~= 1.5.3
jq ~= 1.3.0
# jq not available on Windows so must be installed manually
# Notification library # Notification library
apprise ~= 1.1.0 apprise ~= 1.1.0

Loading…
Cancel
Save