diff --git a/README-pip.md b/README-pip.md
index 746175db..b6a00d32 100644
--- a/README-pip.md
+++ b/README-pip.md
@@ -33,7 +33,7 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W
#### Key Features
- Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions!
-- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JsonPath rules
+- Target elements with XPath and CSS Selectors, and easily monitor complex JSON with JSONPath or jq
- Switch between fast non-JS and Chrome JS based "fetchers"
- Easily specify how often a site should be checked
- Execute JS before extracting text (Good for logging in, see examples in the UI!)
diff --git a/README.md b/README.md
index 0d08d129..797f8c56 100644
--- a/README.md
+++ b/README.md
@@ -47,7 +47,7 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W
#### Key Features
- Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions!
-- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JsonPath rules
+- Target elements with XPath and CSS Selectors, and easily monitor complex JSON with JSONPath or jq
- Switch between fast non-JS and Chrome JS based "fetchers"
- Easily specify how often a site should be checked
- Execute JS before extracting text (Good for logging in, see examples in the UI!)
@@ -121,7 +121,7 @@ See the wiki for more information https://github.com/dgtlmoon/changedetection.io
## Filters
-XPath, JSONPath and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools.
+XPath, JSONPath, jq, and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools.
(We support LXML `re:test`, `re:math` and `re:replace`.)
@@ -151,7 +151,7 @@ Now you can also customise your notification content!
## JSON API Monitoring
-Detect changes and monitor data in JSON API's by using the built-in JSONPath selectors as a filter / selector.
+Detect changes and monitor data in JSON APIs by using either JSONPath or jq to filter, parse, and restructure JSON as needed.
![image](https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/json-filter-field-example.png)
@@ -159,9 +159,52 @@ This will re-parse the JSON and apply formatting to the text, making it super ea
![image](https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/json-diff-example.png)
+### JSONPath or jq?
+
+For more complex parsing, filtering, and modifying of JSON data, jq is recommended due to the built-in operators and functions. Refer to the [documentation](https://stedolan.github.io/jq/manual/) for more information on jq.
+
+The example below adds the price in dollars to each item in the JSON data, and then filters the results to show only items priced above 10 dollars.
+
+#### Sample input data from API
+```
+{
+ "items": [
+ {
+ "name": "Product A",
+ "priceInCents": 2500
+ },
+ {
+ "name": "Product B",
+ "priceInCents": 500
+ },
+ {
+ "name": "Product C",
+ "priceInCents": 2000
+ }
+ ]
+}
+```
+
+#### Sample jq
+`jq:.items[] | . + { "priceInDollars": (.priceInCents / 100) } | select(.priceInDollars > 10)`
+
+#### Sample output data
+```
+{
+ "name": "Product A",
+ "priceInCents": 2500,
+ "priceInDollars": 25
+}
+{
+ "name": "Product C",
+ "priceInCents": 2000,
+ "priceInDollars": 20
+}
+```
+
### Parse JSON embedded in HTML!
-When you enable a `json:` filter, you can even automatically extract and parse embedded JSON inside a HTML page! Amazingly handy for sites that build content based on JSON, such as many e-commerce websites.
+When you enable a `json:` or `jq:` filter, you can even automatically extract and parse embedded JSON inside an HTML page! Amazingly handy for sites that build content based on JSON, such as many e-commerce websites.
```
@@ -171,7 +214,7 @@ When you enable a `json:` filter, you can even automatically extract and parse e
```
-`json:$.price` would give `23.50`, or you can extract the whole structure
+`json:$.price` or `jq:.price` would give `23.50`, or you can extract the whole structure
## Proxy configuration
diff --git a/changedetectionio/fetch_site_status.py b/changedetectionio/fetch_site_status.py
index 79e282b5..0f84da16 100644
--- a/changedetectionio/fetch_site_status.py
+++ b/changedetectionio/fetch_site_status.py
@@ -141,8 +141,9 @@ class perform_site_check():
has_filter_rule = True
if has_filter_rule:
- if 'json:' in css_filter_rule:
- stripped_text_from_html = html_tools.extract_json_as_string(content=fetcher.content, jsonpath_filter=css_filter_rule)
+ json_filter_prefixes = ['json:', 'jq:']
+ if any(prefix in css_filter_rule for prefix in json_filter_prefixes):
+ stripped_text_from_html = html_tools.extract_json_as_string(content=fetcher.content, json_filter=css_filter_rule)
is_html = False
if is_html or is_source:
diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py
index 279f7c7f..7fa17f90 100644
--- a/changedetectionio/forms.py
+++ b/changedetectionio/forms.py
@@ -304,6 +304,21 @@ class ValidateCSSJSONXPATHInput(object):
# Re #265 - maybe in the future fetch the page and offer a
# warning/notice that its possible the rule doesnt yet match anything?
+ if 'jq:' in line:
+ if not self.allow_json:
+ raise ValidationError("jq not permitted in this field!")
+
+ import jq
+ input = line.replace('jq:', '')
+
+ try:
+ jq.compile(input)
+ except (ValueError) as e:
+ message = field.gettext('\'%s\' is not a valid jq expression. (%s)')
+ raise ValidationError(message % (input, str(e)))
+ except:
+ raise ValidationError("A system-error occurred when validating your jq expression")
+
class quickWatchForm(Form):
url = fields.URLField('URL', validators=[validateURL()])
diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py
index a851a4d6..6cc8e20a 100644
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -3,6 +3,7 @@ from typing import List
from bs4 import BeautifulSoup
from jsonpath_ng.ext import parse
+import jq
import re
from inscriptis import get_text
from inscriptis.model.config import ParserConfig
@@ -79,19 +80,26 @@ def extract_element(find='title', html_content=''):
return element_text
#
-def _parse_json(json_data, jsonpath_filter):
- s=[]
- jsonpath_expression = parse(jsonpath_filter.replace('json:', ''))
- match = jsonpath_expression.find(json_data)
-
+def _parse_json(json_data, json_filter):
+ if 'json:' in json_filter:
+ jsonpath_expression = parse(json_filter.replace('json:', ''))
+ match = jsonpath_expression.find(json_data)
+ return _get_stripped_text_from_json_match(match)
+ if 'jq:' in json_filter:
+ jq_expression = jq.compile(json_filter.replace('jq:', ''))
+ match = jq_expression.input(json_data).all()
+ return _get_stripped_text_from_json_match(match)
+
+def _get_stripped_text_from_json_match(match):
+ s = []
# More than one result, we will return it as a JSON list.
if len(match) > 1:
for i in match:
- s.append(i.value)
+ s.append(i.value if hasattr(i, 'value') else i)
# Single value, use just the value, as it could be later used in a token in notifications.
if len(match) == 1:
- s = match[0].value
+ s = match[0].value if hasattr(match[0], 'value') else match[0]
# Re #257 - Better handling where it does not exist, in the case the original 's' value was False..
if not match:
@@ -103,16 +111,16 @@ def _parse_json(json_data, jsonpath_filter):
return stripped_text_from_html
-def extract_json_as_string(content, jsonpath_filter):
+def extract_json_as_string(content, json_filter):
stripped_text_from_html = False
# Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded blob.. just return the first that matches jsonpath_filter
+ # Foreach blob.. just return the first that matches json_filter
s = []
soup = BeautifulSoup(content, 'html.parser')
bs_result = soup.findAll('script')
@@ -131,7 +139,7 @@ def extract_json_as_string(content, jsonpath_filter):
# Just skip it
continue
else:
- stripped_text_from_html = _parse_json(json_data, jsonpath_filter)
+ stripped_text_from_html = _parse_json(json_data, json_filter)
if stripped_text_from_html:
break
diff --git a/changedetectionio/templates/edit.html b/changedetectionio/templates/edit.html
index 64e9cee3..907894e1 100644
--- a/changedetectionio/templates/edit.html
+++ b/changedetectionio/templates/edit.html
@@ -184,8 +184,12 @@ User-Agent: wonderbra 1.0") }}
- CSS - Limit text to this CSS rule, only text matching this CSS rule is included.
- - JSON - Limit text to this JSON rule, using JSONPath, prefix with
"json:"
, use json:$
to force re-formatting if required, test your JSONPath here
+ - JSON - Limit text to this JSON rule, using either JSONPath or jq.
+
+ - JSONPath: Prefix with
json:
, use json:$
to force re-formatting if required, test your JSONPath here.
+ - jq: Prefix with
jq:
and test your jq here. Using jq allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation here.
+
+
- XPath - Limit text to this XPath rule, simply start with a forward-slash,
- Example:
//*[contains(@class, 'sametext')]
or xpath://*[contains(@class, 'sametext')]
,
- Please be sure that you thoroughly understand how to write CSS or JSONPath, XPath selector rules before filing an issue on GitHub! here for more CSS selector help.
diff --git a/changedetectionio/tests/test_jsonpath_selector.py b/changedetectionio/tests/test_jsonpath_jq_selector.py
similarity index 84%
rename from changedetectionio/tests/test_jsonpath_selector.py
rename to changedetectionio/tests/test_jsonpath_jq_selector.py
index 729a201d..d0082122 100644
--- a/changedetectionio/tests/test_jsonpath_selector.py
+++ b/changedetectionio/tests/test_jsonpath_jq_selector.py
@@ -2,7 +2,7 @@
# coding=utf-8
import time
-from flask import url_for
+from flask import url_for, escape
from . util import live_server_setup
import pytest
@@ -36,16 +36,26 @@ and it can also be repeated
from .. import html_tools
# See that we can find the second