diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py index e9e0023f..a851a4d6 100644 --- a/changedetectionio/html_tools.py +++ b/changedetectionio/html_tools.py @@ -52,8 +52,15 @@ def xpath_filter(xpath_filter, html_content): if len(html_content) > 0 and len(r) == 0: raise FilterNotFoundInResponse(xpath_filter) - for item in r: - html_block += etree.tostring(item, pretty_print=True).decode('utf-8') + "
" + # NOTE: //title/text() won't work where the element's content is wrapped in CDATA. + + for element in r: + # XPath string results (e.g. from text()) are str subclasses in lxml; append them as-is. + if isinstance(element, str): + html_block += str(element) + "<br/>" + else: + # Anything else is treated as an element node and serialised back to markup. + html_block += etree.tostring(element, pretty_print=True).decode('utf-8') + "<br/>" return html_block diff --git a/changedetectionio/templates/edit.html b/changedetectionio/templates/edit.html index 7b5d0c4a..8541d957 100644 --- a/changedetectionio/templates/edit.html +++ b/changedetectionio/templates/edit.html @@ -163,15 +163,26 @@ User-Agent: wonderbra 1.0") }} </div> </fieldset> <div class="pure-control-group"> - {{ render_field(form.css_filter, placeholder=".class-name or #some-id, or other CSS selector rule.", - class="m-d") }} + {% set field = render_field(form.css_filter, + placeholder=".class-name or #some-id, or other CSS selector rule.", + class="m-d") + %} + {{ field }} + {% if '/text()' in field %} + <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br/> + {% endif %} <span class="pure-form-message-inline"> <ul> <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li> <li>JSON - Limit text to this JSON rule, using <a href="https://pypi.org/project/jsonpath-ng/">JSONPath</a>, prefix with <code>"json:"</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a></li> - <li>XPath - Limit text to this XPath rule, simply start with a forward-slash, example <code>//*[contains(@class, 'sametext')]</code> or <code>xpath://*[contains(@class, 'sametext')]</code>, <a + <li>XPath - Limit text to this XPath rule, simply start with a forward-slash, + <ul> + <li>Example: <code>//*[contains(@class, 'sametext')]</code> or <code>xpath://*[contains(@class, 'sametext')]</code>, <a href="http://xpather.com/" 
target="new">test your XPath here</a></li> + <li>Example: Get all titles from an RSS feed <code>//title/text()</code></li> + </ul> + </li> </ul> Please be sure that you thoroughly understand how to write CSS or JSONPath, XPath selector rules before filing an issue on GitHub! <a href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/> diff --git a/changedetectionio/tests/test_xpath_selector.py b/changedetectionio/tests/test_xpath_selector.py index 1ac1dd7e..4e417a74 100644 --- a/changedetectionio/tests/test_xpath_selector.py +++ b/changedetectionio/tests/test_xpath_selector.py @@ -86,6 +86,7 @@ def test_check_xpath_filter_utf8(client, live_server): follow_redirects=True ) assert b"1 Imported" in res.data + time.sleep(1) res = client.post( url_for("edit_page", uuid="first"), data={"css_filter": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, @@ -99,6 +100,68 @@ def test_check_xpath_filter_utf8(client, live_server): assert b'Deleted' in res.data +# An XPath text() filter on a UTF-8 RSS reply should yield the plain title text, see https://github.com/dgtlmoon/changedetection.io/pull/613 +def test_check_xpath_text_function_utf8(client, live_server): + filter='//item/title/text()' + + d='''<?xml version="1.0" encoding="UTF-8"?> +<rss xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0"> + <channel> + <title>rpilocator.com + https://rpilocator.com + Find Raspberry Pi Computers in Stock + Thu, 19 May 2022 23:27:30 GMT + + https://rpilocator.com/favicon.png + rpilocator.com + https://rpilocator.com/ + 32 + 32 + + + Stock Alert (UK): RPi CM4 + something else unrelated + + + Stock Alert (UK): Big monitor + something else unrelated + + +''' + + with open("test-datastore/endpoint-content.txt", "w") as f: + f.write(d) + + # Add our URL to the import page + 
test_url = url_for('test_endpoint', _external=True, content_type="application/rss+xml;charset=UTF-8") + res = client.post( + url_for("import_page"), + data={"urls": test_url}, + follow_redirects=True + ) + assert b"1 Imported" in res.data + time.sleep(1) + res = client.post( + url_for("edit_page", uuid="first"), + data={"css_filter": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, + follow_redirects=True + ) + assert b"Updated watch." in res.data + time.sleep(3) + res = client.get(url_for("index")) + assert b'Unicode strings with encoding declaration are not supported.' not in res.data + + # The preview page should show the title text selected by the XPath filter + res = client.get( + url_for("preview_page", uuid="first"), + follow_redirects=True + ) + + assert b'
Stock Alert (UK): RPi CM4' in res.data + assert b'
Stock Alert (UK): Big monitor' in res.data + + res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) + assert b'Deleted' in res.data def test_check_markup_xpath_filter_restriction(client, live_server): sleep_time_for_fetch_thread = 3