changedetection.io/changedetectionio/tests/test_xpath_selector.py

#!/usr/bin/python3

import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks

from ..html_tools import *

def test_setup(live_server):
    """Run the shared ``live_server_setup`` helper on the ``live_server`` argument."""
    live_server_setup(live_server)

def set_original_response():
    """Write the baseline HTML page to ``test-datastore/endpoint-content.txt``.

    The page contains a ``sametext`` div and a ``changetext`` div; any
    previous content of the file is overwritten.

    Returns:
        None.
    """
    baseline_html = """<html>
       <body>
     Some initial text<br>
     <p>Which is across multiple lines</p>
     <br>
     So let's see what happens.  <br>
     <div class="sametext">Some text thats the same</div>
     <div class="changetext">Some text that will change</div>
     </body>
     </html>
    """

    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(baseline_html)

def set_modified_response():
    """Write the modified HTML page to ``test-datastore/endpoint-content.txt``.

    Compared with the page written by ``set_original_response``, the
    ``changetext`` div and the "So let's see what happens." line differ,
    while the ``sametext`` div is unchanged. Any previous content of the
    file is overwritten.

    Returns:
        None.
    """
    modified_html = """<html>
       <body>
     Some initial text<br>
     <p>Which is across multiple lines</p>
     <br>
     So let's see what happens.  THIS CHANGES AND SHOULDNT TRIGGER A CHANGE<br>
     <div class="sametext">Some text thats the same</div>
     <div class="changetext">Some new text</div>
     </body>
     </html>
    """

    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(modified_html)

# Handle utf-8 charset replies https://github.com/dgtlmoon/changedetection.io/pull/613
def test_check_xpath_filter_utf8(client, live_server):
    """Apply an XPath include filter to an RSS reply served as UTF-8.

    The RSS document carries an XML encoding declaration. After the watch
    is imported and the filter saved, the index page must not show the
    "Unicode strings with encoding declaration are not supported." error.
    """
    xpath_filter = '//item/*[self::description]'

    rss_feed = '''<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
	<channel>
		<title>rpilocator.com</title>
		<link>https://rpilocator.com</link>
		<description>Find Raspberry Pi Computers in Stock</description>
		<lastBuildDate>Thu, 19 May 2022 23:27:30 GMT</lastBuildDate>
		<image>
			<url>https://rpilocator.com/favicon.png</url>
			<title>rpilocator.com</title>
			<link>https://rpilocator.com/</link>
			<width>32</width>
			<height>32</height>
		</image>
		<item>
			<title>Stock Alert (UK): RPi CM4 - 1GB RAM, No MMC, No Wifi is In Stock at Pimoroni</title>
			<description>Stock Alert (UK): RPi CM4 - 1GB RAM, No MMC, No Wifi is In Stock at Pimoroni</description>
			<link>https://rpilocator.com?vendor=pimoroni&amp;utm_source=feed&amp;utm_medium=rss</link>
			<category>pimoroni</category>
			<category>UK</category>
			<category>CM4</category>
			<guid isPermaLink="false">F9FAB0D9-DF6F-40C8-8DEE5FC0646BB722</guid>
			<pubDate>Thu, 19 May 2022 14:32:32 GMT</pubDate>
		</item>
	</channel>
</rss>'''

    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(rss_feed)

    # Import the endpoint URL, requesting an RSS content type with a UTF-8 charset
    test_url = url_for('test_endpoint', _external=True, content_type="application/rss+xml;charset=UTF-8")
    import_response = client.post(url_for("import_page"), data={"urls": test_url}, follow_redirects=True)
    assert b"1 Imported" in import_response.data
    wait_for_all_checks(client)

    # Save the XPath filter on the imported watch
    edit_form = {
        "include_filters": xpath_filter,
        "url": test_url,
        "tags": "",
        "headers": "",
        'fetch_backend': "html_requests",
    }
    edit_response = client.post(url_for("edit_page", uuid="first"), data=edit_form, follow_redirects=True)
    assert b"Updated watch." in edit_response.data
    wait_for_all_checks(client)

    # The encoding-declaration error must not appear on the overview
    index_response = client.get(url_for("index"))
    assert b'Unicode strings with encoding declaration are not supported.' not in index_response.data

    # Clean up every watch
    delete_response = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in delete_response.data


# Handle utf-8 charset replies https://github.com/dgtlmoon/changedetection.io/pull/613
def test_check_xpath_text_function_utf8(client, live_server):
    """Apply an XPath ``text()`` filter to an RSS reply served as UTF-8.

    The filter ``//item/title/text()`` is saved on the watch. The index
    page must not show the "Unicode strings with encoding declaration are
    not supported." error, and the preview page must contain the titles
    of both ``<item>`` entries.
    """
    xpath_filter = '//item/title/text()'

    rss_feed = '''<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
	<channel>
		<title>rpilocator.com</title>
		<link>https://rpilocator.com</link>
		<description>Find Raspberry Pi Computers in Stock</description>
		<lastBuildDate>Thu, 19 May 2022 23:27:30 GMT</lastBuildDate>
		<image>
			<url>https://rpilocator.com/favicon.png</url>
			<title>rpilocator.com</title>
			<link>https://rpilocator.com/</link>
			<width>32</width>
			<height>32</height>
		</image>
		<item>
			<title>Stock Alert (UK): RPi CM4</title>
			<foo>something else unrelated</foo>
		</item>
		<item>
			<title>Stock Alert (UK): Big monitor</title>
			<foo>something else unrelated</foo>
		</item>		
	</channel>
</rss>'''

    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(rss_feed)

    # Import the endpoint URL, requesting an RSS content type with a UTF-8 charset
    test_url = url_for('test_endpoint', _external=True, content_type="application/rss+xml;charset=UTF-8")
    import_response = client.post(url_for("import_page"), data={"urls": test_url}, follow_redirects=True)
    assert b"1 Imported" in import_response.data
    wait_for_all_checks(client)

    # Save the text() filter on the imported watch
    edit_form = {
        "include_filters": xpath_filter,
        "url": test_url,
        "tags": "",
        "headers": "",
        'fetch_backend': "html_requests",
    }
    edit_response = client.post(url_for("edit_page", uuid="first"), data=edit_form, follow_redirects=True)
    assert b"Updated watch." in edit_response.data
    wait_for_all_checks(client)

    index_response = client.get(url_for("index"))
    assert b'Unicode strings with encoding declaration are not supported.' not in index_response.data

    # The preview should list the text of each <item>'s <title>
    preview_response = client.get(url_for("preview_page", uuid="first"), follow_redirects=True)
    preview_html = preview_response.data
    assert b'<div class="">Stock Alert (UK): RPi CM4' in preview_html
    assert b'<div class="">Stock Alert (UK): Big monitor' in preview_html

    # Clean up every watch
    delete_response = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in delete_response.data

def test_check_markup_xpath_filter_restriction(client, live_server):
    """Changes outside the XPath-selected element must not flag the watch.

    The watch is restricted to elements whose class contains ``sametext``.
    The modified response changes other parts of the page but leaves the
    ``sametext`` div untouched, so after a re-check the index page must
    not contain ``unviewed``.
    """
    xpath_filter = "//*[contains(@class, 'sametext')]"

    set_original_response()

    # Give the endpoint time to spin up
    time.sleep(1)

    # Import the endpoint URL as a new watch
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data

    # Wait for the initial check to complete
    wait_for_all_checks(client)

    # Edit the watch and restrict it with the XPath include filter
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"include_filters": xpath_filter, "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests"},
        follow_redirects=True
    )
    assert b"Updated watch." in res.data

    # Wait for the re-check that follows the edit
    wait_for_all_checks(client)

    # View it / reset state back to viewed
    client.get(url_for("diff_history_page", uuid="first"), follow_redirects=True)

    # Make a change that only touches content outside the filter
    set_modified_response()

    # Trigger a check and wait for it to finish
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # Nothing inside the selected element changed, so no change is reported
    res = client.get(url_for("index"))
    assert b'unviewed' not in res.data

    # Clean up every watch
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data


def test_xpath_validation(client, live_server):
    """Saving an invalid XPath include filter must report a validation error."""
    # Import the endpoint URL as a new watch
    test_url = url_for('test_endpoint', _external=True)
    import_response = client.post(url_for("import_page"), data={"urls": test_url}, follow_redirects=True)
    assert b"1 Imported" in import_response.data
    wait_for_all_checks(client)

    # Submit an include filter that is not a valid XPath expression
    edit_form = {
        "include_filters": "/something horrible",
        "url": test_url,
        "tags": "",
        "headers": "",
        'fetch_backend': "html_requests",
    }
    edit_response = client.post(url_for("edit_page", uuid="first"), data=edit_form, follow_redirects=True)
    assert b"is not a valid XPath expression" in edit_response.data

    # Clean up every watch
    delete_response = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in delete_response.data


# Actually only really used by the distill.io importer, but could be handy too
def test_check_with_prefix_include_filters(client, live_server):
    """An include filter with an explicit ``xpath:`` prefix must be applied.

    With the filter ``xpath://*[contains(@class, 'sametext')]`` the preview
    page must contain the ``sametext`` div's text and must not contain the
    ``changetext`` div's text.
    """
    # Start from a clean slate
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data

    # Give the endpoint time to spin up
    time.sleep(1)

    set_original_response()

    # Import the endpoint URL as a new watch
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data
    wait_for_all_checks(client)

    # Save the prefixed XPath filter on the watch
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"include_filters":  "xpath://*[contains(@class, 'sametext')]", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests"},
        follow_redirects=True
    )

    assert b"Updated watch." in res.data
    wait_for_all_checks(client)

    res = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True
    )

    assert b"Some text thats the same" in res.data #in selector
    assert b"Some text that will change" not in res.data #not in selector

    # Clean up every watch and verify the clean-up, as the other tests in
    # this module do, so a failed delete is reported here rather than
    # leaving a watch behind.
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
XPath support (#355) * XPath support and minor improvements to form validation 3 years ago			`#!/usr/bin/python3`

			`import time`
			`from flask import url_for`
Remove static delays 2 years ago			`from .util import live_server_setup, wait_for_all_checks`
XPath support (#355) * XPath support and minor improvements to form validation 3 years ago
			`from ..html_tools import *`

			`def test_setup(live_server):`
			`live_server_setup(live_server)`

			`def set_original_response():`
			`test_return_data = """<html>`
			`<body>`
Fixing bad linebreak definition `</br>` in notifications and UI (#1465) 2 years ago			`Some initial text<br>`
XPath support (#355) * XPath support and minor improvements to form validation 3 years ago			`<p>Which is across multiple lines</p>`
Fixing bad linebreak definition `</br>` in notifications and UI (#1465) 2 years ago			`<br>`
			`So let's see what happens. <br>`
XPath support (#355) * XPath support and minor improvements to form validation 3 years ago			`<div class="sametext">Some text thats the same</div>`
			`<div class="changetext">Some text that will change</div>`
			`</body>`
			`</html>`
			`"""`

			`with open("test-datastore/endpoint-content.txt", "w") as f:`
			`f.write(test_return_data)`
			`return None`

			`def set_modified_response():`
			`test_return_data = """<html>`
			`<body>`
Fixing bad linebreak definition `</br>` in notifications and UI (#1465) 2 years ago			`Some initial text<br>`
XPath support (#355) * XPath support and minor improvements to form validation 3 years ago			`<p>Which is across multiple lines</p>`
Fixing bad linebreak definition `</br>` in notifications and UI (#1465) 2 years ago			`<br>`
			`So let's see what happens. THIS CHANGES AND SHOULDNT TRIGGER A CHANGE<br>`
XPath support (#355) * XPath support and minor improvements to form validation 3 years ago			`<div class="sametext">Some text thats the same</div>`
			`<div class="changetext">Some new text</div>`
			`</body>`
			`</html>`
			`"""`

			`with open("test-datastore/endpoint-content.txt", "w") as f:`
			`f.write(test_return_data)`

			`return None`

Fix encoding errors with XPath filters from UTF-8 responses (#619) 3 years ago			`# Handle utf-8 charset replies https://github.com/dgtlmoon/changedetection.io/pull/613`
			`def test_check_xpath_filter_utf8(client, live_server):`
			`filter='//item/*[self::description]'`

			`d='''<?xml version="1.0" encoding="UTF-8"?>`
			`<rss xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">`
			`<channel>`
			`<title>rpilocator.com</title>`
			`<link>https://rpilocator.com</link>`
			`<description>Find Raspberry Pi Computers in Stock</description>`
			`<lastBuildDate>Thu, 19 May 2022 23:27:30 GMT</lastBuildDate>`
			`<image>`
			`<url>https://rpilocator.com/favicon.png</url>`
			`<title>rpilocator.com</title>`
			`<link>https://rpilocator.com/</link>`
			`<width>32</width>`
			`<height>32</height>`
			`</image>`
			`<item>`
			`<title>Stock Alert (UK): RPi CM4 - 1GB RAM, No MMC, No Wifi is In Stock at Pimoroni</title>`
			`<description>Stock Alert (UK): RPi CM4 - 1GB RAM, No MMC, No Wifi is In Stock at Pimoroni</description>`
			`<link>https://rpilocator.com?vendor=pimoroni&utm_source=feed&utm_medium=rss</link>`
			`<category>pimoroni</category>`
			`<category>UK</category>`
			`<category>CM4</category>`
			`<guid isPermaLink="false">F9FAB0D9-DF6F-40C8-8DEE5FC0646BB722</guid>`
			`<pubDate>Thu, 19 May 2022 14:32:32 GMT</pubDate>`
			`</item>`
			`</channel>`
			`</rss>'''`

			`with open("test-datastore/endpoint-content.txt", "w") as f:`
			`f.write(d)`

			`# Add our URL to the import page`
			`test_url = url_for('test_endpoint', _external=True, content_type="application/rss+xml;charset=UTF-8")`
			`res = client.post(`
			`url_for("import_page"),`
			`data={"urls": test_url},`
			`follow_redirects=True`
			`)`
			`assert b"1 Imported" in res.data`
Remove static delays 2 years ago			`wait_for_all_checks(client)`
Fix encoding errors with XPath filters from UTF-8 responses (#619) 3 years ago			`res = client.post(`
			`url_for("edit_page", uuid="first"),`
UI/Functionality - Ability to manage/apply filters and notifications across tags/groups 2 years ago			`data={"include_filters": filter, "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests"},`
Fix encoding errors with XPath filters from UTF-8 responses (#619) 3 years ago			`follow_redirects=True`
			`)`
			`assert b"Updated watch." in res.data`
Remove more 2 years ago			`wait_for_all_checks(client)`

Fix encoding errors with XPath filters from UTF-8 responses (#619) 3 years ago			`res = client.get(url_for("index"))`
			`assert b'Unicode strings with encoding declaration are not supported.' not in res.data`
			`res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)`
			`assert b'Deleted' in res.data`


Enhancement - support xPath text() function filter, for example "//title/text()" in RSS feeds (#778) 2 years ago			`# Handle utf-8 charset replies https://github.com/dgtlmoon/changedetection.io/pull/613`
			`def test_check_xpath_text_function_utf8(client, live_server):`
			`filter='//item/title/text()'`

			`d='''<?xml version="1.0" encoding="UTF-8"?>`
			`<rss xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">`
			`<channel>`
			`<title>rpilocator.com</title>`
			`<link>https://rpilocator.com</link>`
			`<description>Find Raspberry Pi Computers in Stock</description>`
			`<lastBuildDate>Thu, 19 May 2022 23:27:30 GMT</lastBuildDate>`
			`<image>`
			`<url>https://rpilocator.com/favicon.png</url>`
			`<title>rpilocator.com</title>`
			`<link>https://rpilocator.com/</link>`
			`<width>32</width>`
			`<height>32</height>`
			`</image>`
			`<item>`
			`<title>Stock Alert (UK): RPi CM4</title>`
			`<foo>something else unrelated</foo>`
			`</item>`
			`<item>`
			`<title>Stock Alert (UK): Big monitor</title>`
			`<foo>something else unrelated</foo>`
			`</item>`
			`</channel>`
			`</rss>'''`

			`with open("test-datastore/endpoint-content.txt", "w") as f:`
			`f.write(d)`

			`# Add our URL to the import page`
			`test_url = url_for('test_endpoint', _external=True, content_type="application/rss+xml;charset=UTF-8")`
			`res = client.post(`
			`url_for("import_page"),`
			`data={"urls": test_url},`
			`follow_redirects=True`
			`)`
			`assert b"1 Imported" in res.data`
Remove static delays 2 years ago			`wait_for_all_checks(client)`
Enhancement - support xPath text() function filter, for example "//title/text()" in RSS feeds (#778) 2 years ago			`res = client.post(`
			`url_for("edit_page", uuid="first"),`
UI/Functionality - Ability to manage/apply filters and notifications across tags/groups 2 years ago			`data={"include_filters": filter, "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests"},`
Enhancement - support xPath text() function filter, for example "//title/text()" in RSS feeds (#778) 2 years ago			`follow_redirects=True`
			`)`
			`assert b"Updated watch." in res.data`
Remove static delays 2 years ago			`wait_for_all_checks(client)`
Enhancement - support xPath text() function filter, for example "//title/text()" in RSS feeds (#778) 2 years ago			`res = client.get(url_for("index"))`
			`assert b'Unicode strings with encoding declaration are not supported.' not in res.data`

			`# The service should echo back the request headers`
			`res = client.get(`
			`url_for("preview_page", uuid="first"),`
			`follow_redirects=True`
			`)`

			`assert b'<div class="">Stock Alert (UK): RPi CM4' in res.data`
			`assert b'<div class="">Stock Alert (UK): Big monitor' in res.data`

			`res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)`
			`assert b'Deleted' in res.data`
XPath support (#355) * XPath support and minor improvements to form validation 3 years ago
			`def test_check_markup_xpath_filter_restriction(client, live_server):`
			`sleep_time_for_fetch_thread = 3`

			`xpath_filter = "//*[contains(@class, 'sametext')]"`

			`set_original_response()`

			`# Give the endpoint time to spin up`
			`time.sleep(1)`

			`# Add our URL to the import page`
			`test_url = url_for('test_endpoint', _external=True)`
			`res = client.post(`
			`url_for("import_page"),`
			`data={"urls": test_url},`
			`follow_redirects=True`
			`)`
			`assert b"1 Imported" in res.data`

			`# Give the thread time to pick it up`
Remove static delays 2 years ago			`wait_for_all_checks(client)`
XPath support (#355) * XPath support and minor improvements to form validation 3 years ago
			`# Goto the edit page, add our ignore text`
			`# Add our URL to the import page`
			`res = client.post(`
			`url_for("edit_page", uuid="first"),`
UI/Functionality - Ability to manage/apply filters and notifications across tags/groups 2 years ago			`data={"include_filters": xpath_filter, "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests"},`
XPath support (#355) * XPath support and minor improvements to form validation 3 years ago			`follow_redirects=True`
			`)`
			`assert b"Updated watch." in res.data`

			`# Give the thread time to pick it up`
Remove static delays 2 years ago			`wait_for_all_checks(client)`
XPath support (#355) * XPath support and minor improvements to form validation 3 years ago
			`# view it/reset state back to viewed`
			`client.get(url_for("diff_history_page", uuid="first"), follow_redirects=True)`

			`# Make a change`
			`set_modified_response()`

			`# Trigger a check`
API Interface (#617) 3 years ago			`client.get(url_for("form_watch_checknow"), follow_redirects=True)`
XPath support (#355) * XPath support and minor improvements to form validation 3 years ago			`# Give the thread time to pick it up`
Remove static delays 2 years ago			`wait_for_all_checks(client)`
XPath support (#355) * XPath support and minor improvements to form validation 3 years ago
			`res = client.get(url_for("index"))`
			`assert b'unviewed' not in res.data`
Fix encoding errors with XPath filters from UTF-8 responses (#619) 3 years ago			`res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)`
			`assert b'Deleted' in res.data`
XPath support (#355) * XPath support and minor improvements to form validation 3 years ago
Ability to visualise trigger and filter rules against the current snapshot on the preview page 3 years ago
XPath support (#355) * XPath support and minor improvements to form validation 3 years ago			`def test_xpath_validation(client, live_server):`

			`# Add our URL to the import page`
			`test_url = url_for('test_endpoint', _external=True)`
			`res = client.post(`
			`url_for("import_page"),`
			`data={"urls": test_url},`
			`follow_redirects=True`
			`)`
			`assert b"1 Imported" in res.data`
Remove static delays 2 years ago			`wait_for_all_checks(client)`
XPath support (#355) * XPath support and minor improvements to form validation 3 years ago
			`res = client.post(`
			`url_for("edit_page", uuid="first"),`
UI/Functionality - Ability to manage/apply filters and notifications across tags/groups 2 years ago			`data={"include_filters": "/something horrible", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests"},`
XPath support (#355) * XPath support and minor improvements to form validation 3 years ago			`follow_redirects=True`
			`)`
Distill.io JSON export file importer (#592) 3 years ago			`assert b"is not a valid XPath expression" in res.data`
Fix encoding errors with XPath filters from UTF-8 responses (#619) 3 years ago			`res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)`
			`assert b'Deleted' in res.data`
Distill.io JSON export file importer (#592) 3 years ago

			`# actually only really used by the distll.io importer, but could be handy too`
Filters can now accept a list/multiple filters (#1064) #623 2 years ago			`def test_check_with_prefix_include_filters(client, live_server):`
API Interface (#617) 3 years ago			`res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)`
Distill.io JSON export file importer (#592) 3 years ago			`assert b'Deleted' in res.data`

			`# Give the endpoint time to spin up`
			`time.sleep(1)`

			`set_original_response()`

			`# Add our URL to the import page`
			`test_url = url_for('test_endpoint', _external=True)`
			`res = client.post(`
			`url_for("import_page"),`
			`data={"urls": test_url},`
			`follow_redirects=True`
			`)`
			`assert b"1 Imported" in res.data`
Remove static delays 2 years ago			`wait_for_all_checks(client)`
Distill.io JSON export file importer (#592) 3 years ago
			`res = client.post(`
			`url_for("edit_page", uuid="first"),`
UI/Functionality - Ability to manage/apply filters and notifications across tags/groups 2 years ago			`data={"include_filters": "xpath://*[contains(@class, 'sametext')]", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests"},`
Distill.io JSON export file importer (#592) 3 years ago			`follow_redirects=True`
			`)`

			`assert b"Updated watch." in res.data`
Remove static delays 2 years ago			`wait_for_all_checks(client)`
Distill.io JSON export file importer (#592) 3 years ago
			`res = client.get(`
			`url_for("preview_page", uuid="first"),`
			`follow_redirects=True`
			`)`

			`assert b"Some text thats the same" in res.data #in selector`
			`assert b"Some text that will change" not in res.data #not in selector`

API Interface (#617) 3 years ago			`client.get(url_for("form_delete", uuid="all"), follow_redirects=True)`