233 lines
6.3 KiB
233 lines
6.3 KiB
#!/usr/bin/env python3
|
|
|
|
import time
|
|
from flask import url_for
|
|
from .util import live_server_setup, wait_for_all_checks
|
|
|
|
from ..html_tools import *
|
|
|
|
|
|
def set_original_response():
    """Write the baseline HTML fixture to ``test-datastore/endpoint-content.txt``.

    The page contains one ``#sametext`` div and one ``.changetext`` div.
    The file is overwritten on every call; the function returns ``None``.
    """
    html_lines = [
        "<html>",
        "<body>",
        "Some initial text<br>",
        "<p>Which is across multiple lines</p>",
        "<br>",
        "So let's see what happens. <br>",
        '<div id="sametext">Some text thats the same</div>',
        '<div class="changetext">Some text that will change</div>',
        "</body>",
        "</html>",
        "",  # keeps the newline that follows </html>
    ]

    with open("test-datastore/endpoint-content.txt", "w") as fixture_file:
        fixture_file.write("\n".join(html_lines))
|
|
|
|
|
|
def set_modified_response():
    """Write the changed HTML fixture to ``test-datastore/endpoint-content.txt``.

    Compared with the baseline fixture, the paragraph text differs and there
    are two ``.changetext`` divs: one carrying the "online"/"guests" counters
    and one carrying the mixed-case "SomeCase insensitive 3456" text.
    The file is overwritten on every call; the function returns ``None``.
    """
    html_lines = [
        "<html>",
        "<body>",
        "Some initial text<br>",
        "<p>which has this one new line</p>",
        "<br>",
        "So let's see what happens. <br>",
        '<div id="sametext">Some text thats the same</div>',
        '<div class="changetext">Some text that did change ( 1000 online <br> 80 guests<br> 2000 online )</div>',
        '<div class="changetext">SomeCase insensitive 3456</div>',
        "</body>",
        "</html>",
        "",  # keeps the newline that follows </html>
    ]

    with open("test-datastore/endpoint-content.txt", "w") as fixture_file:
        fixture_file.write("\n".join(html_lines))
|
|
|
|
|
|
def set_multiline_response():
    """Write the multi-line HTML fixture to ``test-datastore/endpoint-content.txt``.

    The page has a paragraph whose text spans several lines, followed by two
    divs; the word "lines" appears both in the paragraph and in the first div.
    The file is overwritten on every call; the function returns ``None``.
    """
    html_lines = [
        "<html>",
        "<body>",
        "",
        "<p>Something <br>",
        "across 6 billion multiple<br>",
        "lines",
        "</p>",
        "",
        "<div>aaand something lines</div>",
        "<br>",
        "<div>and this should be</div>",
        "</body>",
        "</html>",
        "",  # keeps the newline that follows </html>
    ]

    with open("test-datastore/endpoint-content.txt", "w") as fixture_file:
        fixture_file.write("\n".join(html_lines))
|
|
|
|
|
|
def test_setup(client, live_server, measure_memory_usage):
    """Run ``live_server_setup`` on the ``live_server`` fixture.

    ``client`` and ``measure_memory_usage`` are not used in the body; they are
    presumably pytest fixtures requested for their side effects.
    """
    live_server_setup(live_server)
|
|
|
|
def test_check_filter_multiline(client, live_server, measure_memory_usage):
    """Check ``extract_text`` with a multi-line regex rule plus a plaintext rule.

    Imports the test endpoint as a watch, saves a regex using the ``s`` and
    ``i`` flags alongside a plain string, then inspects the preview page.
    """
    set_multiline_response()

    # Register the test endpoint as a watch via the import page.
    test_url = url_for('test_endpoint', _external=True)
    import_response = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in import_response.data

    wait_for_all_checks(client)

    # Save the extraction rules on the edit page: one regex and one plaintext
    # rule, separated by CRLF.
    watch_form = {
        "include_filters": '',
        'extract_text': '/something.+?6 billion.+?lines/si\r\nand this should be',
        "url": test_url,
        "tags": "",
        "headers": "",
        'fetch_backend': "html_requests"
    }
    edit_response = client.post(
        url_for("edit_page", uuid="first"),
        data=watch_form,
        follow_redirects=True
    )
    assert b"Updated watch." in edit_response.data

    wait_for_all_checks(client)

    # Issue 1828: this error text must not show up on the index page.
    overview = client.get(url_for("index"))
    assert b'not at the start of the expression' not in overview.data

    preview = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True
    )

    # A plaintext rule that does not look like a regex should match too.
    assert b'and this should be' in preview.data

    # Every piece of the multi-line regex match should be present.
    for expected_fragment in (b'Something', b'across 6 billion multiple', b'lines'):
        assert expected_fragment in preview.data

    # The later div also ends in 'lines' but must be absent
    # (non-greedy match checking).
    assert b'aaand something lines' not in preview.data
|
|
|
|
def test_check_filter_and_regex_extract(client, live_server, measure_memory_usage):
    """Check regex ``extract_text`` rules combined with a CSS include filter.

    Imports the test endpoint, restricts the watch to ``.changetext`` and
    saves several regex rules (with and without flags, with and without a
    capture group). It then changes the served page, re-checks, and verifies
    what the preview page contains.
    """
    include_filters = ".changetext"

    set_original_response()

    # Register the test endpoint as a watch via the import page.
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data

    # Give the thread time to pick it up
    wait_for_all_checks(client)

    # One rule per line, submitted CRLF-separated. Raw strings keep the
    # backslashes literal: "\d" in a normal string literal is an invalid
    # escape sequence and triggers a SyntaxWarning on Python 3.12+.
    extract_text = "\r\n".join([
        r'/\d+ online/',
        r'/\d+ guests/',
        r'/somecase insensitive \d+/i',
        r'/somecase insensitive (345\d)/i',
        r'/issue1828.+?2022/i',
    ])

    # Save the include filter and the extraction rules on the edit page.
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"include_filters": include_filters,
              'extract_text': extract_text,
              "url": test_url,
              "tags": "",
              "headers": "",
              'fetch_backend': "html_requests"
              },
        follow_redirects=True
    )

    assert b"Updated watch." in res.data

    # Give the thread time to pick it up
    wait_for_all_checks(client)

    # Issue 1828: this error text must not show up on the index page.
    res = client.get(url_for("index"))
    assert b'not at the start of the expression' not in res.data

    # Make a change
    set_modified_response()

    # Trigger a check
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    # Give the thread time to pick it up
    wait_for_all_checks(client)

    # The modified page has different '.changetext' content, so the watch
    # is expected to be listed as 'unviewed'.
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data

    # Check that the HTML conversion was detected and worked.
    res = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True
    )

    assert b'1000 online' in res.data

    # All matches of the same regex should be here
    assert b'2000 online' in res.data

    # Both regexes should be here
    assert b'80 guests' in res.data

    # Regex with flag handling should be here
    assert b'SomeCase insensitive 3456' in res.data

    # Singular group from /somecase insensitive (345\d)/i
    assert b'3456' in res.data

    # Text outside the regex matches should not be here
    assert b'Some text that did change' not in res.data
|
|
|
|
|
|
|
|
def test_regex_error_handling(client, live_server, measure_memory_usage):
    """Check that an invalid ``extract_text`` regex is rejected by the edit form.

    Imports the test endpoint, submits a malformed regex rule, expects the
    validation message in the response, then deletes all watches.
    """
    # Register the test endpoint as a watch via the import page.
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data

    # Submit a deliberately broken rule (unterminated "{3" quantifier and
    # "XYZ" as flags). Raw string: "\d" in a normal string literal is an
    # invalid escape sequence and triggers a SyntaxWarning on Python 3.12+.
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"extract_text": r'/something bad\d{3/XYZ',
              "url": test_url,
              "fetch_backend": "html_requests"},
        follow_redirects=True
    )

    assert b'is not a valid regular expression.' in res.data

    # Clean up: remove every watch.
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
|