|
|
@ -45,6 +45,71 @@ def set_modified_response():
|
|
|
|
return None
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def set_multiline_response():
    """Write the multiline HTML fixture to the test endpoint content file.

    The fixture holds a ``<p>`` whose text is split over several lines with
    ``<br/>`` tags, followed by a separate ``<div>`` that also ends in the
    word "lines". The content is written to
    ``test-datastore/endpoint-content.txt``, replacing whatever was there.

    Returns:
        None
    """
    html_payload = """<html>
<body>

<p>Something <br/>
across 6 billion multiple<br/>
lines
</p>

<div>aaand something lines</div>
</body>
</html>
"""

    # Mode "w" truncates the file, so each call starts from a clean fixture.
    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(html_payload)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_check_filter_multiline(client, live_server):
    """Check that a multiline, non-greedy ``extract_text`` regex is applied.

    Steps:
      1. Write the multiline HTML fixture and import the test endpoint URL
         as a watch.
      2. Edit that watch so ``extract_text`` is
         ``/something.+?6 billion.+?lines/si``.
      3. Load the preview page and assert that the three text lines of the
         ``<p>`` are shown, while the later ``<div>`` (which also ends in
         "lines") is not.

    Args:
        client: test client used to POST/GET the application pages.
        live_server: live server fixture, passed to ``live_server_setup``.
    """
    # Same name and value as used by test_check_filter_and_regex_extract.
    sleep_time_for_fetch_thread = 3

    live_server_setup(live_server)
    set_multiline_response()

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data

    # Give the thread time to pick it up
    time.sleep(sleep_time_for_fetch_thread)

    # Go to the edit page and set the multiline extract_text regex.
    # NOTE(review): the trailing "si" presumably enables dot-matches-newline
    # and case-insensitive matching (the fixture says "Something", the
    # pattern says "something") - confirm against the regex parser.
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"css_filter": '',
              'extract_text': '/something.+?6 billion.+?lines/si',
              "url": test_url,
              "tag": "",
              "headers": "",
              'fetch_backend': "html_requests"
              },
        follow_redirects=True
    )

    assert b"Updated watch." in res.data

    # Give the thread time to pick it up
    time.sleep(sleep_time_for_fetch_thread)

    res = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True
    )

    # Each line of the matched paragraph should be rendered in the preview
    assert b'<div class="">Something' in res.data
    assert b'<div class="">across 6 billion multiple' in res.data
    assert b'<div class="">lines' in res.data

    # but the last one, which also says 'lines', shouldn't be here
    # (non-greedy match checking)
    assert b'aaand something lines' not in res.data
|
|
|
|
|
|
|
|
|
|
|
|
def test_check_filter_and_regex_extract(client, live_server):
|
|
|
|
def test_check_filter_and_regex_extract(client, live_server):
|
|
|
|
sleep_time_for_fetch_thread = 3
|
|
|
|
sleep_time_for_fetch_thread = 3
|
|
|
|
|
|
|
|
|
|
|
@ -88,11 +153,6 @@ def test_check_filter_and_regex_extract(client, live_server):
|
|
|
|
|
|
|
|
|
|
|
|
assert b"Updated watch." in res.data
|
|
|
|
assert b"Updated watch." in res.data
|
|
|
|
|
|
|
|
|
|
|
|
# Check it saved
|
|
|
|
|
|
|
|
res = client.get(
|
|
|
|
|
|
|
|
url_for("edit_page", uuid="first"),
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Give the thread time to pick it up
|
|
|
|
# Give the thread time to pick it up
|
|
|
|
time.sleep(sleep_time_for_fetch_thread)
|
|
|
|
time.sleep(sleep_time_for_fetch_thread)
|
|
|
|
|
|
|
|
|
|
|
|