Option to render links as [Some Text ](/link), adds the ability to change-detect on hyperlink changes
parent
1890881977
commit
9809af142d
@ -0,0 +1,38 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
"""Test suite for the method to extract text from an html string"""
|
||||||
|
from ..html_tools import html_to_text
|
||||||
|
|
||||||
|
|
||||||
|
def test_html_to_text_func():
|
||||||
|
test_html = """<html>
|
||||||
|
<body>
|
||||||
|
Some initial text</br>
|
||||||
|
<p>Which is across multiple lines</p>
|
||||||
|
<a href="/first_link"> More Text </a>
|
||||||
|
</br>
|
||||||
|
So let's see what happens. </br>
|
||||||
|
<a href="second_link.com"> Even More Text </a>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
|
||||||
|
# extract text, with 'render_anchor_tag_content' set to False
|
||||||
|
text_content = html_to_text(test_html, render_anchor_tag_content=False)
|
||||||
|
|
||||||
|
no_links_text = \
|
||||||
|
"Some initial text\n\nWhich is across multiple " \
|
||||||
|
"lines\n\nMore Text So let's see what happens. Even More Text"
|
||||||
|
|
||||||
|
# check that no links are in the extracted text
|
||||||
|
assert text_content == no_links_text
|
||||||
|
|
||||||
|
# extract text, with 'render_anchor_tag_content' set to True
|
||||||
|
text_content = html_to_text(test_html, render_anchor_tag_content=True)
|
||||||
|
|
||||||
|
links_text = \
|
||||||
|
"Some initial text\n\nWhich is across multiple lines\n\n[ More Text " \
|
||||||
|
"](/first_link) So let's see what happens. [ Even More Text ]" \
|
||||||
|
"(second_link.com)"
|
||||||
|
|
||||||
|
# check that links are present in the extracted text
|
||||||
|
assert text_content == links_text
|
@ -0,0 +1,219 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
"""Test suite for the render/not render anchor tag content functionality"""
|
||||||
|
|
||||||
|
import time
|
||||||
|
from flask import url_for
|
||||||
|
from .util import live_server_setup
|
||||||
|
|
||||||
|
|
||||||
|
def test_setup(live_server):
|
||||||
|
live_server_setup(live_server)
|
||||||
|
|
||||||
|
def set_original_ignore_response():
|
||||||
|
test_return_data = """<html>
|
||||||
|
<body>
|
||||||
|
Some initial text</br>
|
||||||
|
<a href="/original_link"> Some More Text </a>
|
||||||
|
</br>
|
||||||
|
So let's see what happens. </br>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
|
||||||
|
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||||
|
f.write(test_return_data)
|
||||||
|
|
||||||
|
|
||||||
|
# Should be the same as set_original_ignore_response() but with a different
|
||||||
|
# link
|
||||||
|
def set_modified_ignore_response():
|
||||||
|
test_return_data = """<html>
|
||||||
|
<body>
|
||||||
|
Some initial text</br>
|
||||||
|
<a href="/modified_link"> Some More Text </a>
|
||||||
|
</br>
|
||||||
|
So let's see what happens. </br>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
|
||||||
|
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||||
|
f.write(test_return_data)
|
||||||
|
|
||||||
|
def test_render_anchor_tag_content_true(client, live_server):
|
||||||
|
"""Testing that the link changes are detected when
|
||||||
|
render_anchor_tag_content setting is set to true"""
|
||||||
|
sleep_time_for_fetch_thread = 3
|
||||||
|
|
||||||
|
# Give the endpoint time to spin up
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
# set original html text
|
||||||
|
set_original_ignore_response()
|
||||||
|
|
||||||
|
# Goto the settings page, choose not to ignore links
|
||||||
|
res = client.post(
|
||||||
|
url_for("settings_page"),
|
||||||
|
data={
|
||||||
|
"minutes_between_check": 180,
|
||||||
|
"render_anchor_tag_content": "true",
|
||||||
|
"fetch_backend": "html_requests",
|
||||||
|
},
|
||||||
|
follow_redirects=True,
|
||||||
|
)
|
||||||
|
assert b"Settings updated." in res.data
|
||||||
|
|
||||||
|
# Add our URL to the import page
|
||||||
|
test_url = url_for("test_endpoint", _external=True)
|
||||||
|
res = client.post(
|
||||||
|
url_for("import_page"), data={"urls": test_url},
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
assert b"1 Imported" in res.data
|
||||||
|
|
||||||
|
time.sleep(sleep_time_for_fetch_thread)
|
||||||
|
# Trigger a check
|
||||||
|
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||||
|
|
||||||
|
# set a new html text with a modified link
|
||||||
|
set_modified_ignore_response()
|
||||||
|
time.sleep(sleep_time_for_fetch_thread)
|
||||||
|
|
||||||
|
# Trigger a check
|
||||||
|
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||||
|
|
||||||
|
# Give the thread time to pick it up
|
||||||
|
time.sleep(sleep_time_for_fetch_thread)
|
||||||
|
|
||||||
|
# check that the anchor tag content is rendered
|
||||||
|
res = client.get(url_for("preview_page", uuid="first"))
|
||||||
|
assert '(/modified_link)' in res.data.decode()
|
||||||
|
|
||||||
|
# since the link has changed, and we chose to render anchor tag content,
|
||||||
|
# we should detect a change (new 'unviewed' class)
|
||||||
|
res = client.get(url_for("index"))
|
||||||
|
assert b"unviewed" in res.data
|
||||||
|
assert b"/test-endpoint" in res.data
|
||||||
|
|
||||||
|
# Cleanup everything
|
||||||
|
res = client.get(url_for("api_delete", uuid="all"),
|
||||||
|
follow_redirects=True)
|
||||||
|
assert b'Deleted' in res.data
|
||||||
|
|
||||||
|
|
||||||
|
def test_render_anchor_tag_content_false(client, live_server):
|
||||||
|
"""Testing that anchor tag content changes are ignored when
|
||||||
|
render_anchor_tag_content setting is set to false"""
|
||||||
|
sleep_time_for_fetch_thread = 3
|
||||||
|
|
||||||
|
# Give the endpoint time to spin up
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
# set the original html text
|
||||||
|
set_original_ignore_response()
|
||||||
|
|
||||||
|
# Goto the settings page, choose to ignore hyperlinks
|
||||||
|
res = client.post(
|
||||||
|
url_for("settings_page"),
|
||||||
|
data={
|
||||||
|
"minutes_between_check": 180,
|
||||||
|
"render_anchor_tag_content": "false",
|
||||||
|
"fetch_backend": "html_requests",
|
||||||
|
},
|
||||||
|
follow_redirects=True,
|
||||||
|
)
|
||||||
|
assert b"Settings updated." in res.data
|
||||||
|
|
||||||
|
# Add our URL to the import page
|
||||||
|
test_url = url_for("test_endpoint", _external=True)
|
||||||
|
res = client.post(
|
||||||
|
url_for("import_page"), data={"urls": test_url}, follow_redirects=True
|
||||||
|
)
|
||||||
|
assert b"1 Imported" in res.data
|
||||||
|
|
||||||
|
time.sleep(sleep_time_for_fetch_thread)
|
||||||
|
# Trigger a check
|
||||||
|
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||||
|
|
||||||
|
# set a new html text, with a modified link
|
||||||
|
set_modified_ignore_response()
|
||||||
|
time.sleep(sleep_time_for_fetch_thread)
|
||||||
|
|
||||||
|
# Trigger a check
|
||||||
|
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||||
|
|
||||||
|
# Give the thread time to pick it up
|
||||||
|
time.sleep(sleep_time_for_fetch_thread)
|
||||||
|
|
||||||
|
# check that the anchor tag content is not rendered
|
||||||
|
res = client.get(url_for("preview_page", uuid="first"))
|
||||||
|
assert '(/modified_link)' not in res.data.decode()
|
||||||
|
|
||||||
|
# even though the link has changed, we shouldn't detect a change since
|
||||||
|
# we selected to not render anchor tag content (no new 'unviewed' class)
|
||||||
|
res = client.get(url_for("index"))
|
||||||
|
assert b"unviewed" not in res.data
|
||||||
|
assert b"/test-endpoint" in res.data
|
||||||
|
|
||||||
|
# Cleanup everything
|
||||||
|
res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
|
||||||
|
assert b'Deleted' in res.data
|
||||||
|
|
||||||
|
|
||||||
|
def test_render_anchor_tag_content_default(client, live_server):
|
||||||
|
"""Testing that anchor tag content changes are ignored when the
|
||||||
|
render_anchor_tag_content setting is not explicitly selected"""
|
||||||
|
sleep_time_for_fetch_thread = 3
|
||||||
|
|
||||||
|
# Give the endpoint time to spin up
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
# set the original html text
|
||||||
|
set_original_ignore_response()
|
||||||
|
|
||||||
|
# Goto the settings page, not passing the render_anchor_tag_content setting
|
||||||
|
res = client.post(
|
||||||
|
url_for("settings_page"),
|
||||||
|
data={
|
||||||
|
"minutes_between_check": 180,
|
||||||
|
"fetch_backend": "html_requests",
|
||||||
|
},
|
||||||
|
follow_redirects=True,
|
||||||
|
)
|
||||||
|
assert b"Settings updated." in res.data
|
||||||
|
|
||||||
|
# Add our URL to the import page
|
||||||
|
test_url = url_for("test_endpoint", _external=True)
|
||||||
|
res = client.post(
|
||||||
|
url_for("import_page"), data={"urls": test_url}, follow_redirects=True
|
||||||
|
)
|
||||||
|
assert b"1 Imported" in res.data
|
||||||
|
|
||||||
|
time.sleep(sleep_time_for_fetch_thread)
|
||||||
|
# Trigger a check
|
||||||
|
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||||
|
|
||||||
|
# set a new html text, with a modified link
|
||||||
|
set_modified_ignore_response()
|
||||||
|
time.sleep(sleep_time_for_fetch_thread)
|
||||||
|
|
||||||
|
# Trigger a check
|
||||||
|
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||||
|
|
||||||
|
# Give the thread time to pick it up
|
||||||
|
time.sleep(sleep_time_for_fetch_thread)
|
||||||
|
|
||||||
|
# check that the anchor tag content is not rendered
|
||||||
|
res = client.get(url_for("preview_page", uuid="first"))
|
||||||
|
assert '(/modified_link)' not in res.data.decode()
|
||||||
|
|
||||||
|
# even though the link has changed, we shouldn't detect a change since
|
||||||
|
# we did not select the setting and the default behaviour is to not
|
||||||
|
# render anchor tag content (no new 'unviewed' class)
|
||||||
|
res = client.get(url_for("index"))
|
||||||
|
assert b"unviewed" not in res.data
|
||||||
|
assert b"/test-endpoint" in res.data
|
||||||
|
|
||||||
|
# Cleanup everything
|
||||||
|
res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
|
||||||
|
assert b'Deleted' in res.data
|
Loading…
Reference in new issue