You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
changedetection.io/changedetectionio/tests/test_html_to_text.py

39 lines
1.3 KiB

#!/usr/bin/env python3
"""Test suite for the method to extract text from an html string"""
from ..html_tools import html_to_text
def test_html_to_text_func():
    """Check html_to_text with anchor rendering switched off and then on.

    The same HTML sample is converted twice, once per value of
    ``render_anchor_tag_content``, and each result is compared against
    the exact text expected for that mode.
    """
    sample_html = """<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<a href="/first_link"> More Text </a>
<br>
So let's see what happens. <br>
<a href="second_link.com"> Even More Text </a>
</body>
</html>
"""

    # Expected result when anchors contribute only their visible text.
    expected_plain = (
        "Some initial text\n\nWhich is across multiple "
        "lines\n\nMore Text\nSo let's see what happens.\nEven More Text"
    )
    # Expected result when anchors are written out as "[ text ](href)".
    expected_with_links = (
        "Some initial text\n\nWhich is across multiple lines\n\n[ More Text "
        "](/first_link)\nSo let's see what happens.\n[ Even More Text ]"
        "(second_link.com)"
    )

    # Anchor rendering disabled: link targets must not appear in the output.
    plain_output = html_to_text(sample_html, render_anchor_tag_content=False)
    assert plain_output == expected_plain

    # Anchor rendering enabled: link targets must be present in the output.
    linked_output = html_to_text(sample_html, render_anchor_tag_content=True)
    assert linked_output == expected_with_links