CSS Filter - strip surrounding whitespace from text, preserve newlines where applicable, remove extra newlines

pull/107/head
dgtlmoon 3 years ago
parent c6a4709987
commit 47f7698b32

@@ -89,7 +89,10 @@ class perform_site_check():
soup = BeautifulSoup(r.content, "html.parser") soup = BeautifulSoup(r.content, "html.parser")
stripped_text_from_html = "" stripped_text_from_html = ""
for item in soup.select(css_filter): for item in soup.select(css_filter):
text = str(item.get_text()).strip() + '\n' # By default, bs4's get_text will lump the text together
text = str(item.get_text(separator="\n", strip=True)).strip()
# Try to cut back on excessive linefeeds if there are any
text = text.replace("\n\n","\n")
stripped_text_from_html += text stripped_text_from_html += text
else: else:

Loading…
Cancel
Save