From e3a9847f7452e5dd4b4ad8801c1616934d68d6c5 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Wed, 23 Jun 2021 12:49:53 +1000 Subject: [PATCH] @todo Comment - BS4's element.get_text() seems to lose the indentation format no matter what --- backend/fetch_site_status.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/fetch_site_status.py b/backend/fetch_site_status.py index 9c64623b..b21422ca 100644 --- a/backend/fetch_site_status.py +++ b/backend/fetch_site_status.py @@ -90,6 +90,8 @@ class perform_site_check(): stripped_text_from_html = "" for item in soup.select(css_filter): # By default, bs4's get_text will lump the text together + # BS4's element.get_text() will lose the indentation format; I've tried using a space as separator, setting strip=False etc., but it doesn't help + # @todo ideas? If you compare the css_filtered output to the non-filtered snapshot, it will always lose the indentation/format text = str(item.get_text(separator="\n", strip=True)).strip() # Try to cut back on excessive linefeeds if there are any text = text.replace("\n\n","\n")