@todo Comment - BS4's element.get_text() seems to lose the indentation format no matter what

4 years ago · e3a9847f74
parent 47f7698b32
commit e3a9847f74
1 changed files with 2 additions and 0 deletions
--- a/backend/fetch_site_status.py
+++ b/backend/fetch_site_status.py
@@ -90,6 +90,8 @@ class perform_site_check():
                stripped_text_from_html = ""
                for item in soup.select(css_filter):
                    # By default, bs4's get_text will lump the text together
+                    # BS4's element strip() will lose the indentation format. I've tried using a space as the separator, setting strip=False, etc., but it doesn't help.
+                    # @todo Ideas? If you compare the css_filtered output to the non-filtered snapshot, it will always lose the indentation/format.
                    text = str(item.get_text(separator="\n", strip=True)).strip()
                    # Try to cut back on excessive linefeeds if there are any
                    text = text.replace("\n\n","\n")