pull/2313/head
dgtlmoon 1 month ago
parent d4dac23ba1
commit 0777017ae7

@ -169,13 +169,11 @@ def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=Fals
# And where the matched result doesn't include something that will cause Inscriptis to add a newline
# (This way each 'match' reliably has a new-line in the diff)
# Divs are converted to 4 whitespaces by inscriptis
if append_pretty_line_formatting and len(html_block) and (not hasattr( element, 'tag' ) or not element.tag in (['br', 'hr', 'div', 'p'])):
if append_pretty_line_formatting and len(html_block) and (not hasattr(element, 'tag') or not element.tag in (['br', 'hr', 'div', 'p'])):
html_block += TEXT_FILTER_LIST_LINE_SUFFIX
if type(element) == etree._ElementStringResult:
html_block += str(element)
elif type(element) == etree._ElementUnicodeResult:
html_block += str(element)
if hasattr(element, 'text') and element.text is not None:
html_block += element.text
else:
html_block += etree.tostring(element, pretty_print=True).decode('utf-8')

@ -52,6 +52,7 @@ cryptography~=3.4
beautifulsoup4
# XPath filtering, lxml is required by bs4 anyway, but put it here to be safe.
# #2312 - In 5.1.1 _ElementStringResult was removed - ImportError: cannot import name '_ElementStringResult' from 'lxml.etree'
lxml
# XPath 2.0-3.1 support - 4.2.0 broke something?

Loading…
Cancel
Save