Bug fix - further work on lxml filter extract (#2313 #2312 #2317)

pull/2152/merge
dgtlmoon 2 weeks ago committed by GitHub
parent 6f3c3b7dfb
commit 3ae9bfa6f9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@@ -172,10 +172,12 @@ def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=Fals
         if append_pretty_line_formatting and len(html_block) and (not hasattr(element, 'tag') or not element.tag in (['br', 'hr', 'div', 'p'])):
             html_block += TEXT_FILTER_LIST_LINE_SUFFIX
-        if isinstance(element, str):
+        # Some kind of text, UTF-8 or other
+        if isinstance(element, (str, bytes)):
             html_block += element
         else:
-            html_block += etree.tostring(element, pretty_print=True, encoding='utf-8')
+            # Return the HTML which will get parsed as text
+            html_block += etree.tostring(element, pretty_print=True).decode('utf-8')
     return html_block

@@ -52,8 +52,7 @@ cryptography~=3.4
 beautifulsoup4
 # XPath filtering, lxml is required by bs4 anyway, but put it here to be safe.
-# #2312 - In 5.1.1 _ElementStringResult was removed - ImportError: cannot import name '_ElementStringResult' from 'lxml.etree'
-lxml
+lxml >=4.8.0,<6
 # XPath 2.0-3.1 support - 4.2.0 broke something?
 elementpath==4.1.5

Loading…
Cancel
Save