Bug fix - further work on lxml filter extract (#2313 #2312 #2317)

8 months ago · 3ae9bfa6f9
parent 6f3c3b7dfb
commit 3ae9bfa6f9
2 changed files with 5 additions and 4 deletions
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -172,10 +172,12 @@ def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=Fals
        if append_pretty_line_formatting and len(html_block) and (not hasattr(element, 'tag') or not element.tag in (['br', 'hr', 'div', 'p'])):
            html_block += TEXT_FILTER_LIST_LINE_SUFFIX

-        if isinstance(element, str):
+        # Some kind of text, UTF-8 or other
+        if isinstance(element, (str, bytes)):
            html_block += element
        else:
-            html_block += etree.tostring(element, pretty_print=True, encoding='utf-8')
+            # Return the HTML which will get parsed as text
+            html_block += etree.tostring(element, pretty_print=True).decode('utf-8')

    return html_block

--- a/requirements.txt
+++ b/requirements.txt
@@ -52,8 +52,7 @@ cryptography~=3.4
 beautifulsoup4

 # XPath filtering, lxml is required by bs4 anyway, but put it here to be safe.
-# #2312 - In 5.1.1 _ElementStringResult was removed -  ImportError: cannot import name '_ElementStringResult' from 'lxml.etree'
-lxml
+lxml >=4.8.0,<6

 # XPath 2.0-3.1 support - 4.2.0 broke something?
 elementpath==4.1.5