From 3ae9bfa6f9c827acbdb324d72f7221dd8533498e Mon Sep 17 00:00:00 2001
From: dgtlmoon
Date: Thu, 18 Apr 2024 11:53:45 +0200
Subject: [PATCH] Bug fix - further work on lxml filter extract (#2313 #2312 #2317)

---
 changedetectionio/html_tools.py | 6 ++++--
 requirements.txt                | 3 +--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py
index 2a29bb32..a03653b9 100644
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -172,10 +172,12 @@ def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=Fals
         if append_pretty_line_formatting and len(html_block) and (not hasattr(element, 'tag') or not element.tag in (['br', 'hr', 'div', 'p'])):
             html_block += TEXT_FILTER_LIST_LINE_SUFFIX
 
-        if isinstance(element, str):
+        # Some kind of text, UTF-8 or other
+        if isinstance(element, (str, bytes)):
             html_block += element
         else:
-            html_block += etree.tostring(element, pretty_print=True, encoding='utf-8')
+            # Return the HTML which will get parsed as text
+            html_block += etree.tostring(element, pretty_print=True).decode('utf-8')
 
     return html_block
 
diff --git a/requirements.txt b/requirements.txt
index a543d00e..76e88c8f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -52,8 +52,7 @@ cryptography~=3.4
 beautifulsoup4
 
 # XPath filtering, lxml is required by bs4 anyway, but put it here to be safe.
-# #2312 - In 5.1.1 _ElementStringResult was removed - ImportError: cannot import name '_ElementStringResult' from 'lxml.etree'
-lxml
+lxml >=4.8.0,<6
 
 # XPath 2.0-3.1 support - 4.2.0 broke something?
 elementpath==4.1.5