From 3ae9bfa6f9c827acbdb324d72f7221dd8533498e Mon Sep 17 00:00:00 2001
From: dgtlmoon <dgtlmoon@gmail.com>
Date: Thu, 18 Apr 2024 11:53:45 +0200
Subject: [PATCH] Bug fix - further work on lxml filter extract (#2313 #2312
 #2317)

---
 changedetectionio/html_tools.py | 6 ++++--
 requirements.txt                | 3 +--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py
index 2a29bb32..a03653b9 100644
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -172,10 +172,12 @@ def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=Fals
         if append_pretty_line_formatting and len(html_block) and (not hasattr(element, 'tag') or not element.tag in (['br', 'hr', 'div', 'p'])):
             html_block += TEXT_FILTER_LIST_LINE_SUFFIX
 
-        if isinstance(element, str):
+        # Text result (str or bytes) rather than an element - append it as-is
+        if isinstance(element, (str, bytes)):
             html_block += element
         else:
-            html_block += etree.tostring(element, pretty_print=True, encoding='utf-8')
+            # Return the HTML which will get parsed as text
+            html_block += etree.tostring(element, pretty_print=True).decode('utf-8')
 
     return html_block
 
diff --git a/requirements.txt b/requirements.txt
index a543d00e..76e88c8f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -52,8 +52,7 @@ cryptography~=3.4
 beautifulsoup4
 
 # XPath filtering, lxml is required by bs4 anyway, but put it here to be safe.
-# #2312 - In 5.1.1 _ElementStringResult was removed -  ImportError: cannot import name '_ElementStringResult' from 'lxml.etree'
-lxml
+lxml >=4.8.0,<6
 
 # XPath 2.0-3.1 support - 4.2.0 broke something?
 elementpath==4.1.5