From 74707909f1a6c742b9fc4059cbce7b9f59b1d510 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Wed, 17 Apr 2024 19:55:45 +0200 Subject: [PATCH] Bug fix for newer lxml module - module 'lxml.etree' has no attribute '_ElementStringResult' - reimplement _ElementStringResult (#2313 #2312) --- changedetectionio/html_tools.py | 10 ++- .../tests/test_xpath_selector.py | 65 ++++++++++++++++++- requirements.txt | 1 + 3 files changed, 69 insertions(+), 7 deletions(-) diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py index 7c9844c8..2a29bb32 100644 --- a/changedetectionio/html_tools.py +++ b/changedetectionio/html_tools.py @@ -169,15 +169,13 @@ def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=Fals # And where the matched result doesn't include something that will cause Inscriptis to add a newline # (This way each 'match' reliably has a new-line in the diff) # Divs are converted to 4 whitespaces by inscriptis - if append_pretty_line_formatting and len(html_block) and (not hasattr( element, 'tag' ) or not element.tag in (['br', 'hr', 'div', 'p'])): + if append_pretty_line_formatting and len(html_block) and (not hasattr(element, 'tag') or not element.tag in (['br', 'hr', 'div', 'p'])): html_block += TEXT_FILTER_LIST_LINE_SUFFIX - if type(element) == etree._ElementStringResult: - html_block += str(element) - elif type(element) == etree._ElementUnicodeResult: - html_block += str(element) + if isinstance(element, str): + html_block += element else: - html_block += etree.tostring(element, pretty_print=True).decode('utf-8') + html_block += etree.tostring(element, pretty_print=True, encoding='utf-8') return html_block diff --git a/changedetectionio/tests/test_xpath_selector.py b/changedetectionio/tests/test_xpath_selector.py index 836dd5b5..1a9c5afa 100644 --- a/changedetectionio/tests/test_xpath_selector.py +++ b/changedetectionio/tests/test_xpath_selector.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +# -*- coding: utf-8 -*- import time 
from flask import url_for @@ -255,6 +255,69 @@ def test_xpath23_prefix_validation(client, live_server): res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) assert b'Deleted' in res.data +def test_xpath1_lxml(client, live_server): + #live_server_setup(live_server) + + d = ''' + + + rpilocator.com + https://rpilocator.com + Find Raspberry Pi Computers in Stock + Thu, 19 May 2022 23:27:30 GMT + + https://rpilocator.com/favicon.png + rpilocator.com + https://rpilocator.com/ + 32 + 32 + + + Stock Alert (UK): RPi CM4 + something else unrelated + + + Stock Alert (UK): Big monitorěěěě + something else unrelated + + + '''.encode('utf-8') + + with open("test-datastore/endpoint-content.txt", "wb") as f: + f.write(d) + + + test_url = url_for('test_endpoint', _external=True) + res = client.post( + url_for("import_page"), + data={"urls": test_url}, + follow_redirects=True + ) + assert b"1 Imported" in res.data + wait_for_all_checks(client) + + res = client.post( + url_for("edit_page", uuid="first"), + data={"include_filters": "xpath1://title/text()", "url": test_url, "tags": "", "headers": "", + 'fetch_backend': "html_requests"}, + follow_redirects=True + ) + + ##### #2312 + wait_for_all_checks(client) + res = client.get(url_for("index")) + assert b'_ElementStringResult' not in res.data # tested with 5.1.1 when it was removed and 5.1.0 + assert b'Exception' not in res.data + res = client.get( + url_for("preview_page", uuid="first"), + follow_redirects=True + ) + + assert b"rpilocator.com" in res.data # in selector + assert "Stock Alert (UK): Big monitorěěěě".encode('utf-8') in res.data # not in selector + + ##### + def test_xpath1_validation(client, live_server): # Add our URL to the import page diff --git a/requirements.txt b/requirements.txt index 6e0f2a70..a543d00e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -52,6 +52,7 @@ cryptography~=3.4 beautifulsoup4 # XPath filtering, lxml is required by bs4 anyway, but put it here to be safe. 
+# #2312 - lxml 5.1.1 removed the private etree._ElementStringResult class (ImportError: cannot import name '_ElementStringResult' from 'lxml.etree'); html_tools.py no longer references it, so lxml stays unpinned lxml # XPath 2.0-3.1 support - 4.2.0 broke something?