From 4d266cac9f33d62ae1c662a3128d043d9a0579fd Mon Sep 17 00:00:00 2001 From: Constantin Hong Date: Fri, 10 May 2024 00:08:49 +0900 Subject: [PATCH] tests/test_xpath_selector_unit/feat: Do forest_transplanting by default --- changedetectionio/html_tools.py | 22 +++++-------------- .../tests/test_xpath_selector_unit.py | 3 +++ 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py index 276a6219..8a7bbd92 100644 --- a/changedetectionio/html_tools.py +++ b/changedetectionio/html_tools.py @@ -123,24 +123,12 @@ def forest_transplanting(root): root_siblings_preceding = [ s for s in root.itersiblings(preceding=True)] root_siblings = [s for s in root.itersiblings()] - Is_fragment=False - # If element node exsits in root element node's sibilings, it is fragment. - for node in chain(root_siblings_preceding, root_siblings): - if not hasattr(node.tag, '__name__'): - Is_fragment=True - # early exit. because the root is already root element. - # So, two root element nodes are detected. DOM violation. - break - - if Is_fragment: - new_root = etree.Element("new_root") - root_siblings_preceding.reverse() - for node in chain(root_siblings_preceding, [root], root_siblings): - new_root.append(node) - return new_root, True - - return root, False + new_root = etree.Element("new_root") + root_siblings_preceding.reverse() + for node in chain(root_siblings_preceding, [root], root_siblings): + new_root.append(node) + return new_root, True # Return str Utf-8 of matched rules def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_rss=False): diff --git a/changedetectionio/tests/test_xpath_selector_unit.py b/changedetectionio/tests/test_xpath_selector_unit.py index 3f2b86d2..047191ab 100644 --- a/changedetectionio/tests/test_xpath_selector_unit.py +++ b/changedetectionio/tests/test_xpath_selector_unit.py @@ -218,6 +218,9 @@ DOM_violation_two_html_root_element = """ """ @pytest.mark.parametrize("html_content", [DOM_violation_two_html_root_element]) @pytest.mark.parametrize("xpath, answer", [ + (".", "First paragraph."), + ("/*", "First paragraph."), + ("/html", "First paragraph."), ("/html/body/p[1]", "First paragraph."), ("/html/body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"), ("count(/html/body/p[1])", "2"),