tests/test_xpath_selector_unit/feat: Do forest_transplanting by default

8 months ago · 4d266cac9f
parent 66a7dae381
commit 4d266cac9f
2 changed files with 8 additions and 17 deletions
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@ -123,24 +123,12 @@ def forest_transplanting(root):
    root_siblings_preceding = [ s for s in root.itersiblings(preceding=True)]
    root_siblings = [s for s in root.itersiblings()]
-    Is_fragment=False
+    new_root = etree.Element("new_root")
    # If an element node exists among the root element node's siblings, it is a fragment.
    for node in chain(root_siblings_preceding, root_siblings):
        if not hasattr(node.tag, '__name__'):
            Is_fragment=True
            # early exit. because the root is already root element.
            # So, two root element nodes are detected. DOM violation.
            break
    if Is_fragment:
        new_root = etree.Element("new_root")
        root_siblings_preceding.reverse()
        for node in chain(root_siblings_preceding, [root], root_siblings):
            new_root.append(node)
        return new_root, True
    return root, False
    root_siblings_preceding.reverse()
    for node in chain(root_siblings_preceding, [root], root_siblings):
        new_root.append(node)
    return new_root, True
 # Return str Utf-8 of matched rules
 def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_rss=False):
--- a/changedetectionio/tests/test_xpath_selector_unit.py
+++ b/changedetectionio/tests/test_xpath_selector_unit.py
@ -218,6 +218,9 @@ DOM_violation_two_html_root_element = """<!DOCTYPE html>
 </html>"""
@pytest.mark.parametrize("html_content", [DOM_violation_two_html_root_element])
@pytest.mark.parametrize("xpath, answer", [
    (".", "First paragraph."),
    ("/*", "First paragraph."),
    ("/html", "First paragraph."),
    ("/html/body/p[1]", "First paragraph."),
    ("/html/body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
    ("count(/html/body/p[1])", "2"),