tests/test_xpath_selector_unit/test: Fix test and add more small tests for fragment

pull/2351/head
Constantin Hong 8 months ago
parent fbd55129ed
commit 20195e7a79

@@ -205,22 +205,27 @@ def test_trips(html_content, xpath, answer):
# Fixture: an HTML string that contains two sibling <html> root elements, i.e.
# a document that violates the "single root element" rule of the DOM.
# Per the notes embedded in the markup itself, browsers repair such input while
# lxml (libxml2) does not, so /html/body/p[1] matches one <p> per <html> tree.
# NOTE(review): each <body> carries two <h1> lines ("Hello absurd world" and
# "Hello world"); this looks like the old and new diff lines shown together --
# confirm which one is intended before relying on the <h1> content.
DOM_violation_two_html_root_element = """<!DOCTYPE html>
<html>
<body>
<h1>Hello absurd world</h1>
<h1>Hello world</h1>
<p>First paragraph.</p>
</body>
</html>
<html>
<body>
<h1>Hello absurd world</h1>
<h1>Hello world</h1>
<p>Browsers parse this part by fixing it but lxml doesn't and returns two root element node</p>
<p>Therefore, if the path is /html/body/p[1], lxml(libxml2) returns two element nodes not one.</p>
</body>
</html>"""
@pytest.mark.parametrize("html_content", [DOM_violation_two_html_root_element])
@pytest.mark.parametrize("xpath, answer", [
    ("/html/body/p[1]", "First paragraph."),
    ("/html/body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
    ("//html/body/p[1]", "First paragraph."),
    ("//html/body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
    ("//body/p[1]", "First paragraph."),
    ("//body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
])
def test_trips(html_content, xpath, answer):
    """Check XPath filtering of a document that has two <html> root elements.

    Every XPath expression is paired with the first paragraph of the first
    <html> tree and with the first paragraph of the second one, so each
    expression is expected to surface text from both trees.

    Args:
        html_content: The malformed HTML fixture (two sibling <html> roots).
        xpath: The XPath expression handed to ``html_tools.xpath_filter``.
        answer: Text that must appear in the filtered output.
    """
    # NOTE(review): the enclosing diff hunk header suggests an earlier function
    # in this module is also named ``test_trips``; if so, this definition
    # shadows it and pytest only collects this one -- confirm and rename.
    filtered = html_tools.xpath_filter(xpath, html_content, append_pretty_line_formatting=True)

    assert isinstance(filtered, str)
    # The previous revision additionally asserted ``answer not in`` the output,
    # which contradicted this check and made the test impossible to pass.
    assert answer in filtered

Loading…
Cancel
Save