|
|
|
@ -205,22 +205,27 @@ def test_trips(html_content, xpath, answer):
|
|
|
|
|
# Malformed fixture: a single document that holds two complete <html> root
# elements. Each <body> has its own first <p>, so a path such as
# /html/body/p[1] has a candidate under both roots.
DOM_violation_two_html_root_element = """<!DOCTYPE html>
<html>
<body>
<h1>Hello absurd world</h1>
<h1>Hello world</h1>
<p>First paragraph.</p>
</body>
</html>
<html>
<body>
<h1>Hello absurd world</h1>
<h1>Hello world</h1>
<p>Browsers parse this part by fixing it but lxml doesn't and returns two root element node</p>
<p>Therefore, if the path is /html/body/p[1], lxml(libxml2) returns two element nodes not one.</p>
</body>
</html>"""
|
|
|
|
|
@pytest.mark.parametrize("html_content", [DOM_violation_two_html_root_element])
@pytest.mark.parametrize("xpath, answer", [
    ("/html/body/p[1]", "First paragraph."),
    ("/html/body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
    ("//html/body/p[1]", "First paragraph."),
    ("//html/body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
    ("//body/p[1]", "First paragraph."),
    ("//body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
])
def test_trips(html_content, xpath, answer):
    """Check that filtering the two-root fixture by ``xpath`` yields ``answer``.

    Args:
        html_content: The HTML document handed to ``html_tools.xpath_filter``.
        xpath: The XPath expression to apply to the document.
        answer: Text expected to appear in the filtered output.

    NOTE(review): the table expects the first paragraph of *both* bodies for
    every ``p[1]`` path, i.e. it assumes ``xpath_filter`` returns the match
    under each root -- confirm against ``html_tools.xpath_filter``.
    """
    filtered = html_tools.xpath_filter(xpath, html_content, append_pretty_line_formatting=True)
    # The filter must hand back plain text, not a list of nodes.
    assert isinstance(filtered, str)
    assert answer in filtered
|
|
|
|
|