tests/test_xpath_selector_unit/test: Fix test and add more small tests for fragment

pull/2351/head
Constantin Hong 8 months ago
parent fbd55129ed
commit 20195e7a79

@@ -205,22 +205,27 @@ def test_trips(html_content, xpath, answer):
# Deliberately malformed fixture: two sibling <html> root elements in one
# document. Per the fixture's own text, browsers repair this while
# lxml (libxml2) keeps both roots, so /html/body/p[1] matches one <p> per root.
# The paragraph texts double as expected answers in the parametrized test,
# so they must stay byte-for-byte as written.
DOM_violation_two_html_root_element = """<!DOCTYPE html>
<html>
<body>
<h1>Hello world</h1>
<p>First paragraph.</p>
</body>
</html>
<html>
<body>
<h1>Hello world</h1>
<p>Browsers parse this part by fixing it but lxml doesn't and returns two root element node</p>
<p>Therefore, if the path is /html/body/p[1], lxml(libxml2) returns two element nodes not one.</p>
</body>
</html>"""
@pytest.mark.parametrize("html_content", [DOM_violation_two_html_root_element])
@pytest.mark.parametrize("xpath, answer", [
    ("/html/body/p[1]", "First paragraph."),
    ("/html/body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
    ("//html/body/p[1]", "First paragraph."),
    ("//html/body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
    ("//body/p[1]", "First paragraph."),
    ("//body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
])
# NOTE(review): the diff hunk header shows an earlier ``test_trips`` above this
# point in the file. If so, this definition shadows it and pytest collects only
# the last one -- consider giving each test a distinct name.
def test_trips(html_content, xpath, answer):
    """Check that the XPath filter output contains the expected paragraph text.

    The fixture has two sibling ``<html>`` roots. Each XPath is paired once
    with the first ``<p>`` of the first root and once with the first ``<p>`` of
    the second root, so between them the cases require text from both roots
    to appear in the filtered output.

    Args:
        html_content: Raw HTML document passed to the filter.
        xpath: XPath expression to evaluate.
        answer: Text that must appear in the filtered result.
    """
    html_content = html_tools.xpath_filter(xpath, html_content, append_pretty_line_formatting=True)
    # Exact-type check kept as written; isinstance() would also accept str
    # subclasses, which may not be what this test intends.
    assert type(html_content) == str
    assert answer in html_content

Loading…
Cancel
Save