|
|
@ -123,25 +123,13 @@ def forest_transplanting(root):
|
|
|
|
root_siblings_preceding = [ s for s in root.itersiblings(preceding=True)]
|
|
|
|
root_siblings_preceding = [ s for s in root.itersiblings(preceding=True)]
|
|
|
|
root_siblings = [s for s in root.itersiblings()]
|
|
|
|
root_siblings = [s for s in root.itersiblings()]
|
|
|
|
|
|
|
|
|
|
|
|
Is_fragment=False
|
|
|
|
|
|
|
|
# If an element node exists among the root element node's siblings, it is a fragment.
|
|
|
|
|
|
|
|
for node in chain(root_siblings_preceding, root_siblings):
|
|
|
|
|
|
|
|
if not hasattr(node.tag, '__name__'):
|
|
|
|
|
|
|
|
Is_fragment=True
|
|
|
|
|
|
|
|
# Early exit, because the root is already a root element.
|
|
|
|
|
|
|
|
# So two root element nodes have been detected, which is a DOM violation.
|
|
|
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if Is_fragment:
|
|
|
|
|
|
|
|
new_root = etree.Element("new_root")
|
|
|
|
new_root = etree.Element("new_root")
|
|
|
|
|
|
|
|
|
|
|
|
root_siblings_preceding.reverse()
|
|
|
|
root_siblings_preceding.reverse()
|
|
|
|
for node in chain(root_siblings_preceding, [root], root_siblings):
|
|
|
|
for node in chain(root_siblings_preceding, [root], root_siblings):
|
|
|
|
new_root.append(node)
|
|
|
|
new_root.append(node)
|
|
|
|
return new_root, True
|
|
|
|
return new_root, True
|
|
|
|
|
|
|
|
|
|
|
|
return root, False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Return str Utf-8 of matched rules
|
|
|
|
# Return str Utf-8 of matched rules
|
|
|
|
def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_rss=False):
|
|
|
|
def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_rss=False):
|
|
|
|
from lxml import etree, html
|
|
|
|
from lxml import etree, html
|
|
|
|