Auto extract html title as title (#102)
* Auto extract <title> as watch title, Minor refactor for html toolingpull/107/head
parent
9af1ea9fc0
commit
25185e6d00
@ -0,0 +1,23 @@
|
|||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
|
# Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches
|
||||||
|
def css_filter(css_filter, html_content):
|
||||||
|
soup = BeautifulSoup(html_content, "html.parser")
|
||||||
|
html_block = ""
|
||||||
|
for item in soup.select(css_filter, separator=""):
|
||||||
|
html_block += str(item)
|
||||||
|
|
||||||
|
return html_block + "\n"
|
||||||
|
|
||||||
|
|
||||||
|
# Extract/find element
|
||||||
|
def extract_element(find='title', html_content=''):
|
||||||
|
html_title = False
|
||||||
|
|
||||||
|
soup = BeautifulSoup(html_content, 'html.parser')
|
||||||
|
title = soup.find(find)
|
||||||
|
if title and title.string is not None:
|
||||||
|
html_title = title.string.strip()
|
||||||
|
|
||||||
|
return html_title
|
Loading…
Reference in new issue