|
|
|
@ -409,23 +409,6 @@ def has_ldjson_product_info(content):
|
|
|
|
|
x=bool(pricing_data)
|
|
|
|
|
return x
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def workarounds_for_obfuscations(content):
    """
    Strip markup tricks that stop Inscriptis from rendering text such as prices.

    Some sites are using sneaky tactics to make prices and other information
    un-renderable by Inscriptis.
    This could go into its own Pip package in the future, for faster updates.

    :param content: HTML text to clean up; may be empty or None.
    :return: ``content`` with every whitespace-only HTML comment removed.
             A falsy ``content`` (``None``, ``""``) is returned unchanged.
    """

    # HomeDepot.com style <span>$<!-- -->90<!-- -->.<!-- -->74</span>
    # https://github.com/weblyzard/inscriptis/issues/45
    if not content:
        return content

    # Raw string: '\s' inside a plain string literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    # Only comments holding nothing but whitespace are removed; comments with
    # real text, and the zero-length form '<!---->', are left untouched.
    content = re.sub(r'<!--\s+-->', '', content)

    return content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_triggered_text(content, trigger_text):
|
|
|
|
|
triggered_text = []
|
|
|
|
|
result = strip_ignore_text(content=content,
|
|
|
|
|