Updating inscriptis library, removing fixes from 2.2

pull/2152/head
dgtlmoon 3 months ago
parent 3b16b19a94
commit c8310b7e93

@@ -409,23 +409,6 @@ def has_ldjson_product_info(content):
x=bool(pricing_data)
return x
def workarounds_for_obfuscations(content):
    """
    Strip whitespace-only HTML comments that sites use to break up visible text.

    Some sites are using sneaky tactics to make prices and other information un-renderable by Inscriptis
    This could go into its own Pip package in the future, for faster updates

    :param content: HTML markup as a string. Falsy values (``None``, ``''``) are returned unchanged.
    :return: ``content`` with every comment that contains only whitespace (e.g. ``<!-- -->``) removed.
             Comments with real text, and empty ``<!---->`` comments, are left in place.
    """

    # HomeDepot.com style <span>$<!-- -->90<!-- -->.<!-- -->74</span>
    # https://github.com/weblyzard/inscriptis/issues/45
    if not content:
        return content

    # Raw string: '\s' in a normal string literal is an invalid escape sequence
    # (SyntaxWarning on Python 3.12+); the pattern seen by `re` is unchanged.
    content = re.sub(r'<!--\s+-->', '', content)

    return content
def get_triggered_text(content, trigger_text):
triggered_text = []
result = strip_ignore_text(content=content,

@@ -151,7 +151,6 @@ class perform_site_check(difference_detection_processor):
if is_html or watch.is_source_type_url:
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
self.fetcher.content = html_tools.workarounds_for_obfuscations(self.fetcher.content)
html_content = self.fetcher.content
# If not JSON, and if it's not text/plain..

@@ -8,7 +8,7 @@ flask_expects_json~=1.7
flask_restful
flask_wtf~=1.2
flask~=2.3
inscriptis~=2.2
inscriptis~=2.4
pytz
timeago~=1.0
validators~=0.21

Loading…
Cancel
Save