dgtlmoon 4 weeks ago committed by GitHub
commit 83af38a0ad
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -409,23 +409,6 @@ def has_ldjson_product_info(content):
x=bool(pricing_data)
return x
def workarounds_for_obfuscations(content):
    """
    Strip whitespace-only HTML comments that sites insert to break up text.

    Some sites are using sneaky tactics to make prices and other information
    un-renderable by Inscriptis, e.g. the HomeDepot.com style
    ``<span>$<!-- -->90<!-- -->.<!-- -->74</span>``.
    See https://github.com/weblyzard/inscriptis/issues/45

    This could go into its own Pip package in the future, for faster updates.

    :param content: HTML markup as a string. Falsy values (``None``, ``""``)
        are returned unchanged.
    :return: The markup with every comment that contains only whitespace
        (at least one whitespace character between ``<!--`` and ``-->``)
        removed. Comments with real text, and the empty ``<!---->`` form,
        are left untouched.
    """
    if not content:
        return content

    # Raw string literal: '\s' inside a plain string is an invalid escape
    # sequence, which newer Python versions warn about at compile time.
    return re.sub(r'<!--\s+-->', '', content)
def get_triggered_text(content, trigger_text):
triggered_text = []
result = strip_ignore_text(content=content,

@ -154,7 +154,6 @@ class perform_site_check(difference_detection_processor):
if is_html or watch.is_source_type_url:
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
self.fetcher.content = html_tools.workarounds_for_obfuscations(self.fetcher.content)
html_content = self.fetcher.content
# If not JSON, and if it's not text/plain..

@ -2,7 +2,7 @@
import time
from flask import url_for
from .util import live_server_setup
from .util import live_server_setup, wait_for_all_checks
def set_original_ignore_response():
@ -21,7 +21,7 @@ def set_original_ignore_response():
def test_obfuscations(client, live_server):
set_original_ignore_response()
live_server_setup(live_server)
time.sleep(1)
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
res = client.post(
@ -32,12 +32,12 @@ def test_obfuscations(client, live_server):
assert b"1 Imported" in res.data
# Give the thread time to pick it up
time.sleep(3)
wait_for_all_checks(client)
# Check HTML conversion detected and worked
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
# whitespace appears but it renders https://github.com/weblyzard/inscriptis/issues/45#issuecomment-1923339265
assert b'$90.74' in res.data

@ -12,7 +12,7 @@ flask_restful
flask_cors # For the Chrome extension to operate
flask_wtf~=1.2
flask~=2.3
inscriptis~=2.2
inscriptis~=2.4
pytz
timeago~=1.0
validators~=0.21

Loading…
Cancel
Save