Re #340 - snapshot should not be modified by ignore text (#344)

pull/350/head
dgtlmoon 3 years ago committed by GitHub
parent b5c1fce136
commit fbd9ecab62
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -122,6 +122,9 @@ class perform_site_check():
# get_text() via inscriptis # get_text() via inscriptis
stripped_text_from_html = get_text(html_content) stripped_text_from_html = get_text(html_content)
# Re #340 - return the content before the 'ignore text' was applied
text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
# We rely on the actual text in the html output.. many sites have random script vars etc, # We rely on the actual text in the html output.. many sites have random script vars etc,
# in the future we'll implement other mechanisms. # in the future we'll implement other mechanisms.
@@ -181,4 +184,4 @@ class perform_site_check():
update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content) update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content)
return changed_detected, update_obj, stripped_text_from_html return changed_detected, update_obj, text_content_before_ignored_filter

Loading…
Cancel
Save