refactor-filters
dgtlmoon 3 months ago
parent 5ed7f43f6e
commit 5d1f317e30

@@ -266,30 +266,30 @@ class perform_site_check(difference_detection_processor):
# incase they specified something in '/.../x' # incase they specified something in '/.../x'
if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE): if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE):
regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re) regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re)
result = re.findall(regex.encode('utf-8'), stripped_text_from_html) result = re.findall(regex, stripped_text_from_html)
for l in result: for l in result:
if type(l) is tuple: if type(l) is tuple:
# @todo - some formatter option default (between groups) # @todo - some formatter option default (between groups)
regex_matched_output += list(l) + [b'\n'] regex_matched_output += list(l) + ['\n']
else: else:
# @todo - some formatter option default (between each ungrouped result) # @todo - some formatter option default (between each ungrouped result)
regex_matched_output += [l] + [b'\n'] regex_matched_output += [l] + ['\n']
else: else:
# Doesnt look like regex, just hunt for plaintext and return that which matches # Doesnt look like regex, just hunt for plaintext and return that which matches
# `stripped_text_from_html` will be bytes, so we must encode s_re also to bytes # `stripped_text_from_html` will be bytes, so we must encode s_re also to bytes
r = re.compile(re.escape(s_re.encode('utf-8')), re.IGNORECASE) r = re.compile(re.escape(s_re), re.IGNORECASE)
res = r.findall(stripped_text_from_html) res = r.findall(stripped_text_from_html)
if res: if res:
for match in res: for match in res:
regex_matched_output += [match] + [b'\n'] regex_matched_output += [match] + ['\n']
########################################################## ##########################################################
stripped_text_from_html = b'' stripped_text_from_html = ''
text_content_before_ignored_filter = b''
if regex_matched_output: if regex_matched_output:
# @todo some formatter for presentation? # @todo some formatter for presentation?
stripped_text_from_html = b''.join(regex_matched_output) stripped_text_from_html = ''.join(regex_matched_output)
if watch.get('remove_duplicate_lines'): if watch.get('remove_duplicate_lines'):
stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())) stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()))
@@ -302,7 +302,7 @@ class perform_site_check(difference_detection_processor):
stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower())) stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower()))
# Re #133 - if we should strip whitespaces from triggering the change detected comparison # Re #133 - if we should strip whitespaces from triggering the change detected comparison
if self.datastore.data['settings']['application'].get('ignore_whitespace', False): if stripped_text_from_html and self.datastore.data['settings']['application'].get('ignore_whitespace', False):
fetched_md5 = hashlib.md5(stripped_text_from_html.translate(b'\r\n\t ').encode('utf-8')).hexdigest() fetched_md5 = hashlib.md5(stripped_text_from_html.translate(b'\r\n\t ').encode('utf-8')).hexdigest()
else: else:
fetched_md5 = hashlib.md5(stripped_text_from_html.encode('utf-8')).hexdigest() fetched_md5 = hashlib.md5(stripped_text_from_html.encode('utf-8')).hexdigest()

@@ -71,7 +71,7 @@ def test_setup(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
def test_check_filter_multiline(client, live_server, measure_memory_usage): def test_check_filter_multiline(client, live_server, measure_memory_usage):
#live_server_setup(live_server) live_server_setup(live_server)
set_multiline_response() set_multiline_response()
# Add our URL to the import page # Add our URL to the import page

Loading…
Cancel
Save