def forward_slash_enclosed_regex_to_options(self, regex):
    """Rewrite a user-supplied extraction regex so its flags are inline.

    Python's ``re`` functions do not understand the Perl-style
    ``/pattern/flags`` notation, so the trailing flag characters are turned
    into an inline flag group placed in front of the bare pattern.

    Args:
        regex: Either a plain pattern (``price: .+``) or a slash-enclosed
            pattern followed by at least one flag character
            (``/price: .+/s``).

    Returns:
        The pattern prefixed with an inline flag group. Slash-enclosed input
        keeps the flags it specified; any other input (including ``/foo/``
        with no flag characters) is returned unchanged apart from a leading
        ``(?i)``.
    """
    # The lazy body together with the `$` anchor means the split happens at
    # the LAST slash that is followed only by word characters, so slashes
    # inside the pattern itself survive ('/a/b/i' -> pattern 'a/b', flags 'i').
    res = re.search(r'^/(.*?)/(\w+)$', regex, re.IGNORECASE)

    if res:
        pattern, flags = res.group(1), res.group(2)
    else:
        # Not in '/.../x' form: use the text as-is, case-insensitively.
        pattern, flags = regex, 'i'

    # Global inline flags have to open the expression. Placing them after the
    # pattern ('foo(?i)') is a DeprecationWarning on Python 3.6-3.10 and a
    # re.error from Python 3.11 onwards.
    return '(?{}){}'.format(flags, pattern)
stripped_text_from_html = b'' diff --git a/changedetectionio/templates/edit.html b/changedetectionio/templates/edit.html index c706b0b2..7b5d0c4a 100644 --- a/changedetectionio/templates/edit.html +++ b/changedetectionio/templates/edit.html @@ -239,8 +239,15 @@ Unavailable") }} {{ render_field(form.extract_text, rows=5, placeholder="\d+ online") }}