|
|
|
@ -11,6 +11,7 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Some common stuff here that can be moved to a base class
|
|
|
|
|
# (set_proxy_from_list)
|
|
|
|
|
class perform_site_check():
|
|
|
|
|
|
|
|
|
|
def __init__(self, *args, datastore, **kwargs):
|
|
|
|
@ -45,6 +46,20 @@ class perform_site_check():
|
|
|
|
|
|
|
|
|
|
return proxy_args
|
|
|
|
|
|
|
|
|
|
# Doesn't look like python supports forward slash auto enclosure in re.findall
|
|
|
|
|
# So convert it to inline flag "foobar(?i)" type configuration
|
|
|
|
|
def forward_slash_enclosed_regex_to_options(self, regex):
|
|
|
|
|
res = re.search(r'^/(.*?)/(\w+)$', regex, re.IGNORECASE)
|
|
|
|
|
|
|
|
|
|
if res:
|
|
|
|
|
regex = res.group(1)
|
|
|
|
|
regex += '(?{})'.format(res.group(2))
|
|
|
|
|
else:
|
|
|
|
|
regex += '(?{})'.format('i')
|
|
|
|
|
|
|
|
|
|
return regex
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def run(self, uuid):
|
|
|
|
|
timestamp = int(time.time()) # used for storage etc too
|
|
|
|
|
|
|
|
|
@ -215,15 +230,17 @@ class perform_site_check():
|
|
|
|
|
if len(extract_text) > 0:
|
|
|
|
|
regex_matched_output = []
|
|
|
|
|
for s_re in extract_text:
|
|
|
|
|
result = re.findall(s_re.encode('utf8'), stripped_text_from_html, flags=re.DOTALL)
|
|
|
|
|
if result:
|
|
|
|
|
for l in result:
|
|
|
|
|
if type(l) is tuple:
|
|
|
|
|
#@todo - some formatter option default (between groups)
|
|
|
|
|
regex_matched_output += list(l) + [b'\n']
|
|
|
|
|
else:
|
|
|
|
|
# @todo - some formatter option default (between each ungrouped result)
|
|
|
|
|
regex_matched_output += [l] + [b'\n']
|
|
|
|
|
# incase they specified something in '/.../x'
|
|
|
|
|
regex = self.forward_slash_enclosed_regex_to_options(s_re)
|
|
|
|
|
result = re.findall(regex.encode('utf-8'), stripped_text_from_html)
|
|
|
|
|
|
|
|
|
|
for l in result:
|
|
|
|
|
if type(l) is tuple:
|
|
|
|
|
#@todo - some formatter option default (between groups)
|
|
|
|
|
regex_matched_output += list(l) + [b'\n']
|
|
|
|
|
else:
|
|
|
|
|
# @todo - some formatter option default (between each ungrouped result)
|
|
|
|
|
regex_matched_output += [l] + [b'\n']
|
|
|
|
|
|
|
|
|
|
# Now we will only show what the regex matched
|
|
|
|
|
stripped_text_from_html = b''
|
|
|
|
|