|
|
@ -191,42 +191,50 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
|
|
|
|
#
|
|
|
|
#
|
|
|
|
# wordlist - list of regex's (str) or words (str)
|
|
|
|
# wordlist - list of regex's (str) or words (str)
|
|
|
|
def strip_ignore_text(content, wordlist, mode="content"):
|
|
|
|
def strip_ignore_text(content, wordlist, mode="content"):
|
|
|
|
ignore = []
|
|
|
|
i = 0
|
|
|
|
|
|
|
|
output = []
|
|
|
|
|
|
|
|
ignore_text = []
|
|
|
|
ignore_regex = []
|
|
|
|
ignore_regex = []
|
|
|
|
|
|
|
|
|
|
|
|
# @todo check this runs case insensitive
|
|
|
|
ignored_line_numbers = []
|
|
|
|
for k in wordlist:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for k in wordlist:
|
|
|
|
# Is it a regex?
|
|
|
|
# Is it a regex?
|
|
|
|
if k[0] == '/':
|
|
|
|
x = re.search('^\/(.*)\/(.*)', k.strip())
|
|
|
|
ignore_regex.append(k.strip(" /"))
|
|
|
|
if x:
|
|
|
|
|
|
|
|
# Starts with / but doesn't look like a regex
|
|
|
|
|
|
|
|
p = x.group(1)
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
|
|
# @Todo python regex options can go before the regex str, but not really many of the options apply on a per-line basis
|
|
|
|
|
|
|
|
ignore_regex.append(re.compile(rf"{p}", re.IGNORECASE))
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
|
|
# Badly formed regex, treat as text
|
|
|
|
|
|
|
|
ignore_text.append(k.strip())
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
ignore.append(k)
|
|
|
|
# Had a / but doesn't work as regex
|
|
|
|
|
|
|
|
ignore_text.append(k.strip())
|
|
|
|
|
|
|
|
|
|
|
|
i = 0
|
|
|
|
|
|
|
|
output = []
|
|
|
|
|
|
|
|
ignored_line_numbers = []
|
|
|
|
|
|
|
|
for line in content.splitlines():
|
|
|
|
for line in content.splitlines():
|
|
|
|
i += 1
|
|
|
|
i += 1
|
|
|
|
# Always ignore blank lines in this mode. (when this function gets called)
|
|
|
|
# Always ignore blank lines in this mode. (when this function gets called)
|
|
|
|
|
|
|
|
got_match = False
|
|
|
|
if len(line.strip()):
|
|
|
|
if len(line.strip()):
|
|
|
|
regex_matches = False
|
|
|
|
for l in ignore_text:
|
|
|
|
|
|
|
|
if l.lower() in line.lower():
|
|
|
|
|
|
|
|
got_match = True
|
|
|
|
|
|
|
|
|
|
|
|
# if any of these match, skip
|
|
|
|
if not got_match:
|
|
|
|
for regex in ignore_regex:
|
|
|
|
for r in ignore_regex:
|
|
|
|
try:
|
|
|
|
if r.search(line):
|
|
|
|
if re.search(regex, line, re.IGNORECASE):
|
|
|
|
got_match = True
|
|
|
|
regex_matches = True
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if not regex_matches and not any(skip_text.lower() in line.lower() for skip_text in ignore):
|
|
|
|
if not got_match:
|
|
|
|
|
|
|
|
# Not ignored
|
|
|
|
output.append(line.encode('utf8'))
|
|
|
|
output.append(line.encode('utf8'))
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
ignored_line_numbers.append(i)
|
|
|
|
ignored_line_numbers.append(i)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Used for finding out what to highlight
|
|
|
|
# Used for finding out what to highlight
|
|
|
|
if mode == "line numbers":
|
|
|
|
if mode == "line numbers":
|
|
|
|
return ignored_line_numbers
|
|
|
|
return ignored_line_numbers
|
|
|
|