Ability to visualise trigger and filter rules against the current snapshot on the preview page

pull/435/head
dgtlmoon 3 years ago committed by GitHub
parent dd384619e0
commit 014fda9058
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -35,6 +35,7 @@ from flask import (
url_for, url_for,
) )
from flask_login import login_required from flask_login import login_required
from changedetectionio import html_tools
__version__ = '0.39.8' __version__ = '0.39.8'
@ -441,7 +442,7 @@ def changedetection_app(config=None, datastore_o=None):
raw_content = file.read() raw_content = file.read()
handler = fetch_site_status.perform_site_check(datastore=datastore) handler = fetch_site_status.perform_site_check(datastore=datastore)
stripped_content = handler.strip_ignore_text(raw_content, stripped_content = html_tools.strip_ignore_text(raw_content,
datastore.data['watching'][uuid]['ignore_text']) datastore.data['watching'][uuid]['ignore_text'])
if datastore.data['settings']['application'].get('ignore_whitespace', False): if datastore.data['settings']['application'].get('ignore_whitespace', False):
@ -546,8 +547,12 @@ def changedetection_app(config=None, datastore_o=None):
flash('No notification URLs set, cannot send test.', 'error') flash('No notification URLs set, cannot send test.', 'error')
# Diff page [edit] link should go back to diff page # Diff page [edit] link should go back to diff page
if request.args.get("next") and request.args.get("next") == 'diff': if request.args.get("next") and request.args.get("next") == 'diff' and not form.save_and_preview_button.data:
return redirect(url_for('diff_history_page', uuid=uuid)) return redirect(url_for('diff_history_page', uuid=uuid))
else:
if form.save_and_preview_button.data:
flash('You may need to reload this page to see the new content.')
return redirect(url_for('preview_page', uuid=uuid))
else: else:
return redirect(url_for('index')) return redirect(url_for('index'))
@ -721,8 +726,12 @@ def changedetection_app(config=None, datastore_o=None):
# Save the current newest history as the most recently viewed # Save the current newest history as the most recently viewed
datastore.set_last_viewed(uuid, dates[0]) datastore.set_last_viewed(uuid, dates[0])
newest_file = watch['history'][dates[0]] newest_file = watch['history'][dates[0]]
try:
with open(newest_file, 'r') as f: with open(newest_file, 'r') as f:
newest_version_file_contents = f.read() newest_version_file_contents = f.read()
except Exception as e:
newest_version_file_contents = "Unable to read {}.\n".format(newest_file)
previous_version = request.args.get('previous_version') previous_version = request.args.get('previous_version')
try: try:
@ -731,8 +740,11 @@ def changedetection_app(config=None, datastore_o=None):
# Not present, use a default value, the second one in the sorted list. # Not present, use a default value, the second one in the sorted list.
previous_file = watch['history'][dates[1]] previous_file = watch['history'][dates[1]]
try:
with open(previous_file, 'r') as f: with open(previous_file, 'r') as f:
previous_version_file_contents = f.read() previous_version_file_contents = f.read()
except Exception as e:
previous_version_file_contents = "Unable to read {}.\n".format(previous_file)
output = render_template("diff.html", watch_a=watch, output = render_template("diff.html", watch_a=watch,
newest=newest_version_file_contents, newest=newest_version_file_contents,
@ -751,6 +763,7 @@ def changedetection_app(config=None, datastore_o=None):
@app.route("/preview/<string:uuid>", methods=['GET']) @app.route("/preview/<string:uuid>", methods=['GET'])
@login_required @login_required
def preview_page(uuid): def preview_page(uuid):
content = []
# More for testing, possible to return the first/only # More for testing, possible to return the first/only
if uuid == 'first': if uuid == 'first':
@ -764,14 +777,38 @@ def changedetection_app(config=None, datastore_o=None):
flash("No history found for the specified link, bad link?", "error") flash("No history found for the specified link, bad link?", "error")
return redirect(url_for('index')) return redirect(url_for('index'))
newest = list(watch['history'].keys())[-1] if len(watch['history']):
with open(watch['history'][newest], 'r') as f: timestamps = sorted(watch['history'].keys(), key=lambda x: int(x))
filename = watch['history'][timestamps[-1]]
try:
with open(filename, 'r') as f:
content = f.readlines() content = f.readlines()
except:
content.append("File doesnt exist or unable to read file {}".format(filename))
else:
content.append("No history found")
# Get what needs to be highlighted
ignore_rules = watch.get('ignore_text', []) + datastore.data['settings']['application']['global_ignore_text']
# .readlines will keep the \n, but we will parse it here again, in the future tidy this up
ignored_line_numbers = html_tools.strip_ignore_text(content="".join(content),
wordlist=ignore_rules,
mode='line numbers'
)
trigger_line_numbers = html_tools.strip_ignore_text(content="".join(content),
wordlist=watch['trigger_text'],
mode='line numbers'
)
output = render_template("preview.html", output = render_template("preview.html",
content=content, content=content,
extra_stylesheets=extra_stylesheets, extra_stylesheets=extra_stylesheets,
ignored_line_numbers=ignored_line_numbers,
triggered_line_numbers=trigger_line_numbers,
current_diff_url=watch['url'], current_diff_url=watch['url'],
watch=watch,
uuid=uuid) uuid=uuid)
return output return output

@ -1,5 +1,6 @@
import time import time
from changedetectionio import content_fetcher from changedetectionio import content_fetcher
from changedetectionio import html_tools
import hashlib import hashlib
from inscriptis import get_text from inscriptis import get_text
import urllib3 import urllib3
@ -16,40 +17,6 @@ class perform_site_check():
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
self.datastore = datastore self.datastore = datastore
def strip_ignore_text(self, content, list_ignore_text):
    """Return *content* as UTF-8 bytes with every ignored line removed.

    Each entry of *list_ignore_text* is either a plain word or, when it
    starts with ``/``, a regular expression (surrounding slashes and spaces
    are stripped before use).  Regular expressions are matched
    case-insensitively; plain words are matched as case-sensitive
    substrings.  Blank lines are always dropped.  Empty entries and regular
    expressions that fail to compile are skipped instead of raising.

    Returns the surviving lines, each encoded as UTF-8, joined by ``\\n``.
    """
    import re

    plain_words = []
    compiled_patterns = []
    for entry in list_ignore_text:
        # An empty entry used to raise IndexError on entry[0]; it cannot
        # express a meaningful rule, so it is skipped.
        if not entry:
            continue
        if entry[0] == '/':
            try:
                compiled_patterns.append(re.compile(entry.strip(" /"), re.IGNORECASE))
            except (re.error, OverflowError, RecursionError):
                # A pattern that cannot be compiled can never match.
                continue
        else:
            plain_words.append(entry)

    kept = []
    for line in content.splitlines():
        # Always ignore blank lines in this mode.
        if not line.strip():
            continue
        if any(pattern.search(line) for pattern in compiled_patterns):
            continue
        if any(word in line for word in plain_words):
            continue
        kept.append(line.encode('utf8'))

    return b"\n".join(kept)
def run(self, uuid): def run(self, uuid):
timestamp = int(time.time()) # used for storage etc too timestamp = int(time.time()) # used for storage etc too
@ -147,7 +114,7 @@ class perform_site_check():
# @todo we could abstract out the get_text() to handle this cleaner # @todo we could abstract out the get_text() to handle this cleaner
text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', []) text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
if len(text_to_ignore): if len(text_to_ignore):
stripped_text_from_html = self.strip_ignore_text(stripped_text_from_html, text_to_ignore) stripped_text_from_html = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)
else: else:
stripped_text_from_html = stripped_text_from_html.encode('utf8') stripped_text_from_html = stripped_text_from_html.encode('utf8')
@ -165,22 +132,14 @@ class perform_site_check():
blocked_by_not_found_trigger_text = False blocked_by_not_found_trigger_text = False
if len(watch['trigger_text']): if len(watch['trigger_text']):
# Yeah, lets block first until something matches
blocked_by_not_found_trigger_text = True blocked_by_not_found_trigger_text = True
for line in watch['trigger_text']: # Filter and trigger works the same, so reuse it
# Because JSON wont serialize a re.compile object result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
if line[0] == '/' and line[-1] == '/': wordlist=watch['trigger_text'],
regex = re.compile(line.strip('/'), re.IGNORECASE) mode="line numbers")
# Found it? so we don't wait for it anymore if result:
r = re.search(regex, str(stripped_text_from_html))
if r:
blocked_by_not_found_trigger_text = False blocked_by_not_found_trigger_text = False
break
elif line.lower() in str(stripped_text_from_html).lower():
# We found it don't wait for it.
blocked_by_not_found_trigger_text = False
break
if not blocked_by_not_found_trigger_text and watch['previous_md5'] != fetched_md5: if not blocked_by_not_found_trigger_text and watch['previous_md5'] != fetched_md5:

@ -1,6 +1,7 @@
from wtforms import Form, SelectField, RadioField, BooleanField, StringField, PasswordField, validators, IntegerField, fields, TextAreaField, \ from wtforms import Form, SelectField, RadioField, BooleanField, StringField, PasswordField, validators, IntegerField, fields, TextAreaField, \
Field Field
from wtforms import widgets
from wtforms import widgets, SubmitField
from wtforms.validators import ValidationError from wtforms.validators import ValidationError
from wtforms.fields import html5 from wtforms.fields import html5
from changedetectionio import content_fetcher from changedetectionio import content_fetcher
@ -290,6 +291,9 @@ class watchForm(commonSettingsForm):
method = SelectField('Request Method', choices=valid_method, default=default_method) method = SelectField('Request Method', choices=valid_method, default=default_method)
trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()]) trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
save_and_preview_button = SubmitField('Save & Preview', render_kw={"class": "pure-button pure-button-primary"})
def validate(self, **kwargs): def validate(self, **kwargs):
if not super().validate(): if not super().validate():
return False return False

@ -1,7 +1,7 @@
import json import json
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from jsonpath_ng.ext import parse from jsonpath_ng.ext import parse
import re
class JSONNotFound(ValueError): class JSONNotFound(ValueError):
def __init__(self, msg): def __init__(self, msg):
@ -105,3 +105,50 @@ def extract_json_as_string(content, jsonpath_filter):
return '' return ''
return stripped_text_from_html return stripped_text_from_html
def strip_ignore_text(content, wordlist, mode="content"):
    """Match every line of *content* against *wordlist*.

    Args:
        content: Text to examine; it is split with ``str.splitlines()``.
        wordlist: List of plain words (str) or regular expressions (str).
            An entry starting with ``/`` is treated as a regular expression
            after its surrounding slashes and spaces are stripped.
        mode: ``"content"`` (default) returns the content without the
            matching lines; ``"line numbers"`` returns the 1-based line
            numbers of the matching lines (used to decide what to
            highlight).

    Returns:
        In ``"content"`` mode, ``bytes``: the non-matching, non-blank lines
        encoded as UTF-8 and joined with ``\\n``.  In ``"line numbers"``
        mode, a ``list`` of ``int``.

    Regular expressions are matched case-insensitively; plain words are
    matched as case-sensitive substrings.  Blank lines are never returned
    and never reported as matches.  Empty entries and regular expressions
    that fail to compile are skipped instead of raising.
    """
    plain_words = []
    compiled_patterns = []
    for entry in wordlist:
        # An empty entry used to raise IndexError on entry[0], and as a
        # plain word it would be a substring of every line; skip it.
        if not entry:
            continue
        # Is it a regex?
        if entry[0] == '/':
            try:
                # Compile once here rather than re-parsing for every line.
                compiled_patterns.append(re.compile(entry.strip(" /"), re.IGNORECASE))
            except (re.error, OverflowError, RecursionError):
                # A pattern that cannot be compiled can never match.
                continue
        else:
            plain_words.append(entry)

    kept_lines = []
    matched_line_numbers = []
    for line_number, line in enumerate(content.splitlines(), start=1):
        # Always ignore blank lines (they still count towards numbering).
        if not line.strip():
            continue
        is_match = (
            any(pattern.search(line) for pattern in compiled_patterns)
            or any(word in line for word in plain_words)
        )
        if is_match:
            matched_line_numbers.append(line_number)
        else:
            kept_lines.append(line.encode('utf8'))

    # Used for finding out what to highlight
    if mode == "line numbers":
        return matched_line_numbers

    return b"\n".join(kept_lines)

@ -54,3 +54,19 @@ ins {
body { body {
height: 99%; height: 99%;
/* Hide scroll bar in Firefox */ } } /* Hide scroll bar in Firefox */ } }
td#diff-col div {
text-align: justify;
white-space: pre-wrap; }
.ignored {
background-color: #ccc;
/* border: #0d91fa 1px solid; */
opacity: 0.7; }
.triggered {
background-color: #1b98f8; }
/* ignored and triggered? make it obvious error */
.ignored.triggered {
background-color: #ff0000; }

@ -66,3 +66,23 @@ ins {
height: 99%; /* Hide scroll bar in Firefox */ height: 99%; /* Hide scroll bar in Firefox */
} }
} }
td#diff-col div {
text-align: justify;
white-space: pre-wrap;
}
.ignored {
background-color: #ccc;
/* border: #0d91fa 1px solid; */
opacity: 0.7;
}
.triggered {
background-color: #1b98f8;
}
/* ignored and triggered? make it obvious error */
.ignored.triggered {
background-color: #ff0000;
}

@ -4,8 +4,7 @@
"description": "", "description": "",
"main": "index.js", "main": "index.js",
"scripts": { "scripts": {
"build": "node-sass styles.scss diff.scss -o .", "build": "node-sass styles.scss -o .;node-sass diff.scss -o ."
"watch": "node-sass --watch styles.scss diff.scss -o ."
}, },
"author": "", "author": "",
"license": "ISC", "license": "ISC",

File diff suppressed because one or more lines are too long

@ -567,3 +567,8 @@ $form-edge-padding: 20px;
} }
} }
ul {
padding-left: 1em;
padding-top: 0px;
margin-top: 4px;
}

@ -25,3 +25,6 @@
{% endmacro %} {% endmacro %}
{% macro render_button(field) %}
{{ field(**kwargs)|safe }}
{% endmacro %}

@ -36,6 +36,7 @@
<a onclick="next_diff();">Jump</a> <a onclick="next_diff();">Jump</a>
</div> </div>
<div id="diff-ui"> <div id="diff-ui">
<div class="tip">Pro-tip: Use <strong>show current snapshot</strong> tab to visualise what will be ignored.</div>
<table> <table>
<tbody> <tbody>
<tr> <tr>

@ -1,6 +1,7 @@
{% extends 'base.html' %} {% extends 'base.html' %}
{% block content %} {% block content %}
{% from '_helpers.jinja' import render_field %} {% from '_helpers.jinja' import render_field %}
{% from '_helpers.jinja' import render_button %}
{% from '_common_fields.jinja' import render_common_settings_form %} {% from '_common_fields.jinja' import render_common_settings_form %}
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script> <script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
@ -88,6 +89,18 @@ User-Agent: wonderbra 1.0") }}
<div class="tab-pane-inner" id="filters-and-triggers"> <div class="tab-pane-inner" id="filters-and-triggers">
<fieldset> <fieldset>
<div class="pure-control-group">
<strong>Pro-tips:</strong><br/>
<ul>
<li>
Use the preview page to see your filters and triggers highlighted.
</li>
<li>
Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a>
</li>
</ul>
</div>
<div class="pure-control-group"> <div class="pure-control-group">
{{ render_field(form.css_filter, placeholder=".class-name or #some-id, or other CSS selector rule.", {{ render_field(form.css_filter, placeholder=".class-name or #some-id, or other CSS selector rule.",
class="m-d") }} class="m-d") }}
@ -114,6 +127,7 @@ User-Agent: wonderbra 1.0") }}
<li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li> <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
<li>Regular Expression support, wrap the line in forward slash <b>/regex/</b></li> <li>Regular Expression support, wrap the line in forward slash <b>/regex/</b></li>
<li>Changing this will affect the comparison checksum which may trigger an alert</li> <li>Changing this will affect the comparison checksum which may trigger an alert</li>
<li>Use the preview/show current tab to see ignores</li>
</ul> </ul>
</span> </span>
@ -138,7 +152,8 @@ User-Agent: wonderbra 1.0") }}
<div id="actions"> <div id="actions">
<div class="pure-control-group"> <div class="pure-control-group">
<button type="submit" class="pure-button pure-button-primary">Save</button> {{ render_button(form.save_button) }} {{ render_button(form.save_and_preview_button) }}
<a href="{{url_for('api_delete', uuid=uuid)}}" <a href="{{url_for('api_delete', uuid=uuid)}}"
class="pure-button button-small button-error ">Delete</a> class="pure-button button-small button-error ">Delete</a>
<a href="{{url_for('api_clone', uuid=uuid)}}" <a href="{{url_for('api_clone', uuid=uuid)}}"

@ -3,19 +3,24 @@
{% block content %} {% block content %}
<div id="settings"> <div id="settings">
<h1>Current</h1> <h1>Current - {{watch.last_checked|format_timestamp_timeago}}</h1>
</div> </div>
<div id="diff-ui"> <div id="diff-ui">
<span class="ignored">Grey lines are ignored</span> <span class="triggered">Blue lines are triggers</span>
<table> <table>
<tbody> <tbody>
<tr> <tr>
<td id="diff-col"> <td id="diff-col">
<span id="result">{% for row in content %}{{row}}{% endfor %}</span> {% for row in content %}
{% set classes = [] %}
{% if (loop.index in ignored_line_numbers) %}{{ classes.append("ignored") }}{% endif %}
{% if (loop.index in triggered_line_numbers) %}{{ classes.append("triggered") }}{% endif %}
<div class="{{ classes|join(' ') }}">{{row}}</div>
{% endfor %}
</td> </td>
</tr> </tr>
</tbody> </tbody>
</table> </table>
</div> </div>
{% endblock %} {% endblock %}

@ -95,6 +95,7 @@
<li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li> <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
<li>Regular Expression support, wrap the line in forward slash <b>/regex/</b></li> <li>Regular Expression support, wrap the line in forward slash <b>/regex/</b></li>
<li>Changing this will affect the comparison checksum which may trigger an alert</li> <li>Changing this will affect the comparison checksum which may trigger an alert</li>
<li>Use the preview/show current tab to see ignores</li>
</ul> </ul>
</span> </span>
</fieldset> </fieldset>

@ -3,6 +3,7 @@
import time import time
from flask import url_for from flask import url_for
from . util import live_server_setup from . util import live_server_setup
from changedetectionio import html_tools
def test_setup(live_server): def test_setup(live_server):
live_server_setup(live_server) live_server_setup(live_server)
@ -23,7 +24,7 @@ def test_strip_regex_text_func():
ignore_lines = ["sometimes", "/\s\d{2,3}\s/", "/ignore-case text/"] ignore_lines = ["sometimes", "/\s\d{2,3}\s/", "/ignore-case text/"]
fetcher = fetch_site_status.perform_site_check(datastore=False) fetcher = fetch_site_status.perform_site_check(datastore=False)
stripped_content = fetcher.strip_ignore_text(test_content, ignore_lines) stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)
assert b"but 1 lines" in stripped_content assert b"but 1 lines" in stripped_content
assert b"igNORe-cAse text" not in stripped_content assert b"igNORe-cAse text" not in stripped_content

@ -3,6 +3,7 @@
import time import time
from flask import url_for from flask import url_for
from . util import live_server_setup from . util import live_server_setup
from changedetectionio import html_tools
def test_setup(live_server): def test_setup(live_server):
live_server_setup(live_server) live_server_setup(live_server)
@ -23,7 +24,7 @@ def test_strip_text_func():
ignore_lines = ["sometimes"] ignore_lines = ["sometimes"]
fetcher = fetch_site_status.perform_site_check(datastore=False) fetcher = fetch_site_status.perform_site_check(datastore=False)
stripped_content = fetcher.strip_ignore_text(test_content, ignore_lines) stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)
assert b"sometimes" not in stripped_content assert b"sometimes" not in stripped_content
assert b"Some content" in stripped_content assert b"Some content" in stripped_content
@ -52,6 +53,8 @@ def set_modified_original_ignore_response():
<p>Which is across multiple lines</p> <p>Which is across multiple lines</p>
</br> </br>
So let's see what happens. </br> So let's see what happens. </br>
<p>new ignore stuff</p>
<p>blah</p>
</body> </body>
</html> </html>
@ -82,7 +85,7 @@ def set_modified_ignore_response():
def test_check_ignore_text_functionality(client, live_server): def test_check_ignore_text_functionality(client, live_server):
sleep_time_for_fetch_thread = 3 sleep_time_for_fetch_thread = 3
ignore_text = "XXXXX\r\nYYYYY\r\nZZZZZ" ignore_text = "XXXXX\r\nYYYYY\r\nZZZZZ\r\nnew ignore stuff"
set_original_ignore_response() set_original_ignore_response()
# Give the endpoint time to spin up # Give the endpoint time to spin up
@ -142,13 +145,25 @@ def test_check_ignore_text_functionality(client, live_server):
assert b'unviewed' not in res.data assert b'unviewed' not in res.data
assert b'/test-endpoint' in res.data assert b'/test-endpoint' in res.data
# Just to be sure.. set a regular modified change.. # Just to be sure.. set a regular modified change..
set_modified_original_ignore_response() set_modified_original_ignore_response()
client.get(url_for("api_watch_checknow"), follow_redirects=True) client.get(url_for("api_watch_checknow"), follow_redirects=True)
time.sleep(sleep_time_for_fetch_thread) time.sleep(sleep_time_for_fetch_thread)
res = client.get(url_for("index")) res = client.get(url_for("index"))
assert b'unviewed' in res.data assert b'unviewed' in res.data
# Check the preview/highlighter, we should be able to see what we ignored, but it should be highlighted
# We only introduce the "modified" content that includes what we ignore so we can prove the newest version also displays
# at /preview
res = client.get(url_for("preview_page", uuid="first"))
# We should be able to see what we ignored
assert b'<div class="ignored">new ignore stuff' in res.data
res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data assert b'Deleted' in res.data

@ -129,3 +129,8 @@ def test_trigger_functionality(client, live_server):
time.sleep(sleep_time_for_fetch_thread) time.sleep(sleep_time_for_fetch_thread)
res = client.get(url_for("index")) res = client.get(url_for("index"))
assert b'unviewed' in res.data assert b'unviewed' in res.data
# Check the preview/highlighter, we should be able to see what we triggered on, but it should be highlighted
res = client.get(url_for("preview_page", uuid="first"))
# We should be able to see what we triggered on
assert b'<div class="triggered">foobar' in res.data

@ -96,6 +96,7 @@ def test_check_markup_xpath_filter_restriction(client, live_server):
res = client.get(url_for("index")) res = client.get(url_for("index"))
assert b'unviewed' not in res.data assert b'unviewed' not in res.data
def test_xpath_validation(client, live_server): def test_xpath_validation(client, live_server):
# Give the endpoint time to spin up # Give the endpoint time to spin up

Loading…
Cancel
Save