Ability to visualise trigger and filter rules against the current snapshot on the preview page

3 years ago · 014fda9058
parent dd384619e0
commit 014fda9058
18 changed files with 612 additions and 72 deletions
--- a/changedetectionio/init.py
+++ b/changedetectionio/init.py
@ -35,6 +35,7 @@ from flask import (
    url_for,
 )
 from flask_login import login_required
 from changedetectionio import html_tools
 __version__ = '0.39.8'
@ -441,7 +442,7 @@ def changedetection_app(config=None, datastore_o=None):
                raw_content = file.read()
                handler = fetch_site_status.perform_site_check(datastore=datastore)
-                stripped_content = handler.strip_ignore_text(raw_content,
+                stripped_content = html_tools.strip_ignore_text(raw_content,
                                                             datastore.data['watching'][uuid]['ignore_text'])
                if datastore.data['settings']['application'].get('ignore_whitespace', False):
@ -546,8 +547,12 @@ def changedetection_app(config=None, datastore_o=None):
                    flash('No notification URLs set, cannot send test.', 'error')
            # Diff page [edit] link should go back to diff page
-            if request.args.get("next") and request.args.get("next") == 'diff':
+            if request.args.get("next") and request.args.get("next") == 'diff' and not form.save_and_preview_button.data:
                return redirect(url_for('diff_history_page', uuid=uuid))
            else:
                if form.save_and_preview_button.data:
                    flash('You may need to reload this page to see the new content.')
                    return redirect(url_for('preview_page', uuid=uuid))
                else:
                    return redirect(url_for('index'))
@ -721,8 +726,12 @@ def changedetection_app(config=None, datastore_o=None):
        # Save the current newest history as the most recently viewed
        datastore.set_last_viewed(uuid, dates[0])
        newest_file = watch['history'][dates[0]]
        try:
            with open(newest_file, 'r') as f:
                newest_version_file_contents = f.read()
        except Exception as e:
            newest_version_file_contents = "Unable to read {}.\n".format(newest_file)
        previous_version = request.args.get('previous_version')
        try:
@ -731,8 +740,11 @@ def changedetection_app(config=None, datastore_o=None):
            # Not present, use a default value, the second one in the sorted list.
            previous_file = watch['history'][dates[1]]
        try:
            with open(previous_file, 'r') as f:
                previous_version_file_contents = f.read()
        except Exception as e:
            previous_version_file_contents = "Unable to read {}.\n".format(previous_file)
        output = render_template("diff.html", watch_a=watch,
                                 newest=newest_version_file_contents,
@ -751,6 +763,7 @@ def changedetection_app(config=None, datastore_o=None):
    @app.route("/preview/<string:uuid>", methods=['GET'])
    @login_required
    def preview_page(uuid):
        content = []
        # More for testing, possible to return the first/only
        if uuid == 'first':
@ -764,14 +777,38 @@ def changedetection_app(config=None, datastore_o=None):
            flash("No history found for the specified link, bad link?", "error")
            return redirect(url_for('index'))
-        newest = list(watch['history'].keys())[-1]
+        if len(watch['history']):
-        with open(watch['history'][newest], 'r') as f:
+            timestamps = sorted(watch['history'].keys(), key=lambda x: int(x))
            filename = watch['history'][timestamps[-1]]
            try:
                with open(filename, 'r') as f:
                    content = f.readlines()
            except:
                content.append("File doesnt exist or unable to read file {}".format(filename))
        else:
            content.append("No history found")
        # Get what needs to be highlighted
        ignore_rules = watch.get('ignore_text', []) + datastore.data['settings']['application']['global_ignore_text']
        # .readlines will keep the \n, but we will parse it here again, in the future tidy this up
        ignored_line_numbers = html_tools.strip_ignore_text(content="".join(content),
                                                            wordlist=ignore_rules,
                                                            mode='line numbers'
                                                            )
        trigger_line_numbers = html_tools.strip_ignore_text(content="".join(content),
                                                            wordlist=watch['trigger_text'],
                                                            mode='line numbers'
                                                            )
        output = render_template("preview.html",
                                 content=content,
                                 extra_stylesheets=extra_stylesheets,
                                 ignored_line_numbers=ignored_line_numbers,
                                 triggered_line_numbers=trigger_line_numbers,
                                 current_diff_url=watch['url'],
                                 watch=watch,
                                 uuid=uuid)
        return output
--- a/changedetectionio/fetch_site_status.py
+++ b/changedetectionio/fetch_site_status.py
@ -1,5 +1,6 @@
 import time
 from changedetectionio import content_fetcher
 from changedetectionio import html_tools
 import hashlib
 from inscriptis import get_text
 import urllib3
@ -16,40 +17,6 @@ class perform_site_check():
        super().__init__(*args, **kwargs)
        self.datastore = datastore
    def strip_ignore_text(self, content, list_ignore_text):
        """Return ``content`` with every line matching an ignore rule removed.

        Args:
            content: Text (``str``) to filter; it is processed line by line.
            list_ignore_text: Iterable of rules. A rule starting with ``/`` is
                a regular expression (surrounding spaces and slashes are
                stripped, matched case-insensitively); any other rule is a
                plain, case-sensitive substring.

        Returns:
            ``bytes``: the surviving lines, UTF-8 encoded and joined with
            ``\\n``. Blank lines are always dropped.
        """
        import re

        plain_words = []
        patterns = []
        for rule in list_ignore_text or ():
            # An empty rule used to raise IndexError on rule[0]; as plain text
            # it would also match every single line, so skip it.
            if not rule:
                continue
            if rule[0] == '/':
                try:
                    # Compile once here instead of re-parsing for every line
                    patterns.append(re.compile(rule.strip(" /"), re.IGNORECASE))
                except (re.error, OverflowError):
                    # Best effort: an invalid expression is ignored
                    continue
            else:
                plain_words.append(rule)

        output = []
        for line in content.splitlines():
            # Always ignore blank lines in this mode. (when this function gets called)
            if not line.strip():
                continue
            if any(p.search(line) for p in patterns):
                continue
            if any(word in line for word in plain_words):
                continue
            output.append(line.encode('utf8'))

        return b"\n".join(output)
    def run(self, uuid):
        timestamp = int(time.time())  # used for storage etc too
@ -147,7 +114,7 @@ class perform_site_check():
            # @todo we could abstract out the get_text() to handle this cleaner
            text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
            if len(text_to_ignore):
-                stripped_text_from_html = self.strip_ignore_text(stripped_text_from_html, text_to_ignore)
+                stripped_text_from_html = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)
            else:
                stripped_text_from_html = stripped_text_from_html.encode('utf8')
@ -165,22 +132,14 @@ class perform_site_check():
            blocked_by_not_found_trigger_text = False
            if len(watch['trigger_text']):
                # Yeah, lets block first until something matches
                blocked_by_not_found_trigger_text = True
-                for line in watch['trigger_text']:
+                # Filter and trigger works the same, so reuse it
-                    # Because JSON wont serialize a re.compile object
+                result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
-                    if line[0] == '/' and line[-1] == '/':
+                                                      wordlist=watch['trigger_text'],
-                        regex = re.compile(line.strip('/'), re.IGNORECASE)
+                                                      mode="line numbers")
-                        # Found it? so we don't wait for it anymore
+                if result:
                        r = re.search(regex, str(stripped_text_from_html))
                        if r:
                    blocked_by_not_found_trigger_text = False
                            break
                    elif line.lower() in str(stripped_text_from_html).lower():
                        # We found it don't wait for it.
                        blocked_by_not_found_trigger_text = False
                        break
            if not blocked_by_not_found_trigger_text and watch['previous_md5'] != fetched_md5:
--- a/changedetectionio/forms.py
+++ b/changedetectionio/forms.py
@ -1,6 +1,7 @@
 from wtforms import Form, SelectField, RadioField, BooleanField, StringField, PasswordField, validators, IntegerField, fields, TextAreaField, \
    Field
-from wtforms import widgets
+
 from wtforms import widgets, SubmitField
 from wtforms.validators import ValidationError
 from wtforms.fields import html5
 from changedetectionio import content_fetcher
@ -290,6 +291,9 @@ class watchForm(commonSettingsForm):
    method = SelectField('Request Method', choices=valid_method, default=default_method)
    trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
    save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
    save_and_preview_button = SubmitField('Save & Preview', render_kw={"class": "pure-button pure-button-primary"})
    def validate(self, **kwargs):
        if not super().validate():
            return False
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@ -1,7 +1,7 @@
 import json
 from bs4 import BeautifulSoup
 from jsonpath_ng.ext import parse
-
+import re
 class JSONNotFound(ValueError):
    def __init__(self, msg):
@ -105,3 +105,50 @@ def extract_json_as_string(content, jsonpath_filter):
        return ''
    return stripped_text_from_html
 # Mode     - "content" return the content without the matches (default)
 #          - "line numbers" return a list of line numbers that match (int list)
 #
 # wordlist - list of regex's (str) or words (str)
 def strip_ignore_text(content, wordlist, mode="content"):
    """Filter the lines of ``content`` against a list of words and regexes.

    Args:
        content: Text (``str``) to examine; it is processed line by line.
        wordlist: Iterable of rules. A rule starting with ``/`` is a regular
            expression (surrounding spaces and slashes are stripped, matched
            case-insensitively); any other rule is a plain substring.
        mode: ``"content"`` (default) returns the text without the matching
            lines; ``"line numbers"`` returns which lines matched.

    Returns:
        In ``"content"`` mode, ``bytes``: the non-matching lines, UTF-8
        encoded and joined with ``\\n``. In ``"line numbers"`` mode, a list
        of 1-based line numbers (``int``) of the matching lines. Blank lines
        are dropped from the content and never reported as matches, but they
        still count towards the line numbering.
    """
    plain_words = []
    patterns = []
    for rule in wordlist or ():
        # An empty rule used to raise IndexError on rule[0]; as plain text it
        # would also match every single line, so skip it.
        if not rule:
            continue
        if rule[0] == '/':
            try:
                # Compile once here instead of re-parsing for every line
                patterns.append(re.compile(rule.strip(" /"), re.IGNORECASE))
            except (re.error, OverflowError):
                # Best effort: an invalid expression is ignored
                continue
        else:
            # NOTE: plain-text rules are matched case-sensitively, unlike the
            # regex rules above (see the original @todo about case handling).
            plain_words.append(rule)

    output = []
    ignored_line_numbers = []
    for line_number, line in enumerate(content.splitlines(), start=1):
        # Always ignore blank lines in this mode. (when this function gets called)
        if not line.strip():
            continue
        matched = (any(p.search(line) for p in patterns)
                   or any(word in line for word in plain_words))
        if matched:
            ignored_line_numbers.append(line_number)
        else:
            output.append(line.encode('utf8'))

    # Used for finding out what to highlight
    if mode == "line numbers":
        return ignored_line_numbers

    return b"\n".join(output)
--- a/changedetectionio/static/styles/diff.css
+++ b/changedetectionio/static/styles/diff.css
@ -54,3 +54,19 @@ ins {
  body {
    height: 99%;
    /* Hide scroll bar in Firefox */ } }
 td#diff-col div {
  text-align: justify;
  white-space: pre-wrap; }
 .ignored {
  background-color: #ccc;
  /*  border: #0d91fa 1px solid; */
  opacity: 0.7; }
 .triggered {
  background-color: #1b98f8; }
 /* ignored and triggered? make it obvious error */
 .ignored.triggered {
  background-color: #ff0000; }
--- a/changedetectionio/static/styles/diff.scss
+++ b/changedetectionio/static/styles/diff.scss
@ -66,3 +66,23 @@ ins {
 		height: 99%; /* Hide scroll bar in Firefox */
 	}
 }
 td#diff-col div {
    text-align: justify;
    white-space: pre-wrap;
 }
 .ignored {
    background-color: #ccc;
   /*  border: #0d91fa 1px solid; */
    opacity: 0.7;
 }
 .triggered {
    background-color: #1b98f8;
 }
 /* ignored and triggered? make it obvious error */
 .ignored.triggered {
  background-color: #ff0000;
 }
--- a/changedetectionio/static/styles/package.json
+++ b/changedetectionio/static/styles/package.json
@ -4,8 +4,7 @@
  "description": "",
  "main": "index.js",
  "scripts": {
-    "build": "node-sass styles.scss diff.scss -o .",
+    "build": "node-sass styles.scss -o .;node-sass diff.scss -o .",
    "watch": "node-sass --watch styles.scss diff.scss -o ."
  },
  "author": "",
  "license": "ISC",
--- a/changedetectionio/static/styles/styles.css
+++ b/changedetectionio/static/styles/styles.css
--- a/changedetectionio/static/styles/styles.scss
+++ b/changedetectionio/static/styles/styles.scss
@ -567,3 +567,8 @@ $form-edge-padding: 20px;
  }
 }
 ul {
    padding-left: 1em;
    padding-top: 0px;
    margin-top: 4px;
 }
--- a/changedetectionio/templates/_helpers.jinja
+++ b/changedetectionio/templates/_helpers.jinja
@ -25,3 +25,6 @@
 {% endmacro %}
 {# Render a form field by calling it with any extra keyword arguments given to the macro; the result is marked safe so the field's own HTML is not escaped. #}
 {% macro render_button(field) %}
  {{ field(**kwargs)|safe }}
 {% endmacro %}
--- a/changedetectionio/templates/diff.html
+++ b/changedetectionio/templates/diff.html
@ -36,6 +36,7 @@
    <a onclick="next_diff();">Jump</a>
 </div>
 <div id="diff-ui">
    <div class="tip">Pro-tip: Use <strong>show current snapshot</strong> tab to visualise what will be ignored.</div>
    <table>
        <tbody>
        <tr>
--- a/changedetectionio/templates/edit.html
+++ b/changedetectionio/templates/edit.html
@ -1,6 +1,7 @@
 {% extends 'base.html' %}
 {% block content %}
 {% from '_helpers.jinja' import render_field %}
 {% from '_helpers.jinja' import render_button %}
 {% from '_common_fields.jinja' import render_common_settings_form %}
 <script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
@ -88,6 +89,18 @@ User-Agent: wonderbra 1.0") }}
            <div class="tab-pane-inner" id="filters-and-triggers">
                <fieldset>
                        <div class="pure-control-group">
                            <strong>Pro-tips:</strong><br/>
                            <ul>
                                <li>
                                    Use the preview page to see your filters and triggers highlighted.
                                </li>
                                <li>
                                    Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a>
                                </li>
                            </ul>
                    </div>
                    <div class="pure-control-group">
                        {{ render_field(form.css_filter, placeholder=".class-name or #some-id, or other CSS selector rule.",
                        class="m-d") }}
@ -114,6 +127,7 @@ User-Agent: wonderbra 1.0") }}
                            <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
                            <li>Regular Expression support, wrap the line in forward slash <b>/regex/</b></li>
                            <li>Changing this will affect the comparison checksum which may trigger an alert</li>
                            <li>Use the preview/show current tab to see ignores</li>
                        </ul>
                </span>
@ -138,7 +152,8 @@ User-Agent: wonderbra 1.0") }}
            <div id="actions">
                <div class="pure-control-group">
-                    <button type="submit" class="pure-button pure-button-primary">Save</button>
+                      {{ render_button(form.save_button) }} {{ render_button(form.save_and_preview_button) }}
                    <a href="{{url_for('api_delete', uuid=uuid)}}"
                       class="pure-button button-small button-error ">Delete</a>
                    <a href="{{url_for('api_clone', uuid=uuid)}}"
--- a/changedetectionio/templates/preview.html
+++ b/changedetectionio/templates/preview.html
@ -3,19 +3,24 @@
 {% block content %}
 <div id="settings">
-    <h1>Current</h1>
+    <h1>Current - {{watch.last_checked|format_timestamp_timeago}}</h1>
 </div>
 <div id="diff-ui">
    <span class="ignored">Grey lines are ignored</span> <span class="triggered">Blue lines are triggers</span>
    <table>
        <tbody>
        <tr>
            <td id="diff-col">
-                <span id="result">{% for row in content %}{{row}}{% endfor %}</span>
+                    {% for row in content %}
                        {% set classes = [] %}
                           {% if (loop.index in ignored_line_numbers) %}{{ classes.append("ignored") }}{% endif %}
                           {% if (loop.index in triggered_line_numbers) %}{{ classes.append("triggered") }}{% endif %}
                    <div class="{{ classes|join(' ') }}">{{row}}</div>
                    {% endfor %}
            </td>
        </tr>
        </tbody>
    </table>
 </div>
 {% endblock %}
--- a/changedetectionio/templates/settings.html
+++ b/changedetectionio/templates/settings.html
@ -95,6 +95,7 @@
                            <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
                            <li>Regular Expression support, wrap the line in forward slash <b>/regex/</b></li>
                            <li>Changing this will affect the comparison checksum which may trigger an alert</li>
                            <li>Use the preview/show current tab to see ignores</li>
                        </ul>
                     </span>
                    </fieldset>
--- a/changedetectionio/tests/test_ignore_regex_text.py
+++ b/changedetectionio/tests/test_ignore_regex_text.py
@ -3,6 +3,7 @@
 import time
 from flask import url_for
 from . util import live_server_setup
 from changedetectionio import html_tools
 def test_setup(live_server):
    live_server_setup(live_server)
@ -23,7 +24,7 @@ def test_strip_regex_text_func():
    ignore_lines = ["sometimes", "/\s\d{2,3}\s/", "/ignore-case text/"]
    fetcher = fetch_site_status.perform_site_check(datastore=False)
-    stripped_content = fetcher.strip_ignore_text(test_content, ignore_lines)
+    stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)
    assert b"but 1 lines" in stripped_content
    assert b"igNORe-cAse text" not in stripped_content
--- a/changedetectionio/tests/test_ignore_text.py
+++ b/changedetectionio/tests/test_ignore_text.py
@ -3,6 +3,7 @@
 import time
 from flask import url_for
 from . util import live_server_setup
 from changedetectionio import html_tools
 def test_setup(live_server):
    live_server_setup(live_server)
@ -23,7 +24,7 @@ def test_strip_text_func():
    ignore_lines = ["sometimes"]
    fetcher = fetch_site_status.perform_site_check(datastore=False)
-    stripped_content = fetcher.strip_ignore_text(test_content, ignore_lines)
+    stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)
    assert b"sometimes" not in stripped_content
    assert b"Some content" in stripped_content
@ -52,6 +53,8 @@ def set_modified_original_ignore_response():
     <p>Which is across multiple lines</p>
     </br>
     So let's see what happens.  </br>
     <p>new ignore stuff</p>
     <p>blah</p>
     </body>
     </html>
@ -82,7 +85,7 @@ def set_modified_ignore_response():
 def test_check_ignore_text_functionality(client, live_server):
    sleep_time_for_fetch_thread = 3
-    ignore_text = "XXXXX\r\nYYYYY\r\nZZZZZ"
+    ignore_text = "XXXXX\r\nYYYYY\r\nZZZZZ\r\nnew ignore stuff"
    set_original_ignore_response()
    # Give the endpoint time to spin up
@ -142,13 +145,25 @@ def test_check_ignore_text_functionality(client, live_server):
    assert b'unviewed' not in res.data
    assert b'/test-endpoint' in res.data
    # Just to be sure.. set a regular modified change..
    set_modified_original_ignore_response()
    client.get(url_for("api_watch_checknow"), follow_redirects=True)
    time.sleep(sleep_time_for_fetch_thread)
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data
    # Check the preview/highlighter, we should be able to see what we ignored, but it should be highlighted
    # We only introduce the "modified" content that includes what we ignore so we can prove the newest version also displays
    # at /preview
    res = client.get(url_for("preview_page", uuid="first"))
    # We should be able to see what we ignored
    assert b'<div class="ignored">new ignore stuff' in res.data
    res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
--- a/changedetectionio/tests/test_trigger.py
+++ b/changedetectionio/tests/test_trigger.py
@ -129,3 +129,8 @@ def test_trigger_functionality(client, live_server):
    time.sleep(sleep_time_for_fetch_thread)
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data
    # Check the preview/highlighter, we should be able to see what we triggered on, but it should be highlighted
    res = client.get(url_for("preview_page", uuid="first"))
    # We should be able to see what we triggered on
    assert b'<div class="triggered">foobar' in res.data
--- a/changedetectionio/tests/test_xpath_selector.py
+++ b/changedetectionio/tests/test_xpath_selector.py
@ -96,6 +96,7 @@ def test_check_markup_xpath_filter_restriction(client, live_server):
    res = client.get(url_for("index"))
    assert b'unviewed' not in res.data
 def test_xpath_validation(client, live_server):
    # Give the endpoint time to spin up