Filters & Text - Preview refactor/improvements (#2689)

pull/1945/merge
dgtlmoon 3 months ago committed by GitHub
parent 6cd1d50a4f
commit 783926962d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -1,7 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import datetime import datetime
import importlib
import flask_login import flask_login
import locale import locale
@ -12,9 +11,7 @@ import threading
import time import time
import timeago import timeago
from .content_fetchers.exceptions import ReplyWithContentButNoText
from .processors import find_processors, get_parent_module, get_custom_watch_obj_for_processor from .processors import find_processors, get_parent_module, get_custom_watch_obj_for_processor
from .processors.text_json_diff.processor import FilterNotFoundInResponse
from .safe_jinja import render as jinja_render from .safe_jinja import render as jinja_render
from changedetectionio.strtobool import strtobool from changedetectionio.strtobool import strtobool
from copy import deepcopy from copy import deepcopy
@ -1381,79 +1378,9 @@ def changedetection_app(config=None, datastore_o=None):
@app.route("/edit/<string:uuid>/preview-rendered", methods=['POST']) @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])
@login_optionally_required @login_optionally_required
def watch_get_preview_rendered(uuid): def watch_get_preview_rendered(uuid):
from flask import jsonify
'''For when viewing the "preview" of the rendered text from inside of Edit''' '''For when viewing the "preview" of the rendered text from inside of Edit'''
now = time.time() from .processors.text_json_diff import prepare_filter_prevew
import brotli return prepare_filter_prevew(watch_uuid=uuid, datastore=datastore)
from . import forms
text_after_filter = ''
tmp_watch = deepcopy(datastore.data['watching'].get(uuid))
if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.watch_data_dir):
# Splice in the temporary stuff from the form
form = forms.processor_text_json_diff_form(formdata=request.form if request.method == 'POST' else None,
data=request.form
)
# Only update vars that came in via the AJAX post
p = {k: v for k, v in form.data.items() if k in request.form.keys()}
tmp_watch.update(p)
latest_filename = next(reversed(tmp_watch.history))
html_fname = os.path.join(tmp_watch.watch_data_dir, f"{latest_filename}.html.br")
with open(html_fname, 'rb') as f:
decompressed_data = brotli.decompress(f.read()).decode('utf-8') if html_fname.endswith('.br') else f.read().decode('utf-8')
# Just like a normal change detection except provide a fake "watch" object and dont call .call_browser()
processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
update_handler = processor_module.perform_site_check(datastore=datastore,
watch_uuid=uuid # probably not needed anymore anyway?
)
# Use the last loaded HTML as the input
update_handler.fetcher.content = decompressed_data
update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type')
try:
changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(
watch=tmp_watch,
skip_when_checksum_same=False,
)
except FilterNotFoundInResponse as e:
text_after_filter = f"Filter not found in HTML: {str(e)}"
except ReplyWithContentButNoText as e:
text_after_filter = f"Filter found but no text (empty result)"
except Exception as e:
text_after_filter = f"Error: {str(e)}"
if not text_after_filter.strip():
text_after_filter = 'Empty content'
# because run_changedetection always returns bytes due to saving the snapshots etc
text_after_filter = text_after_filter.decode('utf-8') if isinstance(text_after_filter, bytes) else text_after_filter
do_anchor = datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
trigger_line_numbers = []
try:
text_before_filter = html_tools.html_to_text(html_content=decompressed_data,
render_anchor_tag_content=do_anchor)
trigger_line_numbers = html_tools.strip_ignore_text(content=text_after_filter,
wordlist=tmp_watch['trigger_text'],
mode='line numbers'
)
except Exception as e:
text_before_filter = f"Error: {str(e)}"
logger.trace(f"Parsed in {time.time() - now:.3f}s")
return jsonify(
{
'after_filter': text_after_filter,
'before_filter': text_before_filter.decode('utf-8') if isinstance(text_before_filter, bytes) else text_before_filter,
'duration': time.time() - now,
'trigger_line_numbers': trigger_line_numbers,
}
)
@app.route("/form/add/quickwatch", methods=['POST']) @app.route("/form/add/quickwatch", methods=['POST'])

@ -36,7 +36,8 @@ class model(watch_base):
jitter_seconds = 0 jitter_seconds = 0
def __init__(self, *arg, **kw): def __init__(self, *arg, **kw):
self.__datastore_path = kw['datastore_path'] self.__datastore_path = kw.get('datastore_path')
if kw.get('datastore_path'):
del kw['datastore_path'] del kw['datastore_path']
super(model, self).__init__(*arg, **kw) super(model, self).__init__(*arg, **kw)
if kw.get('default'): if kw.get('default'):
@ -171,6 +172,10 @@ class model(watch_base):
""" """
tmp_history = {} tmp_history = {}
# In the case we are only using the watch for processing without history
if not self.watch_data_dir:
return []
# Read the history file as a dict # Read the history file as a dict
fname = os.path.join(self.watch_data_dir, "history.txt") fname = os.path.join(self.watch_data_dir, "history.txt")
if os.path.isfile(fname): if os.path.isfile(fname):
@ -396,7 +401,7 @@ class model(watch_base):
@property @property
def watch_data_dir(self): def watch_data_dir(self):
# The base dir of the watch data # The base dir of the watch data
return os.path.join(self.__datastore_path, self['uuid']) return os.path.join(self.__datastore_path, self['uuid']) if self.__datastore_path else None
def get_error_text(self): def get_error_text(self):
"""Return the text saved from a previous request that resulted in a non-200 error""" """Return the text saved from a previous request that resulted in a non-200 error"""

@ -0,0 +1,107 @@
from loguru import logger
def _task(watch, update_handler):
    """Run the change-detection processor for ``watch`` and return its text.

    Designed to be submitted to an executor: every failure raised by
    ``run_changedetection`` is converted into a human-readable message
    instead of propagating to the caller.

    Args:
        watch: Watch-like object passed as ``watch=`` to
            ``update_handler.run_changedetection``.
        update_handler: Processor instance exposing ``run_changedetection``.

    Returns:
        str: The text produced after filters were applied, an explanatory
        message when processing failed, or ``'Empty content'`` when the
        result is blank.
    """
    from changedetectionio.content_fetchers.exceptions import ReplyWithContentButNoText
    from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse

    text_after_filter = ''

    try:
        # The slow process (we run 2 of these in parallel)
        _changed_detected, _update_obj, text_after_filter = update_handler.run_changedetection(
            watch=watch,
            skip_when_checksum_same=False,
        )
    except FilterNotFoundInResponse as e:
        text_after_filter = f"Filter not found in HTML: {str(e)}"
    except ReplyWithContentButNoText:
        text_after_filter = "Filter found but no text (empty result)"
    except Exception as e:
        # Best-effort preview: surface the problem as text rather than failing the request
        text_after_filter = f"Error: {str(e)}"

    # run_changedetection may hand back bytes (snapshots are saved as bytes).
    # Decode BEFORE the emptiness check so that bytes and str results are
    # judged by the same (Unicode-aware) whitespace rules.
    if isinstance(text_after_filter, bytes):
        text_after_filter = text_after_filter.decode('utf-8')

    if not text_after_filter.strip():
        text_after_filter = 'Empty content'

    return text_after_filter
def prepare_filter_prevew(datastore, watch_uuid):
    """Build the JSON response for the live "Filters & Text" preview.

    Used by @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])

    A deep copy of the stored watch is overlaid with the fields posted by the
    edit form, then the most recent saved HTML snapshot is run through the
    text_json_diff processor twice: once with the (temporary) filters and once
    with a blank watch that only carries the URL, giving the "after" and
    "before" texts respectively.

    Args:
        datastore: Application datastore; ``datastore.data['watching']`` is
            looked up by ``watch_uuid``.
        watch_uuid: UUID of the watch being edited.

    Returns:
        A Flask JSON response with the keys ``after_filter``,
        ``before_filter``, ``duration`` (seconds) and ``trigger_line_numbers``.
        When the watch is unknown or has no usable history, both texts are
        empty and no trigger lines are reported.
    """
    from changedetectionio import forms, html_tools
    from changedetectionio.model.Watch import model as watch_model
    from concurrent.futures import ProcessPoolExecutor
    from copy import deepcopy
    from flask import request, jsonify
    import brotli
    import importlib
    import os
    import time

    now = time.time()
    text_after_filter = ''
    text_before_filter = ''
    trigger_line_numbers = []

    # Work on a copy so that unsaved form values never leak into the real watch
    tmp_watch = deepcopy(datastore.data['watching'].get(watch_uuid))

    if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.watch_data_dir):
        # Splice in the temporary stuff from the form
        form = forms.processor_text_json_diff_form(formdata=request.form if request.method == 'POST' else None,
                                                   data=request.form
                                                   )

        # Only update vars that came in via the AJAX post
        p = {k: v for k, v in form.data.items() if k in request.form.keys()}
        tmp_watch.update(p)

        # Same URL, but no filters at all: produces the "before filters" text
        blank_watch_no_filters = watch_model()
        blank_watch_no_filters['url'] = tmp_watch.get('url')

        latest_filename = next(reversed(tmp_watch.history))
        html_fname = os.path.join(tmp_watch.watch_data_dir, f"{latest_filename}.html.br")
        # Read the snapshot and release the file handle before the slow processing starts
        with open(html_fname, 'rb') as f:
            decompressed_data = brotli.decompress(f.read()).decode('utf-8') if html_fname.endswith('.br') else f.read().decode('utf-8')

        # Just like a normal change detection except provide a fake "watch" object and don't call .call_browser()
        processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
        update_handler = processor_module.perform_site_check(datastore=datastore,
                                                             watch_uuid=tmp_watch.get('uuid')  # probably not needed anymore anyway?
                                                             )
        # Use the last loaded HTML as the input
        update_handler.datastore = datastore
        update_handler.fetcher.content = decompressed_data
        update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type')

        # Process our watch with filters and the HTML from disk, and also a blank watch with no filters but also with the same HTML from disk
        # Do this as a parallel process because it could take some time
        # NOTE(review): ProcessPoolExecutor pickles its arguments, so the watches and
        # update_handler are assumed to be picklable - confirm if this ever fails.
        with ProcessPoolExecutor(max_workers=2) as executor:
            future1 = executor.submit(_task, tmp_watch, update_handler)
            future2 = executor.submit(_task, blank_watch_no_filters, update_handler)

            text_after_filter = future1.result()
            text_before_filter = future2.result()

    # Only look for trigger lines when the watch actually exists; subscripting a
    # missing (None) watch would otherwise raise and be reported as a confusing
    # "'NoneType' object is not subscriptable" error in the preview.
    if tmp_watch:
        try:
            trigger_line_numbers = html_tools.strip_ignore_text(content=text_after_filter,
                                                                wordlist=tmp_watch['trigger_text'],
                                                                mode='line numbers'
                                                                )
        except Exception as e:
            # Surface the problem (e.g. a bad trigger expression) in the preview pane
            text_before_filter = f"Error: {str(e)}"

    logger.trace(f"Parsed in {time.time() - now:.3f}s")

    return jsonify(
        {
            'after_filter': text_after_filter,
            'before_filter': text_before_filter.decode('utf-8') if isinstance(text_before_filter, bytes) else text_before_filter,
            'duration': time.time() - now,
            'trigger_line_numbers': trigger_line_numbers,
        }
    )

@ -1,56 +0,0 @@
/**
 * debounce
 *
 * Returns a wrapper around the function this is called on. Every call to the
 * wrapper cancels any invocation that is still pending and schedules a new
 * one, so the original function only runs once the calls have stopped for
 * the given number of milliseconds. The arguments of the most recent call
 * are the ones passed on.
 *
 * @param {integer} milliseconds How long to wait after the last call before
 *                               calling the original function.
 * @param {object} context What "this" refers to in the original function;
 *                         when omitted, the wrapper's own "this" is used.
 * @return {function} The debounced wrapper.
 */
Function.prototype.debounce = function (milliseconds, context) {
    var wrapped = this;
    var pendingTimer = null;

    return function () {
        var receiver = context || this;
        var callArguments = arguments;

        // A newer call supersedes whatever was still waiting to fire
        if (pendingTimer) {
            clearTimeout(pendingTimer);
        }

        pendingTimer = setTimeout(function () {
            wrapped.apply(receiver, callArguments);
            pendingTimer = null;
        }, milliseconds);
    };
};
/**
 * throttle
 *
 * Returns a wrapper around the function this is called on. The first call
 * goes straight through; later calls are dropped (not queued) unless at
 * least the given number of milliseconds has passed since the last call
 * that was let through.
 *
 * @param {integer} milliseconds Minimum time between two calls of the
 *                               original function.
 * @param {object} context What "this" refers to in the original function;
 *                         when omitted, the wrapper's own "this" is used.
 * @return {function} The throttled wrapper.
 */
Function.prototype.throttle = function (milliseconds, context) {
    var wrapped = this;
    var lastFiredAt = null;

    return function () {
        var receiver = context || this;
        var timestamp = Date.now();
        var allowed = !lastFiredAt || timestamp - lastFiredAt >= milliseconds;

        if (!allowed) {
            return;
        }

        lastFiredAt = timestamp;
        wrapped.apply(receiver, arguments);
    };
};

@ -1,18 +1,60 @@
(function ($) { (function ($) {
/**
* debounce
* @param {integer} milliseconds This param indicates the number of milliseconds
* to wait after the last call before calling the original function.
* @param {object} What "this" refers to in the returned function.
* @return {function} This returns a function that when called will wait the
* indicated number of milliseconds after the last call before
* calling the original function.
*/
Function.prototype.debounce = function (milliseconds, context) {
var baseFunction = this,
timer = null,
wait = milliseconds;
return function () {
var self = context || this,
args = arguments;
function complete() {
baseFunction.apply(self, args);
timer = null;
}
/* if (timer) {
$('#code-block').highlightLines([ clearTimeout(timer);
{
'color': '#dd0000',
'lines': [10, 12]
},
{
'color': '#ee0000',
'lines': [15, 18]
} }
]);
}); timer = setTimeout(complete, wait);
};
};
/**
* throttle
* @param {integer} milliseconds This param indicates the number of milliseconds
* to wait between calls before calling the original function.
* @param {object} What "this" refers to in the returned function.
* @return {function} This returns a function that when called will wait the
* indicated number of milliseconds between calls before
* calling the original function.
*/ */
Function.prototype.throttle = function (milliseconds, context) {
var baseFunction = this,
lastEventTimestamp = null,
limit = milliseconds;
return function () {
var self = context || this,
args = arguments,
now = Date.now();
if (!lastEventTimestamp || now - lastEventTimestamp >= limit) {
lastEventTimestamp = now;
baseFunction.apply(self, args);
}
};
};
$.fn.highlightLines = function (configurations) { $.fn.highlightLines = function (configurations) {
return this.each(function () { return this.each(function () {

@ -49,4 +49,9 @@ $(document).ready(function () {
$("#overlay").toggleClass('visible'); $("#overlay").toggleClass('visible');
heartpath.style.fill = document.getElementById("overlay").classList.contains("visible") ? '#ff0000' : 'var(--color-background)'; heartpath.style.fill = document.getElementById("overlay").classList.contains("visible") ? '#ff0000' : 'var(--color-background)';
}); });
setInterval(function () {
$('body').toggleClass('spinner-active', $.active > 0);
}, 2000);
}); });

@ -25,14 +25,16 @@ function request_textpreview_update() {
const name = $element.attr('name'); // Get the name attribute of the element const name = $element.attr('name'); // Get the name attribute of the element
data[name] = $element.is(':checkbox') ? ($element.is(':checked') ? $element.val() : false) : $element.val(); data[name] = $element.is(':checkbox') ? ($element.is(':checked') ? $element.val() : false) : $element.val();
}); });
$('#text-preview-spinner').show();
$('body').toggleClass('spinner-active', 1);
$.abortiveSingularAjax({ $.abortiveSingularAjax({
type: "POST", type: "POST",
url: preview_text_edit_filters_url, url: preview_text_edit_filters_url,
data: data, data: data,
namespace: 'watchEdit' namespace: 'watchEdit'
}).done(function (data) { }).done(function (data) {
$('#text-preview-spinner').fadeOut(); console.debug(data['duration'])
$('#filters-and-triggers #text-preview-before-inner').text(data['before_filter']); $('#filters-and-triggers #text-preview-before-inner').text(data['before_filter']);
$('#filters-and-triggers #text-preview-inner') $('#filters-and-triggers #text-preview-inner')
.text(data['after_filter']) .text(data['after_filter'])
@ -43,7 +45,6 @@ function request_textpreview_update() {
} }
]); ]);
}).fail(function (error) { }).fail(function (error) {
$('#text-preview-spinner').fadeOut();
if (error.statusText === 'abort') { if (error.statusText === 'abort') {
console.log('Request was aborted due to a new request being fired.'); console.log('Request was aborted due to a new request being fired.');
} else { } else {
@ -71,18 +72,13 @@ $(document).ready(function () {
$("#text-preview-inner").css('max-height', (vh-300)+"px"); $("#text-preview-inner").css('max-height', (vh-300)+"px");
$("#text-preview-before-inner").css('max-height', (vh-300)+"px"); $("#text-preview-before-inner").css('max-height', (vh-300)+"px");
// Realtime preview of 'Filters & Text' setup
var debounced_request_textpreview_update = request_textpreview_update.debounce(100);
$("#activate-text-preview").click(function (e) { $("#activate-text-preview").click(function (e) {
$('body').toggleClass('preview-text-enabled') $('body').toggleClass('preview-text-enabled')
request_textpreview_update(); request_textpreview_update();
const method = $('body').hasClass('preview-text-enabled') ? 'on' : 'off'; const method = $('body').hasClass('preview-text-enabled') ? 'on' : 'off';
$("#text-preview-refresh")[method]('click', debounced_request_textpreview_update); $('textarea:visible')[method]('keyup blur', request_textpreview_update.throttle(1000));
$('textarea:visible')[method]('keyup blur', debounced_request_textpreview_update); $('input:visible')[method]('keyup blur change', request_textpreview_update.throttle(1000));
$('input:visible')[method]('keyup blur change', debounced_request_textpreview_update); $("#filters-and-triggers-tab")[method]('click', request_textpreview_update.throttle(1000));
$("#filters-and-triggers-tab")[method]('click', debounced_request_textpreview_update);
}); });
$('.minitabs-wrapper').miniTabs({ $('.minitabs-wrapper').miniTabs({
"Content after filters": "#text-preview-inner", "Content after filters": "#text-preview-inner",

@ -106,10 +106,34 @@ button.toggle-button {
padding: 5px; padding: 5px;
display: flex; display: flex;
justify-content: space-between; justify-content: space-between;
border-bottom: 2px solid var(--color-menu-accent);
align-items: center; align-items: center;
} }
/* Thin (3px) full-width gradient bar used as an activity indicator.
   In its idle state the gradient drifts very slowly (200s per cycle). */
#pure-menu-horizontal-spinner {
height: 3px;
background: linear-gradient(-75deg, #ff6000, #ff8f00, #ffdd00, #ed0000);
/* Oversized background so that shifting background-position moves the colours */
background-size: 400% 400%;
width: 100%;
animation: gradient 200s ease infinite;
}
/* While <body> carries the "spinner-active" class the same animation runs
   with a 1s cycle, making the bar visibly animate. */
body.spinner-active {
#pure-menu-horizontal-spinner {
animation: gradient 1s ease infinite;
}
}
/* Slides the gradient horizontally from left to right and back again. */
@keyframes gradient {
0% {
background-position: 0% 50%;
}
50% {
background-position: 100% 50%;
}
100% {
background-position: 0% 50%;
}
}
.pure-menu-heading { .pure-menu-heading {
color: var(--color-text-menu-heading); color: var(--color-text-menu-heading);
} }

@ -573,9 +573,26 @@ button.toggle-button {
padding: 5px; padding: 5px;
display: flex; display: flex;
justify-content: space-between; justify-content: space-between;
border-bottom: 2px solid var(--color-menu-accent);
align-items: center; } align-items: center; }
#pure-menu-horizontal-spinner {
height: 3px;
background: linear-gradient(-75deg, #ff6000, #ff8f00, #ffdd00, #ed0000);
background-size: 400% 400%;
width: 100%;
animation: gradient 200s ease infinite; }
body.spinner-active #pure-menu-horizontal-spinner {
animation: gradient 1s ease infinite; }
@keyframes gradient {
0% {
background-position: 0% 50%; }
50% {
background-position: 100% 50%; }
100% {
background-position: 0% 50%; } }
.pure-menu-heading { .pure-menu-heading {
color: var(--color-text-menu-heading); } color: var(--color-text-menu-heading); }

@ -35,7 +35,9 @@
<body class=""> <body class="">
<div class="header"> <div class="header">
<div class="home-menu pure-menu pure-menu-horizontal pure-menu-fixed" id="nav-menu"> <div class="pure-menu-fixed" style="width: 100%;">
<div class="home-menu pure-menu pure-menu-horizontal" id="nav-menu">
{% if has_password and not current_user.is_authenticated %} {% if has_password and not current_user.is_authenticated %}
<a class="pure-menu-heading" href="https://changedetection.io" rel="noopener"> <a class="pure-menu-heading" href="https://changedetection.io" rel="noopener">
<strong>Change</strong>Detection.io</a> <strong>Change</strong>Detection.io</a>
@ -129,7 +131,12 @@
</li> </li>
</ul> </ul>
</div> </div>
<div id="pure-menu-horizontal-spinner"></div>
</div>
</div> </div>
{% if hosted_sticky %} {% if hosted_sticky %}
<div class="sticky-tab" id="hosted-sticky"> <div class="sticky-tab" id="hosted-sticky">
<a href="https://changedetection.io/?ref={{guid}}">Let us host your instance!</a> <a href="https://changedetection.io/?ref={{guid}}">Let us host your instance!</a>

@ -398,7 +398,9 @@ Unavailable") }}
</fieldset> </fieldset>
<fieldset> <fieldset>
<div class="pure-control-group"> <div class="pure-control-group">
{{ render_field(form.extract_text, rows=5, placeholder="\d+ online") }} {{ render_field(form.extract_text, rows=5, placeholder="/.+?\d+ comments.+?/
or
keyword") }}
<span class="pure-form-message-inline"> <span class="pure-form-message-inline">
<ul> <ul>
<li>Extracts text in the final output (line by line) after other filters using regular expressions or string match; <li>Extracts text in the final output (line by line) after other filters using regular expressions or string match;
@ -434,7 +436,6 @@ Unavailable") }}
</div> </div>
</div> </div>
</div> </div>
<div class="spinner" style="display: none;" id="text-preview-spinner"></div>
</div> </div>
</div> </div>
</div> </div>

Loading…
Cancel
Save