Merge branch 'master' into store-watch-configs-in-own-dir

store-watch-configs-in-own-dir
dgtlmoon 4 months ago
commit 52df602af7

@ -0,0 +1,78 @@
# include the decorator
from apprise.decorators import notify
@notify(on="delete")
@notify(on="deletes")
@notify(on="get")
@notify(on="gets")
@notify(on="post")
@notify(on="posts")
@notify(on="put")
@notify(on="puts")
def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
    """Send the notification body as a plain HTTP request to a custom endpoint.

    Registered for the ``get(s)://``, ``post(s)://``, ``put(s)://`` and
    ``delete(s)://`` schemas. The schema selects the HTTP method and a
    trailing ``s`` selects ``https`` instead of ``http``.

    Args:
        body: Notification body. A ``str`` is UTF-8 encoded before sending.
        title: Not used by this wrapper.
        notify_type: Not used by this wrapper.
        **kwargs: ``kwargs['meta']['url']`` must hold the notification URL.

    Returns:
        ``False`` when the URL has an unsupported schema or cannot be parsed,
        so the caller can treat the notification as failed; otherwise ``None``.
    """
    import json
    import requests
    from apprise.utils import parse_url as apprise_parse_url
    from apprise import URLBase

    request_methods = {
        'delete': requests.delete,
        'get': requests.get,
        'post': requests.post,
        'put': requests.put,
    }

    url = kwargs['meta'].get('url')

    # Only rewrite the leading schema; a blanket str.replace() would also
    # rewrite e.g. "get://" appearing inside the path or query string.
    schema, separator, remainder = url.partition('://')
    schema = schema.lower()
    is_secure = schema.endswith('s')
    send_request = request_methods.get(schema[:-1] if is_secure else schema)
    if not separator or send_request is None:
        return False
    url = ('https://' if is_secure else 'http://') + remainder

    params = {}
    auth = None

    # Convert /foobar?+some-header=hello to a proper header dictionary
    results = apprise_parse_url(url)
    if not results:
        return False

    # Headers supplied by the user via "+name=value", tidied by unquoting them
    headers = {URLBase.unquote(x): URLBase.unquote(y)
               for x, y in results['qsd+'].items()}

    # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
    # Apprise relies on prefixing each request arg with "-", because it uses
    # e.g. &method=update as a flag for itself. Here we make straight requests,
    # so every query argument that is not a header is passed on as a parameter.
    for k, v in results['qsd'].items():
        if k.strip('+-') not in results['qsd+']:
            params[URLBase.unquote(k)] = URLBase.unquote(v)

    # Determine authentication. requests only accepts a (user, password)
    # tuple for basic auth, so a user without a password gets an empty one.
    user = results.get('user')
    password = results.get('password')
    if user:
        auth = (URLBase.unquote(user),
                URLBase.unquote(password) if password else '')

    # Try to auto-guess if the body is JSON
    try:
        json.loads(body)
        headers['Content-Type'] = 'application/json; charset=utf-8'
    except (TypeError, ValueError):
        # Not JSON (or not text at all): send it without a JSON content type
        pass

    send_request(results.get('url'),
                 auth=auth,
                 data=body.encode('utf-8') if isinstance(body, str) else body,
                 headers=headers,
                 params=params
                 )

@ -25,6 +25,7 @@ browser_step_ui_config = {'Choose one': '0 0',
'Click element if exists': '1 0', 'Click element if exists': '1 0',
'Click element': '1 0', 'Click element': '1 0',
'Click element containing text': '0 1', 'Click element containing text': '0 1',
'Click element containing text if exists': '0 1',
'Enter text in field': '1 1', 'Enter text in field': '1 1',
'Execute JS': '0 1', 'Execute JS': '0 1',
# 'Extract text and use as filter': '1 0', # 'Extract text and use as filter': '1 0',
@ -96,12 +97,24 @@ class steppable_browser_interface():
return self.action_goto_url(value=self.start_url) return self.action_goto_url(value=self.start_url)
def action_click_element_containing_text(self, selector=None, value=''): def action_click_element_containing_text(self, selector=None, value=''):
logger.debug("Clicking element containing text")
if not len(value.strip()): if not len(value.strip()):
return return
elem = self.page.get_by_text(value) elem = self.page.get_by_text(value)
if elem.count(): if elem.count():
elem.first.click(delay=randint(200, 500), timeout=3000) elem.first.click(delay=randint(200, 500), timeout=3000)
def action_click_element_containing_text_if_exists(self, selector=None, value=''):
    """Click the first element containing ``value``; do nothing if none matches.

    Args:
        selector: Accepted for signature parity with the other actions; unused.
        value: Text to look for. Blank text makes this a no-op.
    """
    logger.debug("Clicking element containing text if exists")
    if not value.strip():
        return

    elem = self.page.get_by_text(value)
    # Query the match count once so the logged number and the decision below
    # are guaranteed to agree.
    found = elem.count()
    logger.debug(f"Clicking element containing text - {found} elements found")
    if found:
        elem.first.click(delay=randint(200, 500), timeout=3000)
def action_enter_text_in_field(self, selector, value): def action_enter_text_in_field(self, selector, value):
if not len(selector.strip()): if not len(selector.strip()):
return return

@ -58,9 +58,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
{% if '/text()' in field %} {% if '/text()' in field %}
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br> <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br>
{% endif %} {% endif %}
<span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br> <span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
<div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
<ul> <ul id="advanced-help-selectors">
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li> <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed). <li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
<ul> <ul>
@ -89,11 +89,13 @@ xpath://body/div/span[contains(@class, 'example-class')]",
{{ render_field(form.subtractive_selectors, rows=5, placeholder="header {{ render_field(form.subtractive_selectors, rows=5, placeholder="header
footer footer
nav nav
.stockticker") }} .stockticker
//*[contains(text(), 'Advertisement')]") }}
<span class="pure-form-message-inline"> <span class="pure-form-message-inline">
<ul> <ul>
<li> Remove HTML element(s) by CSS selector before text conversion. </li> <li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
<li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li> <li> Don't paste HTML here, use only CSS and XPath selectors </li>
<li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
</ul> </ul>
</span> </span>
</fieldset> </fieldset>

@ -1,8 +1,6 @@
from loguru import logger from loguru import logger
import chardet
import hashlib import hashlib
import os import os
import requests
from changedetectionio import strtobool from changedetectionio import strtobool
from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
from changedetectionio.content_fetchers.base import Fetcher from changedetectionio.content_fetchers.base import Fetcher
@ -28,6 +26,9 @@ class fetcher(Fetcher):
is_binary=False, is_binary=False,
empty_pages_are_a_change=False): empty_pages_are_a_change=False):
import chardet
import requests
if self.browser_steps_get_valid_steps(): if self.browser_steps_get_valid_steps():
raise BrowserStepsInUnsupportedFetcher(url=url) raise BrowserStepsInUnsupportedFetcher(url=url)

@ -1,6 +1,8 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import datetime import datetime
import importlib
import flask_login import flask_login
import locale import locale
import os import os
@ -10,7 +12,9 @@ import threading
import time import time
import timeago import timeago
from .content_fetchers.exceptions import ReplyWithContentButNoText
from .processors import find_processors, get_parent_module, get_custom_watch_obj_for_processor from .processors import find_processors, get_parent_module, get_custom_watch_obj_for_processor
from .processors.text_json_diff.processor import FilterNotFoundInResponse
from .safe_jinja import render as jinja_render from .safe_jinja import render as jinja_render
from changedetectionio.strtobool import strtobool from changedetectionio.strtobool import strtobool
from copy import deepcopy from copy import deepcopy
@ -537,7 +541,8 @@ def changedetection_app(config=None, datastore_o=None):
import random import random
from .apprise_asset import asset from .apprise_asset import asset
apobj = apprise.Apprise(asset=asset) apobj = apprise.Apprise(asset=asset)
# so that the custom endpoints are registered
from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper
is_global_settings_form = request.args.get('mode', '') == 'global-settings' is_global_settings_form = request.args.get('mode', '') == 'global-settings'
is_group_settings_form = request.args.get('mode', '') == 'group-settings' is_group_settings_form = request.args.get('mode', '') == 'group-settings'
@ -1395,6 +1400,57 @@ def changedetection_app(config=None, datastore_o=None):
# Return a 500 error # Return a 500 error
abort(500) abort(500)
@app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])
@login_optionally_required
def watch_get_preview_rendered(uuid):
    '''For when viewing the "preview" of the rendered text from inside of Edit

    Re-runs the text/JSON diff processor against the most recent stored HTML
    snapshot of the watch, using the (unsaved) form values from the request,
    and returns the resulting filtered text or a short error message.
    '''
    now = time.time()
    import brotli
    from . import forms

    text_after_filter = ''
    # Work on a copy so the unsaved form values never touch the stored watch
    tmp_watch = deepcopy(datastore.data['watching'].get(uuid))

    if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.watch_data_dir):
        # Splice in the temporary stuff from the form
        form = forms.processor_text_json_diff_form(formdata=request.form if request.method == 'POST' else None,
                                                   data=request.form
                                                   )
        # Only update vars that came in via the AJAX post
        p = {k: v for k, v in form.data.items() if k in request.form.keys()}
        tmp_watch.update(p)

        latest_filename = next(reversed(tmp_watch.history))
        html_fname = os.path.join(tmp_watch.watch_data_dir, f"{latest_filename}.html.br")

        try:
            # Reading the snapshot is inside the try so a missing or corrupt
            # file is reported in the preview instead of raising.
            with open(html_fname, 'rb') as f:
                decompressed_data = brotli.decompress(f.read()).decode('utf-8')

            # Just like a normal change detection except provide a fake "watch" object and dont call .call_browser()
            processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
            update_handler = processor_module.perform_site_check(datastore=datastore,
                                                                 watch_uuid=uuid  # probably not needed anymore anyway?
                                                                 )
            # Use the last loaded HTML as the input
            update_handler.fetcher.content = decompressed_data

            # Only the filtered text is needed for the preview
            _, _, _, text_after_filter = update_handler.run_changedetection(
                watch=tmp_watch,
                skip_when_checksum_same=False,
            )
        except FilterNotFoundInResponse as e:
            text_after_filter = f"Filter not found in HTML: {str(e)}"
        except ReplyWithContentButNoText:
            text_after_filter = "Filter found but no text (empty result)"
        except Exception as e:
            text_after_filter = f"Error: {str(e)}"

    if not text_after_filter.strip():
        text_after_filter = 'Empty content'

    logger.trace(f"Parsed in {time.time()-now:.3f}s")
    return text_after_filter.strip()
@app.route("/form/add/quickwatch", methods=['POST']) @app.route("/form/add/quickwatch", methods=['POST'])
@login_optionally_required @login_optionally_required
def form_quick_watch_add(): def form_quick_watch_add():

@ -221,7 +221,8 @@ class ValidateAppRiseServers(object):
def __call__(self, form, field): def __call__(self, form, field):
import apprise import apprise
apobj = apprise.Apprise() apobj = apprise.Apprise()
# so that the custom endpoints are registered
from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper
for server_url in field.data: for server_url in field.data:
if not apobj.add(server_url): if not apobj.add(server_url):
message = field.gettext('\'%s\' is not a valid AppRise URL.' % (server_url)) message = field.gettext('\'%s\' is not a valid AppRise URL.' % (server_url))
@ -468,7 +469,7 @@ class processor_text_json_diff_form(commonSettingsForm):
include_filters = StringListField('CSS/JSONPath/JQ/XPath Filters', [ValidateCSSJSONXPATHInput()], default='') include_filters = StringListField('CSS/JSONPath/JQ/XPath Filters', [ValidateCSSJSONXPATHInput()], default='')
subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)]) subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_json=False)])
extract_text = StringListField('Extract text', [ValidateListRegex()]) extract_text = StringListField('Extract text', [ValidateListRegex()])
@ -479,8 +480,10 @@ class processor_text_json_diff_form(commonSettingsForm):
body = TextAreaField('Request body', [validators.Optional()]) body = TextAreaField('Request body', [validators.Optional()])
method = SelectField('Request method', choices=valid_method, default=default_method) method = SelectField('Request method', choices=valid_method, default=default_method)
ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False) ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False)
check_unique_lines = BooleanField('Only trigger when unique lines appear', default=False) check_unique_lines = BooleanField('Only trigger when unique lines appear in all history', default=False)
remove_duplicate_lines = BooleanField('Remove duplicate lines of text', default=False)
sort_text_alphabetically = BooleanField('Sort text alphabetically', default=False) sort_text_alphabetically = BooleanField('Sort text alphabetically', default=False)
trim_text_whitespace = BooleanField('Trim whitespace before and after text', default=False)
filter_text_added = BooleanField('Added lines', default=True) filter_text_added = BooleanField('Added lines', default=True)
filter_text_replaced = BooleanField('Replaced/changed lines', default=True) filter_text_replaced = BooleanField('Replaced/changed lines', default=True)
@ -575,7 +578,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
empty_pages_are_a_change = BooleanField('Treat empty pages as a change?', default=False) empty_pages_are_a_change = BooleanField('Treat empty pages as a change?', default=False)
fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()]) fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()]) global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)]) global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_json=False)])
ignore_whitespace = BooleanField('Ignore whitespace') ignore_whitespace = BooleanField('Ignore whitespace')
password = SaltyPasswordField() password = SaltyPasswordField()
pager_size = IntegerField('Pager size', pager_size = IntegerField('Pager size',

@ -1,10 +1,5 @@
from bs4 import BeautifulSoup
from inscriptis import get_text
from jsonpath_ng.ext import parse
from typing import List from typing import List
from inscriptis.model.config import ParserConfig from lxml import etree
from xml.sax.saxutils import escape as xml_escape
import json import json
import re import re
@ -39,6 +34,7 @@ def perl_style_slash_enclosed_regex_to_options(regex):
# Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches # Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches
def include_filters(include_filters, html_content, append_pretty_line_formatting=False): def include_filters(include_filters, html_content, append_pretty_line_formatting=False):
from bs4 import BeautifulSoup
soup = BeautifulSoup(html_content, "html.parser") soup = BeautifulSoup(html_content, "html.parser")
html_block = "" html_block = ""
r = soup.select(include_filters, separator="") r = soup.select(include_filters, separator="")
@ -56,16 +52,32 @@ def include_filters(include_filters, html_content, append_pretty_line_formatting
return html_block return html_block
def subtractive_css_selector(css_selector, html_content): def subtractive_css_selector(css_selector, html_content):
from bs4 import BeautifulSoup
soup = BeautifulSoup(html_content, "html.parser") soup = BeautifulSoup(html_content, "html.parser")
for item in soup.select(css_selector): for item in soup.select(css_selector):
item.decompose() item.decompose()
return str(soup) return str(soup)
def subtractive_xpath_selector(xpath_selector, html_content):
    """Return ``html_content`` with the elements matched by ``xpath_selector`` removed.

    Args:
        xpath_selector: XPath expression selecting the elements to drop.
        html_content: HTML markup to filter.

    Returns:
        The re-serialised HTML. The input is returned unchanged when it
        yields no tree or the expression does not produce a node list.
    """
    html_tree = etree.HTML(html_content)
    if html_tree is None:
        # NOTE(review): guards against input for which no root element is built
        return html_content

    matches = html_tree.xpath(xpath_selector)
    if not isinstance(matches, list):
        # Expressions such as count(...) return a scalar, nothing to remove
        return html_content

    for element in matches:
        # text() / @attr results are strings, not removable elements
        if not etree.iselement(element):
            continue

        parent = element.getparent()
        if parent is None:
            # The root element cannot be detached from a parent
            continue

        # lxml's remove() drops the element's tail text with it; re-attach
        # that text so content following the removed element is preserved.
        if element.tail:
            previous = element.getprevious()
            if previous is not None:
                previous.tail = (previous.tail or '') + element.tail
            else:
                parent.text = (parent.text or '') + element.tail

        parent.remove(element)

    return etree.tostring(html_tree, method="html").decode("utf-8")
def element_removal(selectors: List[str], html_content): def element_removal(selectors: List[str], html_content):
"""Joins individual filters into one css filter.""" """Removes elements that match a list of CSS or xPath selectors."""
selector = ",".join(selectors) modified_html = html_content
return subtractive_css_selector(selector, html_content) for selector in selectors:
if selector.startswith(('xpath:', 'xpath1:', '//')):
xpath_selector = selector.removeprefix('xpath:').removeprefix('xpath1:')
modified_html = subtractive_xpath_selector(xpath_selector, modified_html)
else:
modified_html = subtractive_css_selector(selector, modified_html)
return modified_html
def elementpath_tostring(obj): def elementpath_tostring(obj):
""" """
@ -181,6 +193,7 @@ def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=Fals
# Extract/find element # Extract/find element
def extract_element(find='title', html_content=''): def extract_element(find='title', html_content=''):
from bs4 import BeautifulSoup
#Re #106, be sure to handle when its not found #Re #106, be sure to handle when its not found
element_text = None element_text = None
@ -194,6 +207,8 @@ def extract_element(find='title', html_content=''):
# #
def _parse_json(json_data, json_filter): def _parse_json(json_data, json_filter):
from jsonpath_ng.ext import parse
if json_filter.startswith("json:"): if json_filter.startswith("json:"):
jsonpath_expression = parse(json_filter.replace('json:', '')) jsonpath_expression = parse(json_filter.replace('json:', ''))
match = jsonpath_expression.find(json_data) match = jsonpath_expression.find(json_data)
@ -242,6 +257,8 @@ def _get_stripped_text_from_json_match(match):
# json_filter - ie json:$..price # json_filter - ie json:$..price
# ensure_is_ldjson_info_type - str "product", optional, "@type == product" (I dont know how to do that as a json selector) # ensure_is_ldjson_info_type - str "product", optional, "@type == product" (I dont know how to do that as a json selector)
def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None): def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None):
from bs4 import BeautifulSoup
stripped_text_from_html = False stripped_text_from_html = False
# https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w # https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w
# Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags # Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags
@ -352,6 +369,7 @@ def strip_ignore_text(content, wordlist, mode="content"):
return "\n".encode('utf8').join(output) return "\n".encode('utf8').join(output)
def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str: def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str:
from xml.sax.saxutils import escape as xml_escape
pattern = '<!\[CDATA\[(\s*(?:.(?<!\]\]>)\s*)*)\]\]>' pattern = '<!\[CDATA\[(\s*(?:.(?<!\]\]>)\s*)*)\]\]>'
def repl(m): def repl(m):
text = m.group(1) text = m.group(1)
@ -360,6 +378,9 @@ def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False
return re.sub(pattern, repl, html_content) return re.sub(pattern, repl, html_content)
def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False) -> str: def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False) -> str:
from inscriptis import get_text
from inscriptis.model.config import ParserConfig
"""Converts html string to a string with just the text. If ignoring """Converts html string to a string with just the text. If ignoring
rendering anchor tag content is enable, anchor tag content are also rendering anchor tag content is enable, anchor tag content are also
included in the text included in the text

@ -60,6 +60,8 @@ class watch_base(dict):
'time_between_check_use_default': True, 'time_between_check_use_default': True,
'title': None, 'title': None,
'track_ldjson_price_data': None, 'track_ldjson_price_data': None,
'trim_text_whitespace': False,
'remove_duplicate_lines': False,
'trigger_text': [], # List of text or regex to wait for until a change is detected 'trigger_text': [], # List of text or regex to wait for until a change is detected
'url': '', 'url': '',
'uuid': str(uuid.uuid4()), 'uuid': str(uuid.uuid4()),

@ -1,9 +1,10 @@
import apprise
import time import time
from apprise import NotifyFormat from apprise import NotifyFormat
import json import apprise
from loguru import logger from loguru import logger
valid_tokens = { valid_tokens = {
'base_url': '', 'base_url': '',
'current_snapshot': '', 'current_snapshot': '',
@ -34,86 +35,11 @@ valid_notification_formats = {
default_notification_format_for_watch: default_notification_format_for_watch default_notification_format_for_watch: default_notification_format_for_watch
} }
# include the decorator
from apprise.decorators import notify
@notify(on="delete")
@notify(on="deletes")
@notify(on="get")
@notify(on="gets")
@notify(on="post")
@notify(on="posts")
@notify(on="put")
@notify(on="puts")
def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
    """Send the notification body as a plain HTTP request to a custom endpoint.

    Registered for the ``get(s)://``, ``post(s)://``, ``put(s)://`` and
    ``delete(s)://`` schemas. The schema selects the HTTP method and a
    trailing ``s`` selects ``https`` instead of ``http``.

    Args:
        body: Notification body. A ``str`` is UTF-8 encoded before sending.
        title: Not used by this wrapper.
        notify_type: Not used by this wrapper.
        **kwargs: ``kwargs['meta']['url']`` must hold the notification URL.

    Returns:
        ``False`` when the URL has an unsupported schema or cannot be parsed,
        so the caller can treat the notification as failed; otherwise ``None``.
    """
    import json
    import requests
    from apprise.utils import parse_url as apprise_parse_url
    from apprise import URLBase

    request_methods = {
        'delete': requests.delete,
        'get': requests.get,
        'post': requests.post,
        'put': requests.put,
    }

    url = kwargs['meta'].get('url')

    # Only rewrite the leading schema; a blanket str.replace() would also
    # rewrite e.g. "get://" appearing inside the path or query string.
    schema, separator, remainder = url.partition('://')
    schema = schema.lower()
    is_secure = schema.endswith('s')
    send_request = request_methods.get(schema[:-1] if is_secure else schema)
    if not separator or send_request is None:
        return False
    url = ('https://' if is_secure else 'http://') + remainder

    params = {}
    auth = None

    # Convert /foobar?+some-header=hello to a proper header dictionary
    results = apprise_parse_url(url)
    if not results:
        return False

    # Headers supplied by the user via "+name=value", tidied by unquoting them
    headers = {URLBase.unquote(x): URLBase.unquote(y)
               for x, y in results['qsd+'].items()}

    # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
    # Apprise relies on prefixing each request arg with "-", because it uses
    # e.g. &method=update as a flag for itself. Here we make straight requests,
    # so every query argument that is not a header is passed on as a parameter.
    for k, v in results['qsd'].items():
        if k.strip('+-') not in results['qsd+']:
            params[URLBase.unquote(k)] = URLBase.unquote(v)

    # Determine authentication. requests only accepts a (user, password)
    # tuple for basic auth, so a user without a password gets an empty one.
    user = results.get('user')
    password = results.get('password')
    if user:
        auth = (URLBase.unquote(user),
                URLBase.unquote(password) if password else '')

    # Try to auto-guess if the body is JSON
    try:
        json.loads(body)
        headers['Content-Type'] = 'application/json; charset=utf-8'
    except (TypeError, ValueError):
        # Not JSON (or not text at all): send it without a JSON content type
        pass

    send_request(results.get('url'),
                 auth=auth,
                 data=body.encode('utf-8') if isinstance(body, str) else body,
                 headers=headers,
                 params=params
                 )
def process_notification(n_object, datastore): def process_notification(n_object, datastore):
# so that the custom endpoints are registered
from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper
from .safe_jinja import render as jinja_render from .safe_jinja import render as jinja_render
now = time.time() now = time.time()

@ -1,4 +1,6 @@
from abc import abstractmethod from abc import abstractmethod
from changedetectionio.content_fetchers.base import Fetcher
from changedetectionio.strtobool import strtobool from changedetectionio.strtobool import strtobool
from copy import deepcopy from copy import deepcopy
@ -23,10 +25,11 @@ class difference_detection_processor():
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
self.datastore = datastore self.datastore = datastore
self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid)) self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
# Generic fetcher that should be extended (requests, playwright etc)
self.fetcher = Fetcher()
def call_browser(self): def call_browser(self):
from requests.structures import CaseInsensitiveDict from requests.structures import CaseInsensitiveDict
from changedetectionio.content_fetchers.exceptions import EmptyReply
# Protect against file:// access # Protect against file:// access
if re.search(r'^file://', self.watch.get('url', '').strip(), re.IGNORECASE): if re.search(r'^file://', self.watch.get('url', '').strip(), re.IGNORECASE):
@ -159,7 +162,7 @@ class difference_detection_processor():
some_data = 'xxxxx' some_data = 'xxxxx'
update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest() update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
changed_detected = False changed_detected = False
return changed_detected, update_obj, ''.encode('utf-8') return changed_detected, update_obj, ''.encode('utf-8'), b''
def find_sub_packages(package_name): def find_sub_packages(package_name):

@ -2,8 +2,7 @@ from .. import difference_detection_processor
from ..exceptions import ProcessorException from ..exceptions import ProcessorException
from . import Restock from . import Restock
from loguru import logger from loguru import logger
import hashlib
import re
import urllib3 import urllib3
import time import time
@ -27,6 +26,25 @@ def _search_prop_by_value(matches, value):
if value in prop[0]: if value in prop[0]:
return prop[1] # Yield the desired value and exit the function return prop[1] # Yield the desired value and exit the function
def _deduplicate_prices(data):
seen = set()
unique_data = []
for datum in data:
# Convert 'value' to float if it can be a numeric string, otherwise leave it as is
try:
normalized_value = float(datum.value) if isinstance(datum.value, str) and datum.value.replace('.', '', 1).isdigit() else datum.value
except ValueError:
normalized_value = datum.value
# If the normalized value hasn't been seen yet, add it to unique data
if normalized_value not in seen:
unique_data.append(datum)
seen.add(normalized_value)
return unique_data
# should return Restock() # should return Restock()
# add casting? # add casting?
def get_itemprop_availability(html_content) -> Restock: def get_itemprop_availability(html_content) -> Restock:
@ -36,6 +54,7 @@ def get_itemprop_availability(html_content) -> Restock:
""" """
from jsonpath_ng import parse from jsonpath_ng import parse
import re
now = time.time() now = time.time()
import extruct import extruct
logger.trace(f"Imported extruct module in {time.time() - now:.3f}s") logger.trace(f"Imported extruct module in {time.time() - now:.3f}s")
@ -60,7 +79,7 @@ def get_itemprop_availability(html_content) -> Restock:
pricecurrency_parse = parse('$..(pricecurrency|currency|priceCurrency )') pricecurrency_parse = parse('$..(pricecurrency|currency|priceCurrency )')
availability_parse = parse('$..(availability|Availability)') availability_parse = parse('$..(availability|Availability)')
price_result = price_parse.find(data) price_result = _deduplicate_prices(price_parse.find(data))
if price_result: if price_result:
# Right now, we just support single product items, maybe we will store the whole actual metadata seperately in teh future and # Right now, we just support single product items, maybe we will store the whole actual metadata seperately in teh future and
# parse that for the UI? # parse that for the UI?
@ -122,6 +141,8 @@ class perform_site_check(difference_detection_processor):
xpath_data = None xpath_data = None
def run_changedetection(self, watch, skip_when_checksum_same=True): def run_changedetection(self, watch, skip_when_checksum_same=True):
import hashlib
if not watch: if not watch:
raise Exception("Watch no longer exists.") raise Exception("Watch no longer exists.")
@ -135,6 +156,20 @@ class perform_site_check(difference_detection_processor):
update_obj['content_type'] = self.fetcher.headers.get('Content-Type', '') update_obj['content_type'] = self.fetcher.headers.get('Content-Type', '')
update_obj["last_check_status"] = self.fetcher.get_last_status_code() update_obj["last_check_status"] = self.fetcher.get_last_status_code()
# Only try to process restock information (like scraping for keywords) if the page was actually rendered correctly.
# Otherwise it will assume "in stock" because nothing suggesting the opposite was found
from ...html_tools import html_to_text
text = html_to_text(self.fetcher.content)
logger.debug(f"Length of text after conversion: {len(text)}")
if not len(text):
from ...content_fetchers.exceptions import ReplyWithContentButNoText
raise ReplyWithContentButNoText(url=watch.link,
status_code=self.fetcher.get_last_status_code(),
screenshot=self.fetcher.screenshot,
html_content=self.fetcher.content,
xpath_data=self.fetcher.xpath_data
)
# Which restock settings to compare against? # Which restock settings to compare against?
restock_settings = watch.get('restock_settings', {}) restock_settings = watch.get('restock_settings', {})
@ -149,7 +184,7 @@ class perform_site_check(difference_detection_processor):
itemprop_availability = {} itemprop_availability = {}
try: try:
itemprop_availability = get_itemprop_availability(html_content=self.fetcher.content) itemprop_availability = get_itemprop_availability(self.fetcher.content)
except MoreThanOnePriceFound as e: except MoreThanOnePriceFound as e:
# Add the real data # Add the real data
raise ProcessorException(message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.", raise ProcessorException(message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.",
@ -263,4 +298,4 @@ class perform_site_check(difference_detection_processor):
# Always record the new checksum # Always record the new checksum
update_obj["previous_md5"] = fetched_md5 update_obj["previous_md5"] = fetched_md5
return changed_detected, update_obj, snapshot_content.encode('utf-8').strip() return changed_detected, update_obj, snapshot_content.encode('utf-8').strip(), b''

@ -36,6 +36,7 @@ class PDFToHTMLToolNotFound(ValueError):
class perform_site_check(difference_detection_processor): class perform_site_check(difference_detection_processor):
def run_changedetection(self, watch, skip_when_checksum_same=True): def run_changedetection(self, watch, skip_when_checksum_same=True):
changed_detected = False changed_detected = False
html_content = "" html_content = ""
screenshot = False # as bytes screenshot = False # as bytes
@ -175,13 +176,13 @@ class perform_site_check(difference_detection_processor):
html_content=self.fetcher.content, html_content=self.fetcher.content,
append_pretty_line_formatting=not watch.is_source_type_url, append_pretty_line_formatting=not watch.is_source_type_url,
is_rss=is_rss) is_rss=is_rss)
elif filter_rule.startswith('xpath1:'): elif filter_rule.startswith('xpath1:'):
html_content += html_tools.xpath1_filter(xpath_filter=filter_rule.replace('xpath1:', ''), html_content += html_tools.xpath1_filter(xpath_filter=filter_rule.replace('xpath1:', ''),
html_content=self.fetcher.content, html_content=self.fetcher.content,
append_pretty_line_formatting=not watch.is_source_type_url, append_pretty_line_formatting=not watch.is_source_type_url,
is_rss=is_rss) is_rss=is_rss)
else: else:
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
html_content += html_tools.include_filters(include_filters=filter_rule, html_content += html_tools.include_filters(include_filters=filter_rule,
html_content=self.fetcher.content, html_content=self.fetcher.content,
append_pretty_line_formatting=not watch.is_source_type_url) append_pretty_line_formatting=not watch.is_source_type_url)
@ -197,18 +198,23 @@ class perform_site_check(difference_detection_processor):
else: else:
# extract text # extract text
do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False) do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
stripped_text_from_html = \ stripped_text_from_html = html_tools.html_to_text(html_content=html_content,
html_tools.html_to_text( render_anchor_tag_content=do_anchor,
html_content=html_content, is_rss=is_rss) # 1874 activate the <title workaround hack
render_anchor_tag_content=do_anchor,
is_rss=is_rss # #1874 activate the <title workaround hack
) if watch.get('trim_text_whitespace'):
stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())
if watch.get('sort_text_alphabetically') and stripped_text_from_html:
if watch.get('remove_duplicate_lines'):
stripped_text_from_html = '\n'.join(dict.fromkeys(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()))
if watch.get('sort_text_alphabetically'):
# Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap # Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
# we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here. # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
stripped_text_from_html = stripped_text_from_html.replace('\n\n', '\n') stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n")
stripped_text_from_html = '\n'.join( sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower() )) stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower()))
# Re #340 - return the content before the 'ignore text' was applied # Re #340 - return the content before the 'ignore text' was applied
text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8') text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
@ -236,7 +242,7 @@ class perform_site_check(difference_detection_processor):
# We had some content, but no differences were found # We had some content, but no differences were found
# Store our new file as the MD5 so it will trigger in the future # Store our new file as the MD5 so it will trigger in the future
c = hashlib.md5(text_content_before_ignored_filter.translate(None, b'\r\n\t ')).hexdigest() c = hashlib.md5(text_content_before_ignored_filter.translate(None, b'\r\n\t ')).hexdigest()
return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8') return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8'), stripped_text_from_html.encode('utf-8')
else: else:
stripped_text_from_html = rendered_diff stripped_text_from_html = rendered_diff
@ -290,7 +296,7 @@ class perform_site_check(difference_detection_processor):
for match in res: for match in res:
regex_matched_output += [match] + [b'\n'] regex_matched_output += [match] + [b'\n']
# Now we will only show what the regex matched ##########################################################
stripped_text_from_html = b'' stripped_text_from_html = b''
text_content_before_ignored_filter = b'' text_content_before_ignored_filter = b''
if regex_matched_output: if regex_matched_output:
@ -298,6 +304,8 @@ class perform_site_check(difference_detection_processor):
stripped_text_from_html = b''.join(regex_matched_output) stripped_text_from_html = b''.join(regex_matched_output)
text_content_before_ignored_filter = stripped_text_from_html text_content_before_ignored_filter = stripped_text_from_html
# Re #133 - if we should strip whitespaces from triggering the change detected comparison # Re #133 - if we should strip whitespaces from triggering the change detected comparison
if self.datastore.data['settings']['application'].get('ignore_whitespace', False): if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest() fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
@ -357,4 +365,4 @@ class perform_site_check(difference_detection_processor):
if not watch.get('previous_md5'): if not watch.get('previous_md5'):
watch['previous_md5'] = fetched_md5 watch['previous_md5'] = fetched_md5
return changed_detected, update_obj, text_content_before_ignored_filter return changed_detected, update_obj, text_content_before_ignored_filter, stripped_text_from_html

@ -16,25 +16,31 @@ echo "---------------------------------- SOCKS5 -------------------"
docker run --network changedet-network \ docker run --network changedet-network \
-v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json \ -v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json \
--rm \ --rm \
-e "FLASK_SERVER_NAME=cdio" \
--hostname cdio \
-e "SOCKSTEST=proxiesjson" \ -e "SOCKSTEST=proxiesjson" \
test-changedetectionio \ test-changedetectionio \
bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy_sources.py' bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy_sources.py'
# SOCKS5 related - by manually entering in UI # SOCKS5 related - by manually entering in UI
docker run --network changedet-network \ docker run --network changedet-network \
--rm \ --rm \
-e "FLASK_SERVER_NAME=cdio" \
--hostname cdio \
-e "SOCKSTEST=manual" \ -e "SOCKSTEST=manual" \
test-changedetectionio \ test-changedetectionio \
bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy.py' bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy.py'
# SOCKS5 related - test from proxies.json via playwright - NOTE- PLAYWRIGHT DOESNT SUPPORT AUTHENTICATING PROXY # SOCKS5 related - test from proxies.json via playwright - NOTE- PLAYWRIGHT DOESNT SUPPORT AUTHENTICATING PROXY
docker run --network changedet-network \ docker run --network changedet-network \
-e "SOCKSTEST=manual-playwright" \ -e "SOCKSTEST=manual-playwright" \
--hostname cdio \
-e "FLASK_SERVER_NAME=cdio" \
-v `pwd`/tests/proxy_socks5/proxies.json-example-noauth:/app/changedetectionio/test-datastore/proxies.json \ -v `pwd`/tests/proxy_socks5/proxies.json-example-noauth:/app/changedetectionio/test-datastore/proxies.json \
-e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" \ -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" \
--rm \ --rm \
test-changedetectionio \ test-changedetectionio \
bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy_sources.py' bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy_sources.py'
echo "socks5 server logs" echo "socks5 server logs"
docker logs socks5proxy docker logs socks5proxy

@ -18,9 +18,11 @@ $(document).ready(function () {
}); });
$("#notification-token-toggle").click(function (e) { $(".toggle-show").click(function (e) {
e.preventDefault(); e.preventDefault();
$('#notification-tokens-info').toggle(); let target = $(this).data('target');
$(target).toggle();
}); });
}); });

@ -12,6 +12,54 @@ function toggleOpacity(checkboxSelector, fieldSelector, inverted) {
checkbox.addEventListener('change', updateOpacity); checkbox.addEventListener('change', updateOpacity);
} }
/**
 * jQuery helper: $.abortiveSingularAjax(options)
 *
 * Issues $.ajax(options) while keeping at most one tracked request per
 * `options.namespace` (falls back to 'default' when the namespace is missing
 * or falsy). Before a new request is started, abort() is called on whatever
 * request was last stored for that namespace; the stored reference is never
 * cleared, so this also happens when the earlier request already finished.
 *
 * Returns the object returned by $.ajax for the newly started request.
 */
(function ($) {
    // Most recently started request, keyed by namespace
    const inFlight = {};

    $.abortiveSingularAjax = function (options) {
        const key = options.namespace || 'default';

        // Cancel the predecessor in this namespace before replacing it
        const previous = inFlight[key];
        if (previous) {
            previous.abort();
        }

        // Fire the replacement request and remember it for the next call
        const current = $.ajax(options);
        inFlight[key] = current;
        return current;
    };
})(jQuery);
/**
 * Post the currently visible form values to the preview endpoint and show the
 * returned text in #text-preview-inner.
 *
 * Does nothing unless <body> carries the 'preview-text-enabled' class.
 * The request goes through $.abortiveSingularAjax in the 'watchEdit'
 * namespace, so a newer call aborts the previous one; an aborted request is
 * only logged, any other failure replaces the preview with an error message.
 *
 * Reads the global `preview_text_edit_filters_url` for the target URL.
 */
function request_textpreview_update() {
    if (!$('body').hasClass('preview-text-enabled')) {
        return;
    }

    const data = {};
    $('textarea:visible, input:visible').each(function () {
        const $element = $(this);
        const name = $element.attr('name');

        // Elements without a name are not form fields; without this guard
        // their value was collected under the literal key "undefined".
        if (!name) {
            return;
        }

        // All radios of a group share one name, so only the checked one may
        // contribute a value. Otherwise the last radio in DOM order would
        // overwrite the user's actual selection.
        if ($element.is(':radio')) {
            if ($element.is(':checked')) {
                data[name] = $element.val();
            }
            return;
        }

        // Checkboxes contribute their value only when ticked
        data[name] = $element.is(':checkbox') ? ($element.is(':checked') ? $element.val() : undefined) : $element.val();
    });

    $.abortiveSingularAjax({
        type: "POST",
        url: preview_text_edit_filters_url,
        data: data,
        namespace: 'watchEdit'
    }).done(function (data) {
        // .text() (not .html()) so the server response is never interpreted as markup
        $('#filters-and-triggers #text-preview-inner').text(data);
    }).fail(function (error) {
        if (error.statusText === 'abort') {
            // Expected when a newer preview request superseded this one
            console.log('Request was aborted due to a new request being fired.');
        } else {
            $('#filters-and-triggers #text-preview-inner').text('There was an error communicating with the server.');
        }
    });
}
$(document).ready(function () { $(document).ready(function () {
$('#notification-setting-reset-to-default').click(function (e) { $('#notification-setting-reset-to-default').click(function (e) {
$('#notification_title').val(''); $('#notification_title').val('');
@ -27,5 +75,23 @@ $(document).ready(function () {
toggleOpacity('#time_between_check_use_default', '#time_between_check', false); toggleOpacity('#time_between_check_use_default', '#time_between_check', false);
const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0);
$("#text-preview-inner").css('max-height', (vh-300)+"px");
var debounced_request_textpreview_update = request_textpreview_update.debounce(100);
$("#activate-text-preview").click(function (e) {
$(this).fadeOut();
$('body').toggleClass('preview-text-enabled')
request_textpreview_update();
$("#text-preview-refresh").click(function (e) {
request_textpreview_update();
});
$('textarea:visible').on('keyup blur', debounced_request_textpreview_update);
$('input:visible').on('keyup blur change', debounced_request_textpreview_update);
$("#filters-and-triggers-tab").on('click', debounced_request_textpreview_update);
});
}); });

@ -40,15 +40,29 @@
} }
} }
#browser-steps-fieldlist {
height: 100%;
overflow-y: scroll;
}
#browser-steps .flex-wrapper { #browser-steps .flex-wrapper {
display: flex; display: flex;
flex-flow: row; flex-flow: row;
height: 70vh; height: 70vh;
font-size: 80%;
#browser-steps-ui {
flex-grow: 1; /* Allow it to grow and fill the available space */
flex-shrink: 1; /* Allow it to shrink if needed */
flex-basis: 0; /* Start with 0 base width so it stretches as much as possible */
background-color: #eee;
border-radius: 5px;
}
#browser-steps-fieldlist {
flex-grow: 0; /* Don't allow it to grow */
flex-shrink: 0; /* Don't allow it to shrink */
flex-basis: auto; /* Base width is determined by the content */
max-width: 400px; /* Set a max width to prevent overflow */
padding-left: 1rem;
overflow-y: scroll;
}
} }
/* this is duplicate :( */ /* this is duplicate :( */

@ -0,0 +1,45 @@
/*
 * Two-column layout for the "Filters & Triggers" tab while the live text preview is active.
 * Every rule is nested under body.preview-text-enabled, so nothing here applies
 * until that class has been added to <body>.
 */
body.preview-text-enabled {
#filters-and-triggers > div {
display: flex; /* Establishes Flexbox layout */
gap: 20px; /* Adds space between the columns */
position: relative; /* Ensures the sticky positioning is relative to this parent */
}
/* layout of the page */
#edit-text-filter, #text-preview {
flex: 1; /* Each column takes an equal amount of available space */
align-self: flex-start; /* Aligns the right column to the start, allowing it to maintain its content height */
}
#edit-text-filter {
/* The pro-tips box is hidden while the preview is shown (presumably to save vertical space in the narrower column) */
#pro-tips {
display: none;
}
}
#text-preview {
position: sticky;
top: 25px;
/* The template hides this element with an inline style="display: none;", so !important is required to override it */
display: block !important;
}
/* actual preview area */
#text-preview-inner {
background: var(--color-grey-900);
border: 1px solid var(--color-grey-600);
padding: 1rem;
color: #333;
font-family: "Courier New", Courier, monospace; /* Sets the font to a monospace type */
font-size: 12px;
overflow-x: scroll;
white-space: pre-wrap; /* Preserves whitespace and line breaks like <pre> */
overflow-wrap: break-word; /* Allows long words to break and wrap to the next line */
}
}
/*
 * The "Activate preview" button. Declared outside body.preview-text-enabled, so it is
 * styled whether or not the preview is active.
 * NOTE(review): right: 0 anchors it to the nearest positioned ancestor - confirm which element that is.
 */
#activate-text-preview {
right: 0;
position: absolute;
z-index: 0;
box-shadow: 1px 1px 4px var(--color-shadow-jump);
}

@ -12,6 +12,7 @@
@import "parts/_darkmode"; @import "parts/_darkmode";
@import "parts/_menu"; @import "parts/_menu";
@import "parts/_love"; @import "parts/_love";
@import "parts/preview_text_filter";
body { body {
color: var(--color-text); color: var(--color-text);

@ -46,14 +46,31 @@
#browser_steps li > label { #browser_steps li > label {
display: none; } display: none; }
#browser-steps-fieldlist {
height: 100%;
overflow-y: scroll; }
#browser-steps .flex-wrapper { #browser-steps .flex-wrapper {
display: flex; display: flex;
flex-flow: row; flex-flow: row;
height: 70vh; } height: 70vh;
font-size: 80%; }
#browser-steps .flex-wrapper #browser-steps-ui {
flex-grow: 1;
/* Allow it to grow and fill the available space */
flex-shrink: 1;
/* Allow it to shrink if needed */
flex-basis: 0;
/* Start with 0 base width so it stretches as much as possible */
background-color: #eee;
border-radius: 5px; }
#browser-steps .flex-wrapper #browser-steps-fieldlist {
flex-grow: 0;
/* Don't allow it to grow */
flex-shrink: 0;
/* Don't allow it to shrink */
flex-basis: auto;
/* Base width is determined by the content */
max-width: 400px;
/* Set a max width to prevent overflow */
padding-left: 1rem;
overflow-y: scroll; }
/* this is duplicate :( */ /* this is duplicate :( */
#browsersteps-selector-wrapper { #browsersteps-selector-wrapper {
@ -411,6 +428,47 @@ html[data-darkmode="true"] #toggle-light-mode .icon-dark {
fill: #ff0000 !important; fill: #ff0000 !important;
transition: all ease 0.3s !important; } transition: all ease 0.3s !important; }
body.preview-text-enabled {
/* layout of the page */
/* actual preview area */ }
body.preview-text-enabled #filters-and-triggers > div {
display: flex;
/* Establishes Flexbox layout */
gap: 20px;
/* Adds space between the columns */
position: relative;
/* Ensures the sticky positioning is relative to this parent */ }
body.preview-text-enabled #edit-text-filter, body.preview-text-enabled #text-preview {
flex: 1;
/* Each column takes an equal amount of available space */
align-self: flex-start;
/* Aligns the right column to the start, allowing it to maintain its content height */ }
body.preview-text-enabled #edit-text-filter #pro-tips {
display: none; }
body.preview-text-enabled #text-preview {
position: sticky;
top: 25px;
display: block !important; }
body.preview-text-enabled #text-preview-inner {
background: var(--color-grey-900);
border: 1px solid var(--color-grey-600);
padding: 1rem;
color: #333;
font-family: "Courier New", Courier, monospace;
/* Sets the font to a monospace type */
font-size: 12px;
overflow-x: scroll;
white-space: pre-wrap;
/* Preserves whitespace and line breaks like <pre> */
overflow-wrap: break-word;
/* Allows long words to break and wrap to the next line */ }
#activate-text-preview {
right: 0;
position: absolute;
z-index: 0;
box-shadow: 1px 1px 4px var(--color-shadow-jump); }
body { body {
color: var(--color-text); color: var(--color-text);
background: var(--color-background-page); background: var(--color-background-page);
@ -1194,11 +1252,9 @@ ul {
color: #fff; color: #fff;
opacity: 0.7; } opacity: 0.7; }
.restock-label svg { .restock-label svg {
vertical-align: middle; } vertical-align: middle; }
#chrome-extension-link { #chrome-extension-link {
padding: 9px; padding: 9px;
border: 1px solid var(--color-grey-800); border: 1px solid var(--color-grey-800);

@ -13,7 +13,6 @@ from threading import Lock
import json import json
import os import os
import re import re
import requests
import secrets import secrets
import threading import threading
import time import time
@ -281,6 +280,7 @@ class ChangeDetectionStore:
self.needs_write_urgent = True self.needs_write_urgent = True
def add_watch(self, url, tag='', extras=None, tag_uuids=None, write_to_disk_now=True): def add_watch(self, url, tag='', extras=None, tag_uuids=None, write_to_disk_now=True):
import requests
if extras is None: if extras is None:
extras = {} extras = {}

@ -11,8 +11,11 @@
class="notification-urls" ) class="notification-urls" )
}} }}
<div class="pure-form-message-inline"> <div class="pure-form-message-inline">
<ul> <p>
<li>Use <a target=_new href="https://github.com/caronc/apprise">AppRise URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.</li> <strong>Tip:</strong> Use <a target=_new href="https://github.com/caronc/apprise">AppRise Notification URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.<br>
</p>
<div data-target="#advanced-help-notifications" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
<ul style="display: none" id="advanced-help-notifications">
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_discord">discord://</a></code> (or <code>https://discord.com/api/webhooks...</code>)) only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li> <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_discord">discord://</a></code> (or <code>https://discord.com/api/webhooks...</code>)) only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> bots can't send messages to other bots, so you should specify chat ID of non-bot user.</li> <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> bots can't send messages to other bots, so you should specify chat ID of non-bot user.</li>
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li> <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li>
@ -40,7 +43,7 @@
</div> </div>
<div class="pure-controls"> <div class="pure-controls">
<div id="notification-token-toggle" class="pure-button button-tag button-xsmall">Show token/placeholders</div> <div data-target="#notification-tokens-info" class="toggle-show pure-button button-tag button-xsmall">Show token/placeholders</div>
</div> </div>
<div class="pure-controls" style="display: none;" id="notification-tokens-info"> <div class="pure-controls" style="display: none;" id="notification-tokens-info">
<table class="pure-table" id="token-table"> <table class="pure-table" id="token-table">

@ -33,7 +33,7 @@
<script src="{{url_for('static_content', group='js', filename='csrf.js')}}" defer></script> <script src="{{url_for('static_content', group='js', filename='csrf.js')}}" defer></script>
</head> </head>
<body> <body class="">
<div class="header"> <div class="header">
<div class="home-menu pure-menu pure-menu-horizontal pure-menu-fixed" id="nav-menu"> <div class="home-menu pure-menu pure-menu-horizontal pure-menu-fixed" id="nav-menu">
{% if has_password and not current_user.is_authenticated %} {% if has_password and not current_user.is_authenticated %}

@ -4,6 +4,7 @@
{% from '_common_fields.html' import render_common_settings_form %} {% from '_common_fields.html' import render_common_settings_form %}
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script> <script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script> <script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='global-settings.js')}}" defer></script>
<script> <script>
const browser_steps_available_screenshots=JSON.parse('{{ watch.get_browsersteps_available_screenshots|tojson }}'); const browser_steps_available_screenshots=JSON.parse('{{ watch.get_browsersteps_available_screenshots|tojson }}');
const browser_steps_config=JSON.parse('{{ browser_steps_config|tojson }}'); const browser_steps_config=JSON.parse('{{ browser_steps_config|tojson }}');
@ -49,7 +50,7 @@
{% endif %} {% endif %}
{% if watch['processor'] == 'text_json_diff' %} {% if watch['processor'] == 'text_json_diff' %}
<li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li> <li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li>
<li class="tab"><a href="#filters-and-triggers">Filters &amp; Triggers</a></li> <li class="tab" id="filters-and-triggers-tab"><a href="#filters-and-triggers">Filters &amp; Triggers</a></li>
{% endif %} {% endif %}
<li class="tab"><a href="#notifications">Notifications</a></li> <li class="tab"><a href="#notifications">Notifications</a></li>
<li class="tab"><a href="#stats">Stats</a></li> <li class="tab"><a href="#stats">Stats</a></li>
@ -199,7 +200,7 @@ User-Agent: wonderbra 1.0") }}
<div id="loading-status-text" style="display: none;">Please wait, first browser step can take a little time to load..<div class="spinner"></div></div> <div id="loading-status-text" style="display: none;">Please wait, first browser step can take a little time to load..<div class="spinner"></div></div>
<div class="flex-wrapper" > <div class="flex-wrapper" >
<div id="browser-steps-ui" class="noselect" style="width: 100%; background-color: #eee; border-radius: 5px;"> <div id="browser-steps-ui" class="noselect">
<div class="noselect" id="browsersteps-selector-wrapper" style="width: 100%"> <div class="noselect" id="browsersteps-selector-wrapper" style="width: 100%">
<span class="loader" > <span class="loader" >
@ -214,7 +215,7 @@ User-Agent: wonderbra 1.0") }}
<canvas class="noselect" id="browsersteps-selector-canvas" style="max-width: 100%; width: 100%;"></canvas> <canvas class="noselect" id="browsersteps-selector-canvas" style="max-width: 100%; width: 100%;"></canvas>
</div> </div>
</div> </div>
<div id="browser-steps-fieldlist" style="padding-left: 1em; width: 350px; font-size: 80%;" > <div id="browser-steps-fieldlist" >
<span id="browser-seconds-remaining">Loading</span> <span style="font-size: 80%;"> (<a target=_new href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span> <span id="browser-seconds-remaining">Loading</span> <span style="font-size: 80%;"> (<a target=_new href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span>
{{ render_field(form.browser_steps) }} {{ render_field(form.browser_steps) }}
</div> </div>
@ -253,7 +254,10 @@ User-Agent: wonderbra 1.0") }}
{% if watch['processor'] == 'text_json_diff' %} {% if watch['processor'] == 'text_json_diff' %}
<div class="tab-pane-inner" id="filters-and-triggers"> <div class="tab-pane-inner" id="filters-and-triggers">
<div class="pure-control-group"> <span id="activate-text-preview" class="pure-button pure-button-primary button-xsmall">Activate preview</span>
<div>
<div id="edit-text-filter">
<div class="pure-control-group" id="pro-tips">
<strong>Pro-tips:</strong><br> <strong>Pro-tips:</strong><br>
<ul> <ul>
<li> <li>
@ -275,9 +279,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
{% if '/text()' in field %} {% if '/text()' in field %}
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br> <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br>
{% endif %} {% endif %}
<span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br> <span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
<p><div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div><br></p>
<ul> <ul id="advanced-help-selectors" style="display: none;">
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li> <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed). <li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
<ul> <ul>
@ -297,21 +301,25 @@ xpath://body/div/span[contains(@class, 'example-class')]",
<li>To use XPath1.0: Prefix with <code>xpath1:</code></li> <li>To use XPath1.0: Prefix with <code>xpath1:</code></li>
</ul> </ul>
</li> </li>
</ul> <li>
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br> href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
</li>
</ul>
</span> </span>
</div> </div>
<fieldset class="pure-control-group"> <fieldset class="pure-control-group">
{{ render_field(form.subtractive_selectors, rows=5, placeholder=has_tag_filters_extra+"header {{ render_field(form.subtractive_selectors, rows=5, placeholder=has_tag_filters_extra+"header
footer footer
nav nav
.stockticker") }} .stockticker
//*[contains(text(), 'Advertisement')]") }}
<span class="pure-form-message-inline"> <span class="pure-form-message-inline">
<ul> <ul>
<li> Remove HTML element(s) by CSS selector before text conversion. </li> <li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
<li> Don't paste HTML here, use only CSS selectors </li> <li> Don't paste HTML here, use only CSS and XPath selectors </li>
<li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li> <li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
</ul> </ul>
</span> </span>
</fieldset> </fieldset>
@ -326,14 +334,21 @@ nav
<span class="pure-form-message-inline">So it's always better to select <strong>Added</strong>+<strong>Replaced</strong> when you're interested in new content.</span><br> <span class="pure-form-message-inline">So it's always better to select <strong>Added</strong>+<strong>Replaced</strong> when you're interested in new content.</span><br>
<span class="pure-form-message-inline">When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span> <span class="pure-form-message-inline">When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span>
</fieldset> </fieldset>
<fieldset class="pure-control-group">
{{ render_checkbox_field(form.check_unique_lines) }}
<span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>
</fieldset>
<fieldset class="pure-control-group">
{{ render_checkbox_field(form.remove_duplicate_lines) }}
<span class="pure-form-message-inline">Remove duplicate lines of text</span>
</fieldset>
<fieldset class="pure-control-group"> <fieldset class="pure-control-group">
{{ render_checkbox_field(form.sort_text_alphabetically) }} {{ render_checkbox_field(form.sort_text_alphabetically) }}
<span class="pure-form-message-inline">Helps reduce changes detected caused by sites shuffling lines around, combine with <i>check unique lines</i> below.</span> <span class="pure-form-message-inline">Helps reduce changes detected caused by sites shuffling lines around, combine with <i>check unique lines</i> below.</span>
</fieldset> </fieldset>
<fieldset class="pure-control-group"> <fieldset class="pure-control-group">
{{ render_checkbox_field(form.check_unique_lines) }} {{ render_checkbox_field(form.trim_text_whitespace) }}
<span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span> <span class="pure-form-message-inline">Remove any whitespace before and after each line of text</span>
</fieldset> </fieldset>
<fieldset> <fieldset>
<div class="pure-control-group"> <div class="pure-control-group">
@ -403,7 +418,19 @@ Unavailable") }}
</fieldset> </fieldset>
</div> </div>
</div> </div>
{% endif %} <div id="text-preview" style="display: none;" >
<script>
const preview_text_edit_filters_url="{{url_for('watch_get_preview_rendered', uuid=uuid)}}";
</script>
<span><strong>Preview of the text that is used for changedetection after all filters run.</strong></span><br>
{#<div id="text-preview-controls"><span id="text-preview-refresh" class="pure-button button-xsmall">Refresh</span></div>#}
<p>
<div id="text-preview-inner"></div>
</p>
</div>
</div>
</div>
{% endif %}
{# rendered sub Template #} {# rendered sub Template #}
{% if extra_form_content %} {% if extra_form_content %}
<div class="tab-pane-inner" id="extras_tab"> <div class="tab-pane-inner" id="extras_tab">

@ -155,11 +155,13 @@
{{ render_field(form.application.form.global_subtractive_selectors, rows=5, placeholder="header {{ render_field(form.application.form.global_subtractive_selectors, rows=5, placeholder="header
footer footer
nav nav
.stockticker") }} .stockticker
//*[contains(text(), 'Advertisement')]") }}
<span class="pure-form-message-inline"> <span class="pure-form-message-inline">
<ul> <ul>
<li> Remove HTML element(s) by CSS selector before text conversion. </li> <li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
<li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li> <li> Don't paste HTML here, use only CSS and XPath selectors </li>
<li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
</ul> </ul>
</span> </span>
</fieldset> </fieldset>

@ -1,12 +1,27 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import os import os
import time
from flask import url_for from flask import url_for
from changedetectionio.tests.util import live_server_setup, wait_for_all_checks from changedetectionio.tests.util import live_server_setup, wait_for_all_checks
def set_response():
    """Write a small canned HTML page to the endpoint content file.

    The page goes to ``test-datastore/endpoint-content.txt`` (presumably the
    file the test endpoint serves - confirm against the test utilities), after
    which the function pauses for one second before returning.
    """
    import time

    page = (
        "<html>\n"
        "<body>\n"
        "<h1>Awesome, you made it</h1>\n"
        "yeah the socks request worked\n"
        "</body>\n"
        "</html>\n"
    )
    with open("test-datastore/endpoint-content.txt", "w") as handle:
        handle.write(page)

    # Same one-second pause the helper has always taken after writing.
    time.sleep(1)
def test_socks5(client, live_server, measure_memory_usage): def test_socks5(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
set_response()
# Setup a proxy # Setup a proxy
res = client.post( res = client.post(
@ -24,7 +39,10 @@ def test_socks5(client, live_server, measure_memory_usage):
assert b"Settings updated." in res.data assert b"Settings updated." in res.data
test_url = "https://changedetection.io/CHANGELOG.txt?socks-test-tag=" + os.getenv('SOCKSTEST', '') # Because the socks server should connect back to us
test_url = url_for('test_endpoint', _external=True) + f"?socks-test-tag={os.getenv('SOCKSTEST', '')}"
test_url = test_url.replace('localhost.localdomain', 'cdio')
test_url = test_url.replace('localhost', 'cdio')
res = client.post( res = client.post(
url_for("form_quick_watch_add"), url_for("form_quick_watch_add"),
@ -60,4 +78,4 @@ def test_socks5(client, live_server, measure_memory_usage):
) )
# Should see the proper string # Should see the proper string
assert "+0200:".encode('utf-8') in res.data assert "Awesome, you made it".encode('utf-8') in res.data

@ -1,16 +1,32 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import os import os
import time
from flask import url_for from flask import url_for
from changedetectionio.tests.util import live_server_setup, wait_for_all_checks from changedetectionio.tests.util import live_server_setup, wait_for_all_checks
def set_response():
    """Store the canned "socks request worked" page for the test run.

    Writes the HTML to ``test-datastore/endpoint-content.txt`` and then sleeps
    for one second.
    """
    import time

    html_lines = [
        "<html>",
        "<body>",
        "<h1>Awesome, you made it</h1>",
        "yeah the socks request worked",
        "</body>",
        "</html>",
    ]
    # Every line, including the last, is newline-terminated.
    content = "\n".join(html_lines) + "\n"

    with open("test-datastore/endpoint-content.txt", "w") as out:
        out.write(content)

    time.sleep(1)
# should be proxies.json mounted from run_proxy_tests.sh already # should be proxies.json mounted from run_proxy_tests.sh already
# -v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json # -v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json
def test_socks5_from_proxiesjson_file(client, live_server, measure_memory_usage): def test_socks5_from_proxiesjson_file(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
set_response()
test_url = "https://changedetection.io/CHANGELOG.txt?socks-test-tag=" + os.getenv('SOCKSTEST', '') # Because the socks server should connect back to us
test_url = url_for('test_endpoint', _external=True) + f"?socks-test-tag={os.getenv('SOCKSTEST', '')}"
test_url = test_url.replace('localhost.localdomain', 'cdio')
test_url = test_url.replace('localhost', 'cdio')
res = client.get(url_for("settings_page")) res = client.get(url_for("settings_page"))
assert b'name="requests-proxy" type="radio" value="socks5proxy"' in res.data assert b'name="requests-proxy" type="radio" value="socks5proxy"' in res.data
@ -49,4 +65,4 @@ def test_socks5_from_proxiesjson_file(client, live_server, measure_memory_usage)
) )
# Should see the proper string # Should see the proper string
assert "+0200:".encode('utf-8') in res.data assert "Awesome, you made it".encode('utf-8') in res.data

@ -2,7 +2,7 @@
import os import os
import time import time
from flask import url_for from flask import url_for
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output
from changedetectionio.notification import ( from changedetectionio.notification import (
default_notification_body, default_notification_body,
default_notification_format, default_notification_format,
@ -94,7 +94,7 @@ def test_restock_detection(client, live_server, measure_memory_usage):
assert b'not-in-stock' not in res.data assert b'not-in-stock' not in res.data
# We should have a notification # We should have a notification
time.sleep(2) wait_for_notification_endpoint_output()
assert os.path.isfile("test-datastore/notification.txt"), "Notification received" assert os.path.isfile("test-datastore/notification.txt"), "Notification received"
os.unlink("test-datastore/notification.txt") os.unlink("test-datastore/notification.txt")
@ -103,6 +103,7 @@ def test_restock_detection(client, live_server, measure_memory_usage):
set_original_response() set_original_response()
client.get(url_for("form_watch_checknow"), follow_redirects=True) client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client) wait_for_all_checks(client)
time.sleep(5)
assert not os.path.isfile("test-datastore/notification.txt"), "No notification should have fired when it went OUT OF STOCK by default" assert not os.path.isfile("test-datastore/notification.txt"), "No notification should have fired when it went OUT OF STOCK by default"
# BUT we should see that it correctly shows "not in stock" # BUT we should see that it correctly shows "not in stock"

@ -2,7 +2,7 @@
import os.path import os.path
import time import time
from flask import url_for from flask import url_for
from .util import live_server_setup, wait_for_all_checks from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output
from changedetectionio import html_tools from changedetectionio import html_tools
@ -165,7 +165,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
assert b'unviewed' in res.data assert b'unviewed' in res.data
# Takes a moment for apprise to fire # Takes a moment for apprise to fire
time.sleep(3) wait_for_notification_endpoint_output()
assert os.path.isfile("test-datastore/notification.txt"), "Notification fired because I can see the output file" assert os.path.isfile("test-datastore/notification.txt"), "Notification fired because I can see the output file"
with open("test-datastore/notification.txt", 'rb') as f: with open("test-datastore/notification.txt", 'rb') as f:
response = f.read() response = f.read()

@ -87,6 +87,9 @@ def test_element_removal_output():
Some initial text<br> Some initial text<br>
<p>across multiple lines</p> <p>across multiple lines</p>
<div id="changetext">Some text that changes</div> <div id="changetext">Some text that changes</div>
<div>Some text should be matched by xPath // selector</div>
<div>Some text should be matched by xPath selector</div>
<div>Some text should be matched by xPath1 selector</div>
</body> </body>
<footer> <footer>
<p>Footer</p> <p>Footer</p>
@ -94,7 +97,16 @@ def test_element_removal_output():
</html> </html>
""" """
html_blob = element_removal( html_blob = element_removal(
["header", "footer", "nav", "#changetext"], html_content=content [
"header",
"footer",
"nav",
"#changetext",
"//*[contains(text(), 'xPath // selector')]",
"xpath://*[contains(text(), 'xPath selector')]",
"xpath1://*[contains(text(), 'xPath1 selector')]"
],
html_content=content
) )
text = get_text(html_blob) text = get_text(html_blob)
assert ( assert (

@ -4,7 +4,7 @@
import os import os
import time import time
from flask import url_for from flask import url_for
from .util import set_original_response, live_server_setup from .util import set_original_response, live_server_setup, wait_for_notification_endpoint_output
from changedetectionio.model import App from changedetectionio.model import App
@ -102,14 +102,15 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se
follow_redirects=True follow_redirects=True
) )
assert b"Updated watch." in res.data assert b"Updated watch." in res.data
time.sleep(3) wait_for_notification_endpoint_output()
# Shouldn't exist, shouldn't have fired # Shouldn't exist, shouldn't have fired
assert not os.path.isfile("test-datastore/notification.txt") assert not os.path.isfile("test-datastore/notification.txt")
# Now the filter should exist # Now the filter should exist
set_response_with_filter() set_response_with_filter()
client.get(url_for("form_watch_checknow"), follow_redirects=True) client.get(url_for("form_watch_checknow"), follow_redirects=True)
time.sleep(3)
wait_for_notification_endpoint_output()
assert os.path.isfile("test-datastore/notification.txt") assert os.path.isfile("test-datastore/notification.txt")

@ -1,7 +1,9 @@
import os import os
import time import time
from loguru import logger
from flask import url_for from flask import url_for
from .util import set_original_response, live_server_setup, extract_UUID_from_client, wait_for_all_checks from .util import set_original_response, live_server_setup, extract_UUID_from_client, wait_for_all_checks, \
wait_for_notification_endpoint_output
from changedetectionio.model import App from changedetectionio.model import App
@ -26,6 +28,12 @@ def run_filter_test(client, live_server, content_filter):
# Response WITHOUT the filter ID element # Response WITHOUT the filter ID element
set_original_response() set_original_response()
# Goto the edit page, add our ignore text
notification_url = url_for('test_notification_endpoint', _external=True).replace('http', 'json')
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
# cleanup for the next # cleanup for the next
client.get( client.get(
url_for("form_delete", uuid="all"), url_for("form_delete", uuid="all"),
@ -34,83 +42,92 @@ def run_filter_test(client, live_server, content_filter):
if os.path.isfile("test-datastore/notification.txt"): if os.path.isfile("test-datastore/notification.txt"):
os.unlink("test-datastore/notification.txt") os.unlink("test-datastore/notification.txt")
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
res = client.post( res = client.post(
url_for("form_quick_watch_add"), url_for("import_page"),
data={"url": test_url, "tags": ''}, data={"urls": test_url},
follow_redirects=True follow_redirects=True
) )
assert b"Watch added" in res.data assert b"1 Imported" in res.data
# Give the thread time to pick up the first version
wait_for_all_checks(client) wait_for_all_checks(client)
# Goto the edit page, add our ignore text uuid = extract_UUID_from_client(client)
# Add our URL to the import page
url = url_for('test_notification_endpoint', _external=True) assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 0, "No filter = No filter failure"
notification_url = url.replace('http', 'json')
watch_data = {"notification_urls": notification_url,
print(">>>> Notification URL: " + notification_url) "notification_title": "New ChangeDetection.io Notification - {{watch_url}}",
"notification_body": "BASE URL: {{base_url}}\n"
# Just a regular notification setting, this will be used by the special 'filter not found' notification "Watch URL: {{watch_url}}\n"
notification_form_data = {"notification_urls": notification_url, "Watch UUID: {{watch_uuid}}\n"
"notification_title": "New ChangeDetection.io Notification - {{watch_url}}", "Watch title: {{watch_title}}\n"
"notification_body": "BASE URL: {{base_url}}\n" "Watch tag: {{watch_tag}}\n"
"Watch URL: {{watch_url}}\n" "Preview: {{preview_url}}\n"
"Watch UUID: {{watch_uuid}}\n" "Diff URL: {{diff_url}}\n"
"Watch title: {{watch_title}}\n" "Snapshot: {{current_snapshot}}\n"
"Watch tag: {{watch_tag}}\n" "Diff: {{diff}}\n"
"Preview: {{preview_url}}\n" "Diff Full: {{diff_full}}\n"
"Diff URL: {{diff_url}}\n" "Diff as Patch: {{diff_patch}}\n"
"Snapshot: {{current_snapshot}}\n" ":-)",
"Diff: {{diff}}\n" "notification_format": "Text",
"Diff Full: {{diff_full}}\n" "fetch_backend": "html_requests",
"Diff as Patch: {{diff_patch}}\n" "filter_failure_notification_send": 'y',
":-)", "headers": "",
"notification_format": "Text"} "tags": "my tag",
"title": "my title 123",
notification_form_data.update({ "time_between_check-hours": 5, # So that the queue runner doesnt also put it in
"url": test_url, "url": test_url,
"tags": "my tag", }
"title": "my title 123",
"headers": "",
"filter_failure_notification_send": 'y',
"include_filters": content_filter,
"fetch_backend": "html_requests"})
# A POST here will also reset the filter failure counter (filter_failure_notification_threshold_attempts)
res = client.post( res = client.post(
url_for("edit_page", uuid="first"), url_for("edit_page", uuid=uuid),
data=notification_form_data, data=watch_data,
follow_redirects=True follow_redirects=True
) )
assert b"Updated watch." in res.data assert b"Updated watch." in res.data
wait_for_all_checks(client) wait_for_all_checks(client)
assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 0, "No filter = No filter failure"
# Now add a filter, because recheck hours == 5, ONLY pressing of the [edit] or [recheck all] should trigger
watch_data['include_filters'] = content_filter
res = client.post(
url_for("edit_page", uuid=uuid),
data=watch_data,
follow_redirects=True
)
assert b"Updated watch." in res.data
# Now the notification should not exist, because we didnt reach the threshold # It should have checked once so far and given this error (because we hit SAVE)
wait_for_all_checks(client)
assert not os.path.isfile("test-datastore/notification.txt") assert not os.path.isfile("test-datastore/notification.txt")
# Hitting [save] would have triggered a recheck, and we have a filter, so this would be ONE failure
assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 1, "Should have been checked once"
# recheck it up to just before the threshold, including the fact that in the previous POST it would have rechecked (and incremented) # recheck it up to just before the threshold, including the fact that in the previous POST it would have rechecked (and incremented)
for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT-2): # Add 4 more checks
checked = 0
ATTEMPT_THRESHOLD_SETTING = live_server.app.config['DATASTORE'].data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
for i in range(0, ATTEMPT_THRESHOLD_SETTING - 2):
checked += 1
client.get(url_for("form_watch_checknow"), follow_redirects=True) client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client) wait_for_all_checks(client)
time.sleep(2) # delay for apprise to fire res = client.get(url_for("index"))
assert not os.path.isfile("test-datastore/notification.txt"), f"test-datastore/notification.txt should not exist - Attempt {i} when threshold is {App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT}" assert b'Warning, no filters were found' in res.data
assert not os.path.isfile("test-datastore/notification.txt")
time.sleep(1)
# We should see something in the frontend assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 5
res = client.get(url_for("index"))
assert b'Warning, no filters were found' in res.data
time.sleep(2)
# One more check should trigger the _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT threshold # One more check should trigger the _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT threshold
client.get(url_for("form_watch_checknow"), follow_redirects=True) client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client) wait_for_all_checks(client)
time.sleep(2) # delay for apprise to fire wait_for_notification_endpoint_output()
# Now it should exist and contain our "filter not found" alert # Now it should exist and contain our "filter not found" alert
assert os.path.isfile("test-datastore/notification.txt") assert os.path.isfile("test-datastore/notification.txt")
with open("test-datastore/notification.txt", 'r') as f: with open("test-datastore/notification.txt", 'r') as f:
notification = f.read() notification = f.read()
@ -123,10 +140,11 @@ def run_filter_test(client, live_server, content_filter):
set_response_with_filter() set_response_with_filter()
# Try several times, it should NOT have 'filter not found' # Try several times, it should NOT have 'filter not found'
for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT): for i in range(0, ATTEMPT_THRESHOLD_SETTING + 2):
client.get(url_for("form_watch_checknow"), follow_redirects=True) client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client) wait_for_all_checks(client)
wait_for_notification_endpoint_output()
# It should have sent a notification, but.. # It should have sent a notification, but..
assert os.path.isfile("test-datastore/notification.txt") assert os.path.isfile("test-datastore/notification.txt")
# but it should not contain the info about a failed filter (because there was none in this case) # but it should not contain the info about a failed filter (because there was none in this case)
@ -135,9 +153,6 @@ def run_filter_test(client, live_server, content_filter):
assert not 'CSS/xPath filter was not present in the page' in notification assert not 'CSS/xPath filter was not present in the page' in notification
# Re #1247 - All tokens got replaced correctly in the notification # Re #1247 - All tokens got replaced correctly in the notification
res = client.get(url_for("index"))
uuid = extract_UUID_from_client(client)
# UUID is correct, but notification contains tag uuid as UUIID wtf
assert uuid in notification assert uuid in notification
# cleanup for the next # cleanup for the next
@ -152,9 +167,11 @@ def test_setup(live_server):
live_server_setup(live_server) live_server_setup(live_server)
def test_check_include_filters_failure_notification(client, live_server, measure_memory_usage): def test_check_include_filters_failure_notification(client, live_server, measure_memory_usage):
# live_server_setup(live_server)
run_filter_test(client, live_server,'#nope-doesnt-exist') run_filter_test(client, live_server,'#nope-doesnt-exist')
def test_check_xpath_filter_failure_notification(client, live_server, measure_memory_usage): def test_check_xpath_filter_failure_notification(client, live_server, measure_memory_usage):
# live_server_setup(live_server)
run_filter_test(client, live_server, '//*[@id="nope-doesnt-exist"]') run_filter_test(client, live_server, '//*[@id="nope-doesnt-exist"]')
# Test that notification is never sent # Test that notification is never sent

@ -2,6 +2,8 @@
from flask import url_for from flask import url_for
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
import time
def set_nonrenderable_response(): def set_nonrenderable_response():
test_return_data = """<html> test_return_data = """<html>
@ -11,17 +13,16 @@ def set_nonrenderable_response():
</body> </body>
</html> </html>
""" """
with open("test-datastore/endpoint-content.txt", "w") as f: with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data) f.write(test_return_data)
time.sleep(1)
return None return None
def set_zero_byte_response(): def set_zero_byte_response():
with open("test-datastore/endpoint-content.txt", "w") as f: with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("") f.write("")
time.sleep(1)
return None return None
def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage): def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage):

@ -3,7 +3,7 @@ import os
import time import time
from flask import url_for from flask import url_for
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output
from ..notification import default_notification_format from ..notification import default_notification_format
instock_props = [ instock_props = [
@ -146,14 +146,13 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'}, data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
follow_redirects=True follow_redirects=True
) )
# A change in price, should trigger a change by default
wait_for_all_checks(client) wait_for_all_checks(client)
data = { data = {
"tags": "", "tags": "",
"url": test_url, "url": test_url,
"headers": "", "headers": "",
"time_between_check-hours": 5,
'fetch_backend': "html_requests" 'fetch_backend': "html_requests"
} }
data.update(extra_watch_edit_form) data.update(extra_watch_edit_form)
@ -178,11 +177,9 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
assert b'1,000.45' or b'1000.45' in res.data #depending on locale assert b'1,000.45' or b'1000.45' in res.data #depending on locale
assert b'unviewed' not in res.data assert b'unviewed' not in res.data
# price changed to something LESS than min (900), SHOULD be a change # price changed to something LESS than min (900), SHOULD be a change
set_original_response(props_markup=instock_props[0], price='890.45') set_original_response(props_markup=instock_props[0], price='890.45')
# let previous runs wait
time.sleep(1)
res = client.get(url_for("form_watch_checknow"), follow_redirects=True) res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
assert b'1 watches queued for rechecking.' in res.data assert b'1 watches queued for rechecking.' in res.data
wait_for_all_checks(client) wait_for_all_checks(client)
@ -197,7 +194,8 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
client.get(url_for("form_watch_checknow"), follow_redirects=True) client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client) wait_for_all_checks(client)
res = client.get(url_for("index")) res = client.get(url_for("index"))
assert b'1,890.45' or b'1890.45' in res.data # Depending on the LOCALE it may be either of these (generally for US/default/etc)
assert b'1,890.45' in res.data or b'1890.45' in res.data
assert b'unviewed' in res.data assert b'unviewed' in res.data
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
@ -362,7 +360,7 @@ def test_change_with_notification_values(client, live_server):
set_original_response(props_markup=instock_props[0], price='1950.45') set_original_response(props_markup=instock_props[0], price='1950.45')
client.get(url_for("form_watch_checknow")) client.get(url_for("form_watch_checknow"))
wait_for_all_checks(client) wait_for_all_checks(client)
time.sleep(3) wait_for_notification_endpoint_output()
assert os.path.isfile("test-datastore/notification.txt"), "Notification received" assert os.path.isfile("test-datastore/notification.txt"), "Notification received"
with open("test-datastore/notification.txt", 'r') as f: with open("test-datastore/notification.txt", 'r') as f:
notification = f.read() notification = f.read()

@ -11,6 +11,8 @@ def set_original_ignore_response():
<p>Some initial text</p> <p>Some initial text</p>
<p>Which is across multiple lines</p> <p>Which is across multiple lines</p>
<p>So let's see what happens.</p> <p>So let's see what happens.</p>
<p>&nbsp; So let's see what happens. <br> </p>
<p>A - sortable line</p>
</body> </body>
</html> </html>
""" """
@ -166,3 +168,50 @@ def test_sort_lines_functionality(client, live_server, measure_memory_usage):
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_extra_filters(client, live_server, measure_memory_usage):
    """Check duplicate-line removal and whitespace trimming with sorting left off.

    Imports the test endpoint as a watch, enables ``remove_duplicate_lines``
    and ``trim_text_whitespace`` on it, rechecks, and then inspects the
    preview page.
    """
    #live_server_setup(live_server)

    set_original_ignore_response()

    # Import the test endpoint as a single watch
    test_url = url_for('test_endpoint', _external=True)
    import_form = {"urls": test_url}
    res = client.post(url_for("import_page"), data=import_form, follow_redirects=True)
    assert b"1 Imported" in res.data
    wait_for_all_checks(client)

    # Turn on the extra text filters via the watch edit form
    edit_form = {
        "remove_duplicate_lines": "y",
        "trim_text_whitespace": "y",
        "sort_text_alphabetically": "",  # leave this OFF for testing
        "url": test_url,
        "fetch_backend": "html_requests",
    }
    res = client.post(url_for("edit_page", uuid="first"), data=edit_form, follow_redirects=True)
    assert b"Updated watch." in res.data

    # Let the save-triggered check finish, then force one more check
    wait_for_all_checks(client)
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    res = client.get(url_for("preview_page", uuid="first"))

    # The phrase must appear exactly once in the preview
    assert res.data.count(b"see what happens.") == 1

    # still should remain unsorted ('A - sortable line') stays at the end
    sortable_pos = res.data.find(b'A - sortable line')
    multiline_pos = res.data.find(b'Which is across multiple lines')
    assert sortable_pos > multiline_pos

    # cleanup for the next test
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data

@ -76,6 +76,18 @@ def set_more_modified_response():
return None return None
def wait_for_notification_endpoint_output(*, attempts=19, interval=1,
                                          path="test-datastore/notification.txt"):
    """Poll until the notification output file appears.

    Apprise can take a few seconds to fire, so this sleeps ``interval``
    seconds and then looks for ``path``, repeating up to ``attempts`` times.
    The defaults reproduce the previous hard-coded behaviour: 19 polls, one
    second apart, sleeping before every check (including the first).

    Args:
        attempts: Maximum number of sleep-then-check rounds.
        interval: Seconds to sleep before each check.
        path: File whose existence signals that the notification arrived.

    Returns:
        True as soon as the file is seen, False if it never showed up.
    """
    # @todo - could check the apprise object directly instead of looking for this file
    import time
    from os.path import isfile

    for _ in range(attempts):
        # Sleep first, then check - callers have always had at least one
        # interval of delay before the file is examined.
        time.sleep(interval)
        if isfile(path):
            return True
    return False
# kinda funky, but works for now # kinda funky, but works for now
def extract_api_key_from_UI(client): def extract_api_key_from_UI(client):
import re import re

@ -189,7 +189,9 @@ class update_worker(threading.Thread):
'screenshot': None 'screenshot': None
}) })
self.notification_q.put(n_object) self.notification_q.put(n_object)
logger.error(f"Sent filter not found notification for {watch_uuid}") logger.debug(f"Sent filter not found notification for {watch_uuid}")
else:
logger.debug(f"NOT sending filter not found notification for {watch_uuid} - no notification URLs")
def send_step_failure_notification(self, watch_uuid, step_n): def send_step_failure_notification(self, watch_uuid, step_n):
watch = self.datastore.data['watching'].get(watch_uuid, False) watch = self.datastore.data['watching'].get(watch_uuid, False)
@ -276,7 +278,7 @@ class update_worker(threading.Thread):
update_handler.call_browser() update_handler.call_browser()
changed_detected, update_obj, contents = update_handler.run_changedetection( changed_detected, update_obj, contents, content_after_filters = update_handler.run_changedetection(
watch=watch, watch=watch,
skip_when_checksum_same=skip_when_same_checksum, skip_when_checksum_same=skip_when_same_checksum,
) )
@ -364,18 +366,22 @@ class update_worker(threading.Thread):
# Only when enabled, send the notification # Only when enabled, send the notification
if watch.get('filter_failure_notification_send', False): if watch.get('filter_failure_notification_send', False):
c = watch.get('consecutive_filter_failures', 5) c = watch.get('consecutive_filter_failures', 0)
c += 1 c += 1
# Send notification if we reached the threshold? # Send notification if we reached the threshold?
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
0) logger.debug(f"Filter for {uuid} not found, consecutive_filter_failures: {c} of threshold {threshold}")
logger.warning(f"Filter for {uuid} not found, consecutive_filter_failures: {c}") if c >= threshold:
if threshold > 0 and c >= threshold:
if not watch.get('notification_muted'): if not watch.get('notification_muted'):
logger.debug(f"Sending filter failed notification for {uuid}")
self.send_filter_failure_notification(uuid) self.send_filter_failure_notification(uuid)
c = 0 c = 0
logger.debug(f"Reset filter failure count back to zero")
self.datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c}) self.datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
else:
logger.trace(f"{uuid} - filter_failure_notification_send not enabled, skipping")
process_changedetection_results = False process_changedetection_results = False
@ -422,7 +428,7 @@ class update_worker(threading.Thread):
) )
if watch.get('filter_failure_notification_send', False): if watch.get('filter_failure_notification_send', False):
c = watch.get('consecutive_filter_failures', 5) c = watch.get('consecutive_filter_failures', 0)
c += 1 c += 1
# Send notification if we reached the threshold? # Send notification if we reached the threshold?
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',

@ -35,7 +35,7 @@ dnspython==2.6.1 # related to eventlet fixes
# jq not available on Windows so must be installed manually # jq not available on Windows so must be installed manually
# Notification library # Notification library
apprise~=1.8.1 apprise==1.9.0
# apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315 # apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
# and 2.0.0 https://github.com/dgtlmoon/changedetection.io/issues/2241 not yet compatible # and 2.0.0 https://github.com/dgtlmoon/changedetection.io/issues/2241 not yet compatible

Loading…
Cancel
Save