From 7f2fa20318182495a1992c6041529f52bf53b4c9 Mon Sep 17 00:00:00 2001
From: dgtlmoon
Date: Wed, 11 Sep 2024 14:51:32 +0200
Subject: [PATCH 1/6] Small memory allocation fixes (#2625)
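
The common thread in this patch is deferring heavy imports (requests, chardet,
bs4, jsonpath_ng, inscriptis) from module level into the functions that use
them, and moving the custom post://, get://, put://, delete:// notification
handlers into their own changedetectionio/apprise package. A minimal sketch of
the deferred-import pattern (illustrative only, the function name is made up):

    # module level: nothing heavy is imported when this module loads
    def fetch(url):
        # Deferred import: the cost is paid on first call only (modules are
        # cached), and a worker that never calls fetch() never pays it at all.
        import requests
        return requests.get(url, timeout=10)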
---
changedetectionio/apprise/__init__.py | 79 +++++++++++++++++
.../content_fetchers/requests.py | 5 +-
changedetectionio/flask_app.py | 3 +-
changedetectionio/forms.py | 3 +-
changedetectionio/html_tools.py | 17 ++--
changedetectionio/notification.py | 85 ++-----------------
changedetectionio/store.py | 2 +-
7 files changed, 103 insertions(+), 91 deletions(-)
create mode 100644 changedetectionio/apprise/__init__.py
diff --git a/changedetectionio/apprise/__init__.py b/changedetectionio/apprise/__init__.py
new file mode 100644
index 00000000..130b1322
--- /dev/null
+++ b/changedetectionio/apprise/__init__.py
@@ -0,0 +1,79 @@
+
+# include the decorator
+from apprise.decorators import notify
+
+@notify(on="delete")
+@notify(on="deletes")
+@notify(on="get")
+@notify(on="gets")
+@notify(on="post")
+@notify(on="posts")
+@notify(on="put")
+@notify(on="puts")
+def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
+ import requests
+ import json
+ from apprise.utils import parse_url as apprise_parse_url
+ from apprise import URLBase
+
+ url = kwargs['meta'].get('url')
+
+ if url.startswith('post'):
+ r = requests.post
+ elif url.startswith('get'):
+ r = requests.get
+ elif url.startswith('put'):
+ r = requests.put
+ elif url.startswith('delete'):
+ r = requests.delete
+
+ url = url.replace('post://', 'http://')
+ url = url.replace('posts://', 'https://')
+ url = url.replace('put://', 'http://')
+ url = url.replace('puts://', 'https://')
+ url = url.replace('get://', 'http://')
+ url = url.replace('gets://', 'https://')
+ url = url.replace('delete://', 'http://')
+ url = url.replace('deletes://', 'https://')
+
+ headers = {}
+ params = {}
+ auth = None
+
+ # Convert /foobar?+some-header=hello to proper header dictionary
+ results = apprise_parse_url(url)
+ if results:
+ # Add the headers (which the user can override if they wish)
+ # to our returned result set, and tidy entries by unquoting them
+ headers = {URLBase.unquote(x): URLBase.unquote(y)
+ for x, y in results['qsd+'].items()}
+
+ # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
+ # In Apprise, each plain request arg must be prefixed with "-", because apprise treats args like &method=update as flags for itself,
+ # but here we are making straight requests, so we need to convert this back against apprise's logic
+ for k, v in results['qsd'].items():
+ if k.strip('+-') not in results['qsd+']:
+ params[URLBase.unquote(k)] = URLBase.unquote(v)
+
+ # Determine Authentication
+ if results.get('user') and results.get('password'):
+ auth = (URLBase.unquote(results.get('user')), URLBase.unquote(results.get('password')))
+ elif results.get('user'):
+ auth = (URLBase.unquote(results.get('user')), '')
+
+ # Try to auto-guess if it's JSON
+ try:
+ json.loads(body)
+ headers['Content-Type'] = 'application/json; charset=utf-8'
+ except ValueError:
+ pass
+
+ r(results.get('url'),
+ auth=auth,
+ data=body.encode('utf-8') if type(body) is str else body,
+ headers=headers,
+ params=params
+ )
\ No newline at end of file
diff --git a/changedetectionio/content_fetchers/requests.py b/changedetectionio/content_fetchers/requests.py
index c39b2636..149d7f96 100644
--- a/changedetectionio/content_fetchers/requests.py
+++ b/changedetectionio/content_fetchers/requests.py
@@ -1,8 +1,6 @@
from loguru import logger
-import chardet
import hashlib
import os
-import requests
from changedetectionio import strtobool
from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
from changedetectionio.content_fetchers.base import Fetcher
@@ -28,6 +26,9 @@ class fetcher(Fetcher):
is_binary=False,
empty_pages_are_a_change=False):
+ import chardet
+ import requests
+
if self.browser_steps_get_valid_steps():
raise BrowserStepsInUnsupportedFetcher(url=url)
diff --git a/changedetectionio/flask_app.py b/changedetectionio/flask_app.py
index fd12393a..6324b58b 100644
--- a/changedetectionio/flask_app.py
+++ b/changedetectionio/flask_app.py
@@ -537,7 +537,8 @@ def changedetection_app(config=None, datastore_o=None):
import random
from .apprise_asset import asset
apobj = apprise.Apprise(asset=asset)
-
+ # so that the custom endpoints are registered
+ from changedetectionio.apprise import apprise_custom_api_call_wrapper
is_global_settings_form = request.args.get('mode', '') == 'global-settings'
is_group_settings_form = request.args.get('mode', '') == 'group-settings'
diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py
index b0b19f99..ce2841de 100644
--- a/changedetectionio/forms.py
+++ b/changedetectionio/forms.py
@@ -221,7 +221,8 @@ class ValidateAppRiseServers(object):
def __call__(self, form, field):
import apprise
apobj = apprise.Apprise()
-
+ # so that the custom endpoints are registered
+ from changedetectionio.apprise import apprise_custom_api_call_wrapper
for server_url in field.data:
if not apobj.add(server_url):
message = field.gettext('\'%s\' is not a valid AppRise URL.' % (server_url))
diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py
index bd5fdb8f..ffe00cd0 100644
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -1,10 +1,4 @@
-
-from bs4 import BeautifulSoup
-from inscriptis import get_text
-from jsonpath_ng.ext import parse
from typing import List
-from inscriptis.model.config import ParserConfig
-from xml.sax.saxutils import escape as xml_escape
import json
import re
@@ -39,6 +33,7 @@ def perl_style_slash_enclosed_regex_to_options(regex):
# Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches
def include_filters(include_filters, html_content, append_pretty_line_formatting=False):
+ from bs4 import BeautifulSoup
soup = BeautifulSoup(html_content, "html.parser")
html_block = ""
r = soup.select(include_filters, separator="")
@@ -56,6 +51,7 @@ def include_filters(include_filters, html_content, append_pretty_line_formatting
return html_block
def subtractive_css_selector(css_selector, html_content):
+ from bs4 import BeautifulSoup
soup = BeautifulSoup(html_content, "html.parser")
for item in soup.select(css_selector):
item.decompose()
@@ -181,6 +177,7 @@ def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=Fals
# Extract/find element
def extract_element(find='title', html_content=''):
+ from bs4 import BeautifulSoup
+ # Re #106, be sure to handle when it's not found
element_text = None
@@ -194,6 +191,8 @@ def extract_element(find='title', html_content=''):
#
def _parse_json(json_data, json_filter):
+ from jsonpath_ng.ext import parse
+
if json_filter.startswith("json:"):
jsonpath_expression = parse(json_filter.replace('json:', ''))
match = jsonpath_expression.find(json_data)
@@ -242,6 +241,8 @@ def _get_stripped_text_from_json_match(match):
# json_filter - ie json:$..price
# ensure_is_ldjson_info_type - str "product", optional, "@type == product" (I don't know how to do that as a json selector)
def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None):
+ from bs4 import BeautifulSoup
+
stripped_text_from_html = False
# https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w
# Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags
@@ -352,6 +353,7 @@ def strip_ignore_text(content, wordlist, mode="content"):
return "\n".encode('utf8').join(output)
def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str:
+ from xml.sax.saxutils import escape as xml_escape
pattern = '<!\[CDATA\[(\s*(?:.(?<!\]\]>)\s*)*)\]\]>'
def repl(m):
text = m.group(1)
@@ -360,6 +362,9 @@ def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False
return re.sub(pattern, repl, html_content)
def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False) -> str:
+ from inscriptis import get_text
+ from inscriptis.model.config import ParserConfig
+
"""Converts html string to a string with just the text. If ignoring
rendering anchor tag content is enable, anchor tag content are also
included in the text
diff --git a/changedetectionio/notification.py b/changedetectionio/notification.py
index d685ab1d..54e682ae 100644
--- a/changedetectionio/notification.py
+++ b/changedetectionio/notification.py
@@ -1,9 +1,10 @@
-import apprise
+
import time
from apprise import NotifyFormat
-import json
+import apprise
from loguru import logger
+
valid_tokens = {
'base_url': '',
'current_snapshot': '',
@@ -34,87 +35,11 @@ valid_notification_formats = {
default_notification_format_for_watch: default_notification_format_for_watch
}
-# include the decorator
-from apprise.decorators import notify
-
-@notify(on="delete")
-@notify(on="deletes")
-@notify(on="get")
-@notify(on="gets")
-@notify(on="post")
-@notify(on="posts")
-@notify(on="put")
-@notify(on="puts")
-def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
- import requests
- from apprise.utils import parse_url as apprise_parse_url
- from apprise import URLBase
-
- url = kwargs['meta'].get('url')
-
- if url.startswith('post'):
- r = requests.post
- elif url.startswith('get'):
- r = requests.get
- elif url.startswith('put'):
- r = requests.put
- elif url.startswith('delete'):
- r = requests.delete
-
- url = url.replace('post://', 'http://')
- url = url.replace('posts://', 'https://')
- url = url.replace('put://', 'http://')
- url = url.replace('puts://', 'https://')
- url = url.replace('get://', 'http://')
- url = url.replace('gets://', 'https://')
- url = url.replace('put://', 'http://')
- url = url.replace('puts://', 'https://')
- url = url.replace('delete://', 'http://')
- url = url.replace('deletes://', 'https://')
-
- headers = {}
- params = {}
- auth = None
-
- # Convert /foobar?+some-header=hello to proper header dictionary
- results = apprise_parse_url(url)
- if results:
- # Add our headers that the user can potentially over-ride if they wish
- # to to our returned result set and tidy entries by unquoting them
- headers = {URLBase.unquote(x): URLBase.unquote(y)
- for x, y in results['qsd+'].items()}
-
- # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
- # In Apprise, it relies on prefixing each request arg with "-", because it uses say &method=update as a flag for apprise
- # but here we are making straight requests, so we need todo convert this against apprise's logic
- for k, v in results['qsd'].items():
- if not k.strip('+-') in results['qsd+'].keys():
- params[URLBase.unquote(k)] = URLBase.unquote(v)
-
- # Determine Authentication
- auth = ''
- if results.get('user') and results.get('password'):
- auth = (URLBase.unquote(results.get('user')), URLBase.unquote(results.get('user')))
- elif results.get('user'):
- auth = (URLBase.unquote(results.get('user')))
-
- # Try to auto-guess if it's JSON
- try:
- json.loads(body)
- headers['Content-Type'] = 'application/json; charset=utf-8'
- except ValueError as e:
- pass
-
- r(results.get('url'),
- auth=auth,
- data=body.encode('utf-8') if type(body) is str else body,
- headers=headers,
- params=params
- )
def process_notification(n_object, datastore):
-
+ # so that the custom endpoints are registered
+ from changedetectionio.apprise import apprise_custom_api_call_wrapper
from .safe_jinja import render as jinja_render
now = time.time()
if n_object.get('notification_timestamp'):
diff --git a/changedetectionio/store.py b/changedetectionio/store.py
index c3772557..cc1b335f 100644
--- a/changedetectionio/store.py
+++ b/changedetectionio/store.py
@@ -11,7 +11,6 @@ from threading import Lock
import json
import os
import re
-import requests
import secrets
import threading
import time
@@ -270,6 +269,7 @@ class ChangeDetectionStore:
self.needs_write_urgent = True
def add_watch(self, url, tag='', extras=None, tag_uuids=None, write_to_disk_now=True):
+ import requests
if extras is None:
extras = {}
From 19f3851c9ddf44ebf25bbe637271b3d7456aaf66 Mon Sep 17 00:00:00 2001
From: dgtlmoon
Date: Wed, 11 Sep 2024 16:20:49 +0200
Subject: [PATCH 2/6] Memory management improvements - LXML and other libraries
can leak allocation, wrap in a sub-process (#2626)
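
The idea here: run the lxml/bs4-heavy filter functions in a short-lived child
process via concurrent.futures.ProcessPoolExecutor, so any allocator growth
inside those libraries is returned to the OS when the child exits, instead of
accumulating in the long-running worker. A rough standalone sketch of the
pattern (leaky_parse is a placeholder, not this repository's API):

    from concurrent.futures import ProcessPoolExecutor
    from functools import partial

    def leaky_parse(html):
        # Stands in for an lxml/bs4-based filter that can grow the heap
        return html.upper()

    def run_isolated(html):
        # The child process is torn down on exit, reclaiming its memory;
        # arguments and the return value must be picklable.
        with ProcessPoolExecutor(max_workers=1) as executor:
            future = executor.submit(partial(leaky_parse, html))
            return future.result()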
---
.../processors/restock_diff/processor.py | 14 +++++--
.../processors/text_json_diff/processor.py | 38 +++++++++++++------
2 files changed, 37 insertions(+), 15 deletions(-)
diff --git a/changedetectionio/processors/restock_diff/processor.py b/changedetectionio/processors/restock_diff/processor.py
index 1a3a96ca..3d8e4349 100644
--- a/changedetectionio/processors/restock_diff/processor.py
+++ b/changedetectionio/processors/restock_diff/processor.py
@@ -2,8 +2,7 @@ from .. import difference_detection_processor
from ..exceptions import ProcessorException
from . import Restock
from loguru import logger
-import hashlib
-import re
+
import urllib3
import time
@@ -36,6 +35,7 @@ def get_itemprop_availability(html_content) -> Restock:
"""
from jsonpath_ng import parse
+ import re
now = time.time()
import extruct
logger.trace(f"Imported extruct module in {time.time() - now:.3f}s")
@@ -122,6 +122,10 @@ class perform_site_check(difference_detection_processor):
xpath_data = None
def run_changedetection(self, watch, skip_when_checksum_same=True):
+ import hashlib
+
+ from concurrent.futures import ProcessPoolExecutor
+ from functools import partial
if not watch:
raise Exception("Watch no longer exists.")
@@ -149,7 +153,11 @@ class perform_site_check(difference_detection_processor):
itemprop_availability = {}
try:
- itemprop_availability = get_itemprop_availability(html_content=self.fetcher.content)
+ with ProcessPoolExecutor() as executor:
+ # Use functools.partial to create a callable with arguments
+ # anything using bs4/lxml etc is quite "leaky"
+ future = executor.submit(partial(get_itemprop_availability, self.fetcher.content))
+ itemprop_availability = future.result()
except MoreThanOnePriceFound as e:
# Add the real data
raise ProcessorException(message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.",
diff --git a/changedetectionio/processors/text_json_diff/processor.py b/changedetectionio/processors/text_json_diff/processor.py
index 1de5bafb..77c37131 100644
--- a/changedetectionio/processors/text_json_diff/processor.py
+++ b/changedetectionio/processors/text_json_diff/processor.py
@@ -36,6 +36,9 @@ class PDFToHTMLToolNotFound(ValueError):
class perform_site_check(difference_detection_processor):
def run_changedetection(self, watch, skip_when_checksum_same=True):
+ from concurrent.futures import ProcessPoolExecutor
+ from functools import partial
+
changed_detected = False
html_content = ""
screenshot = False # as bytes
@@ -171,20 +174,30 @@ class perform_site_check(difference_detection_processor):
for filter_rule in include_filters_rule:
# For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
- html_content += html_tools.xpath_filter(xpath_filter=filter_rule.replace('xpath:', ''),
+ with ProcessPoolExecutor() as executor:
+ # Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
+ future = executor.submit(partial(html_tools.xpath_filter, xpath_filter=filter_rule.replace('xpath:', ''),
html_content=self.fetcher.content,
append_pretty_line_formatting=not watch.is_source_type_url,
- is_rss=is_rss)
+ is_rss=is_rss))
+ html_content += future.result()
+
elif filter_rule.startswith('xpath1:'):
- html_content += html_tools.xpath1_filter(xpath_filter=filter_rule.replace('xpath1:', ''),
+ with ProcessPoolExecutor() as executor:
+ # Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
+ future = executor.submit(partial(html_tools.xpath1_filter, xpath_filter=filter_rule.replace('xpath1:', ''),
html_content=self.fetcher.content,
append_pretty_line_formatting=not watch.is_source_type_url,
- is_rss=is_rss)
+ is_rss=is_rss))
+ html_content += future.result()
else:
- # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
- html_content += html_tools.include_filters(include_filters=filter_rule,
+ with ProcessPoolExecutor() as executor:
+ # Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
+ # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
+ future = executor.submit(partial(html_tools.include_filters, include_filters=filter_rule,
html_content=self.fetcher.content,
- append_pretty_line_formatting=not watch.is_source_type_url)
+ append_pretty_line_formatting=not watch.is_source_type_url))
+ html_content += future.result()
if not html_content.strip():
raise FilterNotFoundInResponse(msg=include_filters_rule, screenshot=self.fetcher.screenshot, xpath_data=self.fetcher.xpath_data)
@@ -197,12 +210,13 @@ class perform_site_check(difference_detection_processor):
else:
# extract text
do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
- stripped_text_from_html = \
- html_tools.html_to_text(
- html_content=html_content,
+ with ProcessPoolExecutor() as executor:
+ # Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
+ # Extract plain text via inscriptis::get_text
+ future = executor.submit(partial(html_tools.html_to_text, html_content=html_content,
render_anchor_tag_content=do_anchor,
- is_rss=is_rss # #1874 activate the <title> workaround hack
- )
+ is_rss=is_rss)) # #1874 activate the <title> workaround hack, will add an extra line feed to signify the paragraph gap
+ stripped_text_from_html = future.result()
From 8ecfd17973a07f2dc6a71bd8095f3113c9dab334 Mon Sep 17 00:00:00 2001
From: Felipe Tuffani <8229714+fetuffani@users.noreply.github.com>
Date: Tue, 17 Sep 2024 06:22:54 -0300
Subject: [PATCH 3/6] Restock/Price detection - Fix duplicated prices with
different data type on single page product #2636 (#2638)
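
The fix normalises each extracted price to a float where possible, so the same
price seen once as the string "1000.45" and once as the number 1000.45 counts
as a single entry. A standalone sketch of the normalisation (simplified; the
real code deduplicates jsonpath match objects rather than bare values):

    def deduplicate(values):
        seen = set()
        unique = []
        for v in values:
            # "1000.45" (str) and 1000.45 (float) should count as one price
            key = float(v) if isinstance(v, str) and v.replace('.', '', 1).isdigit() else v
            if key not in seen:
                seen.add(key)
                unique.append(v)
        return unique

    assert deduplicate(["1000.45", 1000.45, 890.0]) == ["1000.45", 890.0]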
---
.../processors/restock_diff/processor.py | 21 ++++++++++++++++++-
1 file changed, 20 insertions(+), 1 deletion(-)
diff --git a/changedetectionio/processors/restock_diff/processor.py b/changedetectionio/processors/restock_diff/processor.py
index 3d8e4349..88203b51 100644
--- a/changedetectionio/processors/restock_diff/processor.py
+++ b/changedetectionio/processors/restock_diff/processor.py
@@ -26,6 +26,25 @@ def _search_prop_by_value(matches, value):
if value in prop[0]:
return prop[1] # Yield the desired value and exit the function
+def _deduplicate_prices(data):
+ seen = set()
+ unique_data = []
+
+ for datum in data:
+ # Convert 'value' to float if it can be a numeric string, otherwise leave it as is
+ try:
+ normalized_value = float(datum.value) if isinstance(datum.value, str) and datum.value.replace('.', '', 1).isdigit() else datum.value
+ except ValueError:
+ normalized_value = datum.value
+
+ # If the normalized value hasn't been seen yet, add it to unique data
+ if normalized_value not in seen:
+ unique_data.append(datum)
+ seen.add(normalized_value)
+
+ return unique_data
+
+
# should return Restock()
# add casting?
def get_itemprop_availability(html_content) -> Restock:
@@ -60,7 +79,7 @@ def get_itemprop_availability(html_content) -> Restock:
pricecurrency_parse = parse('$..(pricecurrency|currency|priceCurrency )')
availability_parse = parse('$..(availability|Availability)')
- price_result = price_parse.find(data)
+ price_result = _deduplicate_prices(price_parse.find(data))
if price_result:
# Right now, we just support single product items, maybe we will store the whole actual metadata separately in the future and
# parse that for the UI?
From 273adfa0a460dcb615d811d814c361930e7b45d1 Mon Sep 17 00:00:00 2001
From: dgtlmoon
Date: Tue, 17 Sep 2024 16:55:04 +0200
Subject: [PATCH 4/6] Testing - Fix false filter missing check alerts
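
The key counter fix is the .get() default: consecutive_filter_failures now
defaults to 0 instead of 5, so a watch that has never recorded a failure
starts counting from zero rather than from 5, where a single miss could
immediately cross the notification threshold. A toy illustration, assuming a
threshold of 6:

    threshold = 6
    watch = {}  # a watch that has never failed a filter before

    c_old = watch.get('consecutive_filter_failures', 5) + 1  # 6: alert fires on the first miss
    c_new = watch.get('consecutive_filter_failures', 0) + 1  # 1: alert only after 6 real misses

    assert c_old >= threshold and c_new < threshold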
---
.../{apprise => apprise_plugin}/__init__.py | 1 -
changedetectionio/flask_app.py | 2 +-
changedetectionio/forms.py | 2 +-
changedetectionio/notification.py | 3 +-
.../tests/test_filter_failure_notification.py | 126 ++++++++++--------
changedetectionio/update_worker.py | 20 ++-
6 files changed, 86 insertions(+), 68 deletions(-)
rename changedetectionio/{apprise => apprise_plugin}/__init__.py (99%)
diff --git a/changedetectionio/apprise/__init__.py b/changedetectionio/apprise_plugin/__init__.py
similarity index 99%
rename from changedetectionio/apprise/__init__.py
rename to changedetectionio/apprise_plugin/__init__.py
index 130b1322..93c382fa 100644
--- a/changedetectionio/apprise/__init__.py
+++ b/changedetectionio/apprise_plugin/__init__.py
@@ -1,4 +1,3 @@
-
# include the decorator
from apprise.decorators import notify
diff --git a/changedetectionio/flask_app.py b/changedetectionio/flask_app.py
index 6324b58b..eb9d7799 100644
--- a/changedetectionio/flask_app.py
+++ b/changedetectionio/flask_app.py
@@ -538,7 +538,7 @@ def changedetection_app(config=None, datastore_o=None):
from .apprise_asset import asset
apobj = apprise.Apprise(asset=asset)
# so that the custom endpoints are registered
- from changedetectionio.apprise import apprise_custom_api_call_wrapper
+ from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper
is_global_settings_form = request.args.get('mode', '') == 'global-settings'
is_group_settings_form = request.args.get('mode', '') == 'group-settings'
diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py
index ce2841de..1b718cfe 100644
--- a/changedetectionio/forms.py
+++ b/changedetectionio/forms.py
@@ -222,7 +222,7 @@ class ValidateAppRiseServers(object):
import apprise
apobj = apprise.Apprise()
# so that the custom endpoints are registered
- from changedetectionio.apprise import apprise_custom_api_call_wrapper
+ from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper
for server_url in field.data:
if not apobj.add(server_url):
message = field.gettext('\'%s\' is not a valid AppRise URL.' % (server_url))
diff --git a/changedetectionio/notification.py b/changedetectionio/notification.py
index 54e682ae..a7328857 100644
--- a/changedetectionio/notification.py
+++ b/changedetectionio/notification.py
@@ -39,7 +39,8 @@ valid_notification_formats = {
def process_notification(n_object, datastore):
# so that the custom endpoints are registered
- from changedetectionio.apprise import apprise_custom_api_call_wrapper
+ from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper
+
from .safe_jinja import render as jinja_render
now = time.time()
if n_object.get('notification_timestamp'):
diff --git a/changedetectionio/tests/test_filter_failure_notification.py b/changedetectionio/tests/test_filter_failure_notification.py
index 24ae405f..7d3a8bd0 100644
--- a/changedetectionio/tests/test_filter_failure_notification.py
+++ b/changedetectionio/tests/test_filter_failure_notification.py
@@ -1,5 +1,6 @@
import os
import time
+from loguru import logger
from flask import url_for
from .util import set_original_response, live_server_setup, extract_UUID_from_client, wait_for_all_checks, \
wait_for_notification_endpoint_output
@@ -27,6 +28,12 @@ def run_filter_test(client, live_server, content_filter):
# Response WITHOUT the filter ID element
set_original_response()
+ # The notification endpoint that the 'filter not found' alert will be sent to
+ notification_url = url_for('test_notification_endpoint', _external=True).replace('http', 'json')
+
+ # Add our URL to the import page
+ test_url = url_for('test_endpoint', _external=True)
+
# cleanup for the next
client.get(
url_for("form_delete", uuid="all"),
@@ -35,84 +42,90 @@ def run_filter_test(client, live_server, content_filter):
if os.path.isfile("test-datastore/notification.txt"):
os.unlink("test-datastore/notification.txt")
- # Add our URL to the import page
- test_url = url_for('test_endpoint', _external=True)
res = client.post(
- url_for("form_quick_watch_add"),
- data={"url": test_url, "tags": ''},
+ url_for("import_page"),
+ data={"urls": test_url},
follow_redirects=True
)
- assert b"Watch added" in res.data
-
- # Give the thread time to pick up the first version
+ assert b"1 Imported" in res.data
wait_for_all_checks(client)
- # Goto the edit page, add our ignore text
- # Add our URL to the import page
- url = url_for('test_notification_endpoint', _external=True)
- notification_url = url.replace('http', 'json')
-
- print(">>>> Notification URL: " + notification_url)
-
- # Just a regular notification setting, this will be used by the special 'filter not found' notification
- notification_form_data = {"notification_urls": notification_url,
- "notification_title": "New ChangeDetection.io Notification - {{watch_url}}",
- "notification_body": "BASE URL: {{base_url}}\n"
- "Watch URL: {{watch_url}}\n"
- "Watch UUID: {{watch_uuid}}\n"
- "Watch title: {{watch_title}}\n"
- "Watch tag: {{watch_tag}}\n"
- "Preview: {{preview_url}}\n"
- "Diff URL: {{diff_url}}\n"
- "Snapshot: {{current_snapshot}}\n"
- "Diff: {{diff}}\n"
- "Diff Full: {{diff_full}}\n"
- "Diff as Patch: {{diff_patch}}\n"
- ":-)",
- "notification_format": "Text"}
-
- notification_form_data.update({
- "url": test_url,
- "tags": "my tag",
- "title": "my title 123",
- "headers": "",
- "filter_failure_notification_send": 'y',
- "include_filters": content_filter,
- "fetch_backend": "html_requests"})
-
- # A POST here will also reset the filter failure counter (filter_failure_notification_threshold_attempts)
+ uuid = extract_UUID_from_client(client)
+
+ assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 0, "No filter = No filter failure"
+
+ watch_data = {"notification_urls": notification_url,
+ "notification_title": "New ChangeDetection.io Notification - {{watch_url}}",
+ "notification_body": "BASE URL: {{base_url}}\n"
+ "Watch URL: {{watch_url}}\n"
+ "Watch UUID: {{watch_uuid}}\n"
+ "Watch title: {{watch_title}}\n"
+ "Watch tag: {{watch_tag}}\n"
+ "Preview: {{preview_url}}\n"
+ "Diff URL: {{diff_url}}\n"
+ "Snapshot: {{current_snapshot}}\n"
+ "Diff: {{diff}}\n"
+ "Diff Full: {{diff_full}}\n"
+ "Diff as Patch: {{diff_patch}}\n"
+ ":-)",
+ "notification_format": "Text",
+ "fetch_backend": "html_requests",
+ "filter_failure_notification_send": 'y',
+ "headers": "",
+ "tags": "my tag",
+ "title": "my title 123",
+ "time_between_check-hours": 5, # So that the queue runner doesnt also put it in
+ "url": test_url,
+ }
+
res = client.post(
- url_for("edit_page", uuid="first"),
- data=notification_form_data,
+ url_for("edit_page", uuid=uuid),
+ data=watch_data,
follow_redirects=True
)
-
assert b"Updated watch." in res.data
wait_for_all_checks(client)
+ assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 0, "No filter = No filter failure"
+
+ # Now add a filter, because recheck hours == 5, ONLY pressing of the [edit] or [recheck all] should trigger
+ watch_data['include_filters'] = content_filter
+ res = client.post(
+ url_for("edit_page", uuid=uuid),
+ data=watch_data,
+ follow_redirects=True
+ )
+ assert b"Updated watch." in res.data
+
+ # It should have checked once so far and given this error (because we hit SAVE)
- # Now the notification should not exist, because we didnt reach the threshold
+ wait_for_all_checks(client)
assert not os.path.isfile("test-datastore/notification.txt")
+ # Hitting [save] would have triggered a recheck, and we have a filter, so this would be ONE failure
+ assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 1, "Should have been checked once"
+
# recheck it up to just before the threshold, including the fact that in the previous POST it would have rechecked (and incremented)
- for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT-2):
+ # Add 4 more checks
+ checked = 0
+ ATTEMPT_THRESHOLD_SETTING = live_server.app.config['DATASTORE'].data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
+ for i in range(0, ATTEMPT_THRESHOLD_SETTING - 2):
+ checked += 1
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
- time.sleep(2) # delay for apprise to fire
- assert not os.path.isfile("test-datastore/notification.txt"), f"test-datastore/notification.txt should not exist - Attempt {i} when threshold is {App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT}"
+ res = client.get(url_for("index"))
+ assert b'Warning, no filters were found' in res.data
+ assert not os.path.isfile("test-datastore/notification.txt")
- # We should see something in the frontend
- res = client.get(url_for("index"))
- assert b'Warning, no filters were found' in res.data
+ assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 5
# One more check should trigger the _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT threshold
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
-
wait_for_notification_endpoint_output()
+
# Now it should exist and contain our "filter not found" alert
assert os.path.isfile("test-datastore/notification.txt")
-
with open("test-datastore/notification.txt", 'r') as f:
notification = f.read()
@@ -125,7 +138,7 @@ def run_filter_test(client, live_server, content_filter):
set_response_with_filter()
# Try several times, it should NOT have 'filter not found'
- for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT):
+ for i in range(0, ATTEMPT_THRESHOLD_SETTING + 2):
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
@@ -138,9 +151,6 @@ def run_filter_test(client, live_server, content_filter):
assert not 'CSS/xPath filter was not present in the page' in notification
# Re #1247 - All tokens got replaced correctly in the notification
- res = client.get(url_for("index"))
- uuid = extract_UUID_from_client(client)
- # UUID is correct, but notification contains tag uuid as UUIID wtf
assert uuid in notification
# cleanup for the next
@@ -155,9 +165,11 @@ def test_setup(live_server):
live_server_setup(live_server)
def test_check_include_filters_failure_notification(client, live_server, measure_memory_usage):
+# live_server_setup(live_server)
run_filter_test(client, live_server,'#nope-doesnt-exist')
def test_check_xpath_filter_failure_notification(client, live_server, measure_memory_usage):
+# live_server_setup(live_server)
run_filter_test(client, live_server, '//*[@id="nope-doesnt-exist"]')
# Test that notification is never sent
diff --git a/changedetectionio/update_worker.py b/changedetectionio/update_worker.py
index ba183848..fab27548 100644
--- a/changedetectionio/update_worker.py
+++ b/changedetectionio/update_worker.py
@@ -189,7 +189,9 @@ class update_worker(threading.Thread):
'screenshot': None
})
self.notification_q.put(n_object)
- logger.error(f"Sent filter not found notification for {watch_uuid}")
+ logger.debug(f"Sent filter not found notification for {watch_uuid}")
+ else:
+ logger.debug(f"NOT sending filter not found notification for {watch_uuid} - no notification URLs")
def send_step_failure_notification(self, watch_uuid, step_n):
watch = self.datastore.data['watching'].get(watch_uuid, False)
@@ -364,18 +366,22 @@ class update_worker(threading.Thread):
# Only when enabled, send the notification
if watch.get('filter_failure_notification_send', False):
- c = watch.get('consecutive_filter_failures', 5)
+ c = watch.get('consecutive_filter_failures', 0)
c += 1
# Send notification if we reached the threshold?
- threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
- 0)
- logger.warning(f"Filter for {uuid} not found, consecutive_filter_failures: {c}")
- if threshold > 0 and c >= threshold:
+ threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
+ logger.debug(f"Filter for {uuid} not found, consecutive_filter_failures: {c} of threshold {threshold}")
+ if c >= threshold:
if not watch.get('notification_muted'):
+ logger.debug(f"Sending filter failed notification for {uuid}")
self.send_filter_failure_notification(uuid)
c = 0
+ logger.debug(f"Reset filter failure count back to zero")
self.datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
+ else:
+ logger.trace(f"{uuid} - filter_failure_notification_send not enabled, skipping")
+
process_changedetection_results = False
@@ -422,7 +428,7 @@ class update_worker(threading.Thread):
)
if watch.get('filter_failure_notification_send', False):
- c = watch.get('consecutive_filter_failures', 5)
+ c = watch.get('consecutive_filter_failures', 0)
c += 1
# Send notification if we reached the threshold?
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
From 543cb205d2d1276bd7e71c1f943acf2da67b9f04 Mon Sep 17 00:00:00 2001
From: dgtlmoon
Date: Tue, 17 Sep 2024 18:29:12 +0200
Subject: [PATCH 5/6] Testing - Fixing Restock test #2641
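
The flakiness came from fixed time.sleep() pauses racing the background
scheduler; the fix sets a long recheck interval (time_between_check-hours: 5)
so only explicit rechecks drive the test, and relies on polling helpers like
wait_for_all_checks() instead of sleeping. The general shape of such a polling
wait (a sketch, not this repository's helper):

    import time

    def wait_until(predicate, timeout=10, interval=0.2):
        # Poll until the condition holds instead of guessing with sleep()
        deadline = time.time() + timeout
        while time.time() < deadline:
            if predicate():
                return True
            time.sleep(interval)
        raise TimeoutError("condition not met in time")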
---
changedetectionio/tests/test_restock_itemprop.py | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/changedetectionio/tests/test_restock_itemprop.py b/changedetectionio/tests/test_restock_itemprop.py
index c873aa22..eb2e731b 100644
--- a/changedetectionio/tests/test_restock_itemprop.py
+++ b/changedetectionio/tests/test_restock_itemprop.py
@@ -146,14 +146,13 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
follow_redirects=True
)
-
- # A change in price, should trigger a change by default
wait_for_all_checks(client)
data = {
"tags": "",
"url": test_url,
"headers": "",
+ "time_between_check-hours": 5,
'fetch_backend': "html_requests"
}
data.update(extra_watch_edit_form)
@@ -178,12 +177,9 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
assert b'1,000.45' in res.data or b'1000.45' in res.data # depending on locale
assert b'unviewed' not in res.data
-
# price changed to something LESS than min (900), SHOULD be a change
set_original_response(props_markup=instock_props[0], price='890.45')
- # let previous runs wait
- time.sleep(2)
-
+
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
assert b'1 watches queued for rechecking.' in res.data
wait_for_all_checks(client)
From a5ff1cd1d75ac275d1e251c3107ef81dd3d0c706 Mon Sep 17 00:00:00 2001
From: Dawid Wróbel
Date: Tue, 17 Sep 2024 18:30:15 +0200
Subject: [PATCH 6/6] browser_steps: add "click element containing text if
exists" (#2629)
---
.../blueprint/browser_steps/browser_steps.py | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/changedetectionio/blueprint/browser_steps/browser_steps.py b/changedetectionio/blueprint/browser_steps/browser_steps.py
index 6f38be2e..b9765bac 100644
--- a/changedetectionio/blueprint/browser_steps/browser_steps.py
+++ b/changedetectionio/blueprint/browser_steps/browser_steps.py
@@ -25,6 +25,7 @@ browser_step_ui_config = {'Choose one': '0 0',
'Click element if exists': '1 0',
'Click element': '1 0',
'Click element containing text': '0 1',
+ 'Click element containing text if exists': '0 1',
'Enter text in field': '1 1',
'Execute JS': '0 1',
# 'Extract text and use as filter': '1 0',
@@ -96,12 +97,24 @@ class steppable_browser_interface():
return self.action_goto_url(value=self.start_url)
def action_click_element_containing_text(self, selector=None, value=''):
+ logger.debug("Clicking element containing text")
if not len(value.strip()):
return
elem = self.page.get_by_text(value)
if elem.count():
elem.first.click(delay=randint(200, 500), timeout=3000)
+ def action_click_element_containing_text_if_exists(self, selector=None, value=''):
+ logger.debug("Clicking element containing text if exists")
+ if not len(value.strip()):
+ return
+ elem = self.page.get_by_text(value)
+ logger.debug(f"Clicking element containing text - {elem.count()} elements found")
+ if elem.count():
+ elem.first.click(delay=randint(200, 500), timeout=3000)
+
def action_enter_text_in_field(self, selector, value):
if not len(selector.strip()):
return