Merge branch 'master' into 2118-fix-missing-arg

pull/2162/head
dgtlmoon 3 months ago
commit 575df972a4

@ -465,6 +465,7 @@ class watchForm(commonSettingsForm):
method = SelectField('Request method', choices=valid_method, default=default_method) method = SelectField('Request method', choices=valid_method, default=default_method)
ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False) ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False)
check_unique_lines = BooleanField('Only trigger when unique lines appear', default=False) check_unique_lines = BooleanField('Only trigger when unique lines appear', default=False)
sort_text_alphabetically = BooleanField('Sort text alphabetically', default=False)
filter_text_added = BooleanField('Added lines', default=True) filter_text_added = BooleanField('Added lines', default=True)
filter_text_replaced = BooleanField('Replaced/changed lines', default=True) filter_text_replaced = BooleanField('Replaced/changed lines', default=True)

@ -58,6 +58,7 @@ base_config = {
'previous_md5_before_filters': False, # Used for skipping changedetection entirely 'previous_md5_before_filters': False, # Used for skipping changedetection entirely
'proxy': None, # Preferred proxy connection 'proxy': None, # Preferred proxy connection
'remote_server_reply': None, # From 'server' reply header 'remote_server_reply': None, # From 'server' reply header
'sort_text_alphabetically': False,
'subtractive_selectors': [], 'subtractive_selectors': [],
'tag': '', # Old system of text name for a tag, to be removed 'tag': '', # Old system of text name for a tag, to be removed
'tags': [], # list of UUIDs to App.Tags 'tags': [], # list of UUIDs to App.Tags

@ -204,6 +204,12 @@ class perform_site_check(difference_detection_processor):
is_rss=is_rss # #1874 activate the <title workaround hack is_rss=is_rss # #1874 activate the <title workaround hack
) )
if watch.get('sort_text_alphabetically') and stripped_text_from_html:
# Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
# we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
stripped_text_from_html = stripped_text_from_html.replace('\n\n', '\n')
stripped_text_from_html = '\n'.join( sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower() ))
# Re #340 - return the content before the 'ignore text' was applied # Re #340 - return the content before the 'ignore text' was applied
text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8') text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')

@ -36,6 +36,7 @@ function isItemInStock() {
'nicht zur verfügung', 'nicht zur verfügung',
'niet beschikbaar', 'niet beschikbaar',
'niet leverbaar', 'niet leverbaar',
'niet op voorraad',
'no disponible temporalmente', 'no disponible temporalmente',
'no longer in stock', 'no longer in stock',
'no tickets available', 'no tickets available',

@ -339,6 +339,10 @@ nav
<span class="pure-form-message-inline">When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span> <span class="pure-form-message-inline">When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span>
</fieldset> </fieldset>
<fieldset class="pure-control-group">
{{ render_checkbox_field(form.sort_text_alphabetically) }}
<span class="pure-form-message-inline">Helps reduce changes detected caused by sites shuffling lines around, combine with <i>check unique lines</i> below.</span>
</fieldset>
<fieldset class="pure-control-group"> <fieldset class="pure-control-group">
{{ render_checkbox_field(form.check_unique_lines) }} {{ render_checkbox_field(form.check_unique_lines) }}
<span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span> <span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>

@ -2,7 +2,7 @@
import time import time
from flask import url_for from flask import url_for
from .util import live_server_setup from .util import live_server_setup, wait_for_all_checks
def set_original_ignore_response(): def set_original_ignore_response():
@ -34,6 +34,23 @@ def set_modified_swapped_lines():
with open("test-datastore/endpoint-content.txt", "w") as f: with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data) f.write(test_return_data)
def set_modified_swapped_lines_with_extra_text_for_sorting():
test_return_data = """<html>
<body>
<p>&nbsp;Which is across multiple lines</p>
<p>Some initial text</p>
<p> So let's see what happens.</p>
<p>Z last</p>
<p>0 numerical</p>
<p>A uppercase</p>
<p>a lowercase</p>
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
def set_modified_with_trigger_text_response(): def set_modified_with_trigger_text_response():
test_return_data = """<html> test_return_data = """<html>
@ -49,15 +66,14 @@ def set_modified_with_trigger_text_response():
with open("test-datastore/endpoint-content.txt", "w") as f: with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data) f.write(test_return_data)
def test_setup(client, live_server):
live_server_setup(live_server)
def test_unique_lines_functionality(client, live_server): def test_unique_lines_functionality(client, live_server):
live_server_setup(live_server) #live_server_setup(live_server)
sleep_time_for_fetch_thread = 3
set_original_ignore_response() set_original_ignore_response()
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page # Add our URL to the import page
test_url = url_for('test_endpoint', _external=True) test_url = url_for('test_endpoint', _external=True)
@ -67,7 +83,7 @@ def test_unique_lines_functionality(client, live_server):
follow_redirects=True follow_redirects=True
) )
assert b"1 Imported" in res.data assert b"1 Imported" in res.data
time.sleep(sleep_time_for_fetch_thread) wait_for_all_checks(client)
# Add our URL to the import page # Add our URL to the import page
res = client.post( res = client.post(
@ -83,12 +99,11 @@ def test_unique_lines_functionality(client, live_server):
# Make a change # Make a change
set_modified_swapped_lines() set_modified_swapped_lines()
time.sleep(sleep_time_for_fetch_thread)
# Trigger a check # Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True) client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up # Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread) wait_for_all_checks(client)
# It should report nothing found (no new 'unviewed' class) # It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("index")) res = client.get(url_for("index"))
@ -97,7 +112,57 @@ def test_unique_lines_functionality(client, live_server):
# Now set the content which contains the new text and re-ordered existing text # Now set the content which contains the new text and re-ordered existing text
set_modified_with_trigger_text_response() set_modified_with_trigger_text_response()
client.get(url_for("form_watch_checknow"), follow_redirects=True) client.get(url_for("form_watch_checknow"), follow_redirects=True)
time.sleep(sleep_time_for_fetch_thread) wait_for_all_checks(client)
res = client.get(url_for("index")) res = client.get(url_for("index"))
assert b'unviewed' in res.data assert b'unviewed' in res.data
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_sort_lines_functionality(client, live_server):
#live_server_setup(live_server)
set_modified_swapped_lines_with_extra_text_for_sorting()
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
# Add our URL to the import page
res = client.post(
url_for("edit_page", uuid="first"),
data={"sort_text_alphabetically": "n",
"url": test_url,
"fetch_backend": "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
wait_for_all_checks(client)
res = client.get(url_for("index"))
# Should be a change registered
assert b'unviewed' in res.data
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
assert res.data.find(b'0 numerical') < res.data.find(b'Z last')
assert res.data.find(b'A uppercase') < res.data.find(b'Z last')
assert res.data.find(b'Some initial text') < res.data.find(b'Which is across multiple lines')
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

@ -46,8 +46,8 @@ beautifulsoup4
# XPath filtering, lxml is required by bs4 anyway, but put it here to be safe. # XPath filtering, lxml is required by bs4 anyway, but put it here to be safe.
lxml lxml
# XPath 2.0-3.1 support # XPath 2.0-3.1 support - 4.2.0 broke something?
elementpath elementpath==4.1.5
selenium~=4.14.0 selenium~=4.14.0

Loading…
Cancel
Save