parent
2bc988dffc
commit
6e71088cde
@ -0,0 +1,11 @@
|
|||||||
|
# Change detection post-processors
|
||||||
|
|
||||||
|
The idea is to be able to switch between processors, each of which solves a different domain-specific problem.
|
||||||
|
|
||||||
|
- `text_json_diff` The traditional text and JSON comparison handler
|
||||||
|
- `restock_diff` Only cares about detecting if a product looks like it has some text that suggests that it's out of stock, otherwise assumes that it's in stock.
|
||||||
|
|
||||||
|
Some suggestions for the future
|
||||||
|
|
||||||
|
- `graphical`
|
||||||
|
- `restock_and_price` - extract price AND stock text
|
@ -0,0 +1,24 @@
|
|||||||
|
from abc import abstractmethod
|
||||||
|
import hashlib
|
||||||
|
|
||||||
|
|
||||||
|
class difference_detection_processor():
    """Common parent for the change-detection processors.

    ``run`` carries ``@abstractmethod``, but because this class does not use
    ``ABCMeta`` the decorator is advisory only: the class can still be
    instantiated and the placeholder ``run`` below can still be called.
    """

    def __init__(self, *args, **kwargs):
        # Forward everything up the MRO unchanged.
        super().__init__(*args, **kwargs)

    @abstractmethod
    def run(self, uuid, skip_when_checksum_same=True):
        """Placeholder check that never reports a change.

        Neither ``uuid`` nor ``skip_when_checksum_same`` is used here.

        Returns:
            A 3-tuple ``(changed_detected, update_obj, contents)``:
            ``False``, a dict with the error flags cleared and
            ``previous_md5`` set to the MD5 hex digest of a fixed dummy
            string, and empty bytes.
        """
        placeholder = 'xxxxx'
        checksum = hashlib.md5(placeholder.encode('utf-8')).hexdigest()
        update_obj = {
            'last_notification_error': False,
            'last_error': False,
            'previous_md5': checksum,
        }
        return False, update_obj, b''
|
||||||
|
|
||||||
|
|
||||||
|
def available_processors():
    """List the selectable processors.

    Returns:
        A list of ``(module_name, display_name)`` tuples, where the display
        name is each processor module's ``name`` attribute.
    """
    # Imported inside the function, exactly as the original does.
    from . import restock_diff, text_json_diff

    # @todo Make this smarter with introspection of sorts.
    known_modules = (
        ('text_json_diff', text_json_diff),
        ('restock_diff', restock_diff),
    )
    return [(key, module.name) for key, module in known_modules]
|
@ -0,0 +1,125 @@
|
|||||||
|
|
||||||
|
import hashlib
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import urllib3
|
||||||
|
from . import difference_detection_processor
|
||||||
|
from changedetectionio import content_fetcher
|
||||||
|
from copy import deepcopy
|
||||||
|
|
||||||
|
# Silence urllib3's InsecureRequestWarning; this runs once, as a side effect of importing the module.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Human-readable label for this processor.
# NOTE(review): presumably the value read as `restock_diff.name` by available_processors() - confirm.
name = 'Re-stock detection'
# One-line explanation of what this processor detects.
description = 'Detects if the product goes back to in-stock'
|
||||||
|
|
||||||
|
class perform_site_check(difference_detection_processor):
    """Re-stock processor: fetch a watched page and report its stock status.

    ``run`` loads the page through the ``html_webdriver`` content fetcher,
    reads the fetcher's ``instock_data`` text and compares its MD5 with the
    checksum stored on the watch.
    """

    # Filled in by run() from the fetcher after each fetch.
    screenshot = None
    xpath_data = None

    def __init__(self, *args, datastore, **kwargs):
        """Keep a reference to ``datastore`` (keyword-only); the rest goes to the parent."""
        super().__init__(*args, **kwargs)
        self.datastore = datastore

    @staticmethod
    def _strip_brotli(request_headers):
        """Remove the ``br`` content coding from Accept-Encoding, in place.

        The header name is matched case-insensitively and codings are parsed
        as a comma-separated list (optionally with ``;q=`` weights), so
        ``br`` is removed wherever it sits in the list. A header that
        contained only ``br`` is dropped entirely. Headers without a ``br``
        coding are left exactly as the user wrote them.
        """
        for header_name in list(request_headers):
            if header_name.lower() != 'accept-encoding':
                continue
            codings = [c.strip() for c in request_headers[header_name].split(',') if c.strip()]
            kept = [c for c in codings if c.split(';', 1)[0].strip().lower() != 'br']
            if len(kept) == len(codings):
                continue
            if kept:
                request_headers[header_name] = ', '.join(kept)
            else:
                del request_headers[header_name]

    def run(self, uuid, skip_when_checksum_same=True):
        """Fetch the watch identified by ``uuid`` and decide whether its stock text changed.

        ``skip_when_checksum_same`` is accepted for interface compatibility
        but is not used by this processor.

        Returns:
            A 3-tuple ``(changed_detected, update_obj, contents)`` where
            ``update_obj`` is a dict of watch fields to store and
            ``contents`` is the extracted stock text as UTF-8 bytes (empty
            when the fetcher produced no stock text).

        Raises:
            Exception: if the watch no longer exists, if a ``file`` URL is
                requested without ``ALLOW_FILE_URI`` set, or if the selected
                fetch backend is not ``html_webdriver``.
        """
        # DeepCopy so we can be sure we don't accidentally change anything by reference
        watch = deepcopy(self.datastore.data['watching'].get(uuid))
        if not watch:
            raise Exception("Watch no longer exists.")

        # Protect against file:// access
        if re.search(r'^file', watch.get('url', ''), re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
            raise Exception(
                "file:// type access is denied for security reasons."
            )

        # Unset any existing notification error
        update_obj = {'last_notification_error': False, 'last_error': False}

        # Tweak the base config with the per-watch ones
        request_headers = deepcopy(self.datastore.data['settings']['headers'])
        request_headers.update(watch.get('headers', []))

        # https://github.com/psf/requests/issues/4525
        # Requests doesn't yet support brotli encoding, so make sure 'br' can
        # never be requested by accident.
        self._strip_brotli(request_headers)

        timeout = self.datastore.data['settings']['requests'].get('timeout')
        url = watch.link

        # Read from the snapshot taken above rather than the live datastore,
        # so the whole run sees one consistent view of the watch.
        request_body = watch.get('body')
        request_method = watch.get('method')
        ignore_status_codes = watch.get('ignore_status_codes', False)

        # Pluggable content fetcher
        prefer_backend = watch.get_fetch_backend
        if not prefer_backend or prefer_backend == 'system':
            prefer_backend = self.datastore.data['settings']['application']['fetch_backend']

        # Could be removed if requests/plaintext could also return some info?
        # Checked before a fetcher is built so an unsupported backend fails fast.
        if prefer_backend != 'html_webdriver':
            raise Exception("Re-stock detection requires Chrome or compatible webdriver/playwright fetcher to work")

        if hasattr(content_fetcher, prefer_backend):
            klass = getattr(content_fetcher, prefer_backend)
        else:
            # If the klass doesn't exist, just use a default
            klass = getattr(content_fetcher, "html_requests")

        proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=uuid)
        proxy_url = None
        if proxy_id:
            proxy_url = self.datastore.proxy_list.get(proxy_id).get('url')
            print("UUID {} Using proxy {}".format(uuid, proxy_url))

        fetcher = klass(proxy_override=proxy_url)

        # Configurable per-watch or global extra delay before extracting text (for webDriver types)
        watch_webdriver_delay = watch.get('webdriver_delay')
        system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None)
        if watch_webdriver_delay is not None:
            fetcher.render_extract_delay = watch_webdriver_delay
        elif system_webdriver_delay is not None:
            fetcher.render_extract_delay = system_webdriver_delay

        js_code = watch.get('webdriver_js_execute_code')
        if js_code is not None and js_code.strip():
            fetcher.webdriver_js_execute_code = js_code

        fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch.get('include_filters'))
        fetcher.quit()

        self.screenshot = fetcher.screenshot
        self.xpath_data = fetcher.xpath_data

        # Track the content type
        update_obj['content_type'] = fetcher.headers.get('Content-Type', '')
        update_obj["last_check_status"] = fetcher.get_last_status_code()

        # Main detection method
        instock_data = fetcher.instock_data
        fetched_md5 = None
        if instock_data:
            fetched_md5 = hashlib.md5(instock_data.encode('utf-8')).hexdigest()
            # 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold.
            update_obj["in_stock"] = instock_data == 'Possibly in stock'

        # The main thing that all this at the moment comes down to :)
        changed_detected = False
        previous_md5 = watch.get('previous_md5')
        if previous_md5 and previous_md5 != fetched_md5:
            # Either every change counts, or only a change that lands on "in stock".
            # .get() because "in_stock" is only set when stock text was extracted.
            if not watch.get('in_stock_only') or update_obj.get("in_stock"):
                changed_detected = True

        # Always record the new checksum
        update_obj["previous_md5"] = fetched_md5

        # instock_data may be empty or None; return empty bytes instead of failing on .encode().
        return changed_detected, update_obj, (instock_data or '').encode('utf-8')
|
@ -0,0 +1,56 @@
|
|||||||
|
/**
 * Scan the page for text that suggests the product is out of stock.
 *
 * Only elements without child elements are inspected, walking from the last
 * such element in the document back to the first, and only when the element
 * has a non-zero offset size or at least one client rect.
 *
 * @returns {string} The lower-cased text (or value, for <input>) of the first
 *     element found containing an out-of-stock phrase, otherwise the literal
 *     'Possibly in stock'.
 */
function isItemInStock() {
    // @todo Pass these in so the same list can be used in non-JS fetchers
    const outOfStockTexts = [
        '0 in stock',
        'agotado',
        'artikel zurzeit vergriffen',
        'available for back order',
        'backordered',
        'brak w magazynie',
        'brak na stanie',
        'coming soon',
        'currently unavailable',
        'en rupture de stock',
        'as soon as stock is available',
        'message if back in stock',
        'nachricht bei',
        'nicht auf lager',
        'nicht lieferbar',
        'nicht zur verfügung',
        'no disponible temporalmente',
        'not in stock',
        'out of stock',
        'out-of-stock',
        'não estamos a aceitar encomendas',
        'produkt niedostępny',
        'no longer in stock',
        'sold out',
        'temporarily out of stock',
        'temporarily unavailable',
        'we do not currently have an estimate of when this product will be back in stock.',
        'zur zeit nicht an lager',
    ];

    // An element counts as rendered if any of these three measurements is non-empty.
    const isRendered = (el) =>
        el.offsetWidth > 0 || el.offsetHeight > 0 || el.getClientRects().length > 0;

    // Inputs carry their text in `value`; everything else in `textContent`.
    const lowerCasedText = (el) =>
        (el.tagName.toLowerCase() === 'input' ? el.value : el.textContent).toLowerCase();

    // Leaf elements, last-in-document first.
    const leavesLastFirst = Array.from(document.getElementsByTagName('*'))
        .filter((el) => el.children.length === 0)
        .reverse();

    for (const leaf of leavesLastFirst) {
        if (!isRendered(leaf)) {
            continue;
        }
        const text = lowerCasedText(leaf);
        if (outOfStockTexts.some((phrase) => text.includes(phrase))) {
            return text; // item is out of stock
        }
    }
    return 'Possibly in stock'; // possibly in stock, cant decide otherwise.
}
|
||||||
|
|
||||||
|
// Script result: whatever isItemInStock() yields, i.e. the lower-cased text of the
// element that makes it think the item is out of stock, or 'Possibly in stock'.
// NOTE(review): the bare top-level `return` implies this file is evaluated as a
// function body by whatever injects it - confirm against the fetcher.
return isItemInStock();
|
@ -0,0 +1,2 @@
|
|||||||
|
"""Tests for the app."""
|
||||||
|
|
@ -0,0 +1,3 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
from .. import conftest
|
@ -0,0 +1,106 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from flask import url_for
|
||||||
|
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
|
||||||
|
from changedetectionio.notification import (
|
||||||
|
default_notification_body,
|
||||||
|
default_notification_format,
|
||||||
|
default_notification_title,
|
||||||
|
valid_notification_formats,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def set_original_response():
    """Write the out-of-stock fixture page to ``test-datastore/endpoint-content.txt``."""
    out_of_stock_page = """<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<br>
So let's see what happens. <br>
<div>price: $10.99</div>
<div id="sametext">Out of stock</div>
</body>
</html>
"""

    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(out_of_stock_page)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def set_back_in_stock_response():
    """Write the in-stock fixture page to ``test-datastore/endpoint-content.txt``."""
    in_stock_page = """<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<br>
So let's see what happens. <br>
<div>price: $10.99</div>
<div id="sametext">Available!</div>
</body>
</html>
"""

    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(in_stock_page)
|
||||||
|
|
||||||
|
# Add a watch using the restock_diff processor: it should show as not-in-stock, then in-stock, and only notify on the OUT OF STOCK -> IN STOCK change
|
||||||
|
def test_restock_detection(client, live_server):
    """End-to-end check of a watch that uses the ``restock_diff`` processor.

    Sequence: serve the out-of-stock page and expect ``not-in-stock`` on the
    index; switch to the in-stock page and expect that marker to disappear
    and a notification file to appear; switch back to out-of-stock and expect
    no new notification file.
    """
    notification_file = "test-datastore/notification.txt"

    def recheck_all():
        # Queue an immediate recheck, then block until the checks are done.
        client.get(url_for("form_watch_checknow"), follow_redirects=True)
        wait_for_all_checks(client)

    set_original_response()
    #assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"

    time.sleep(1)
    live_server_setup(live_server)

    #####################
    # Same hostname swap as the watch URL below, then the 'http' prefix is replaced with 'json'.
    notification_url = url_for('test_notification_endpoint', _external=True)
    notification_url = notification_url.replace('http://localhost', 'http://changedet')
    notification_url = notification_url.replace('http', 'json')

    #####################
    # Set this up for when we remove the notification from the watch, it should fallback with these details
    settings_url = url_for("settings_page")
    settings_form = {
        "application-notification_urls": notification_url,
        "application-notification_title": "fallback-title " + default_notification_title,
        "application-notification_body": "fallback-body " + default_notification_body,
        "application-notification_format": default_notification_format,
        "requests-time_between_check-minutes": 180,
        'application-fetch_backend': "html_webdriver",
    }
    client.post(settings_url, data=settings_form, follow_redirects=True)

    # Add our URL to the import page, because the docker container (playwright/selenium) wont be able to connect to our usual test url
    test_url = url_for('test_endpoint', _external=True).replace('http://localhost', 'http://changedet')

    watch_form = {"url": test_url, "tag": '', 'processor': 'restock_diff'}
    client.post(
        url_for("form_quick_watch_add"),
        data=watch_form,
        follow_redirects=True,
    )

    # Is it correctly shown as NOT in stock?
    wait_for_all_checks(client)
    index_page = client.get(url_for("index"))
    assert b'not-in-stock' in index_page.data

    # Is it correctly shown as in stock
    set_back_in_stock_response()
    recheck_all()
    index_page = client.get(url_for("index"))
    assert b'not-in-stock' not in index_page.data

    # We should have a notification
    time.sleep(2)
    assert os.path.isfile(notification_file)
    os.unlink(notification_file)

    # Default behaviour is to only fire notification when it goes OUT OF STOCK -> IN STOCK
    # So here there should be no file, because we go IN STOCK -> OUT OF STOCK
    set_original_response()
    recheck_all()
    assert not os.path.isfile(notification_file)
|
Before Width: | Height: | Size: 209 KiB After Width: | Height: | Size: 171 KiB |
Loading…
Reference in new issue