Restock & Price monitor - Huge refactor, set upper and lower price alert limits, set % change, follow the prices and restock amounts directly in the watch-overview list
parent
99b0935b42
commit
cffb6d748c
@ -0,0 +1,73 @@
|
|||||||
|
import os
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
from changedetectionio import strtobool
|
||||||
|
from changedetectionio.notification import default_notification_format_for_watch
|
||||||
|
|
||||||
|
class watch_base(dict):
    """Dict of per-watch settings that starts out filled with default values.

    Whatever is passed to the constructor (a mapping and/or keyword
    arguments) is applied on top of the defaults. A truthy ``default`` key
    is removed again once construction has finished.
    """

    def __init__(self, *arg, **kw):
        defaults = {
            # Custom notification content.
            # Re #110: None means "use the system default", so a submitted value that
            # equals the default has to be stored as None.
            'body': None,
            'browser_steps': [],
            'browser_steps_last_error_step': None,
            'check_count': 0,
            # On change-detected, compare against all history to see if it is something new
            'check_unique_lines': False,
            # Counts every time the CSS/xPath filter cannot be located, reset when all is fine
            'consecutive_filter_failures': 0,
            'date_created': None,
            # Extract text by regex after filters
            'extract_text': [],
            'extract_title_as_title': False,
            # plaintext, playwright etc
            'fetch_backend': 'system',
            'fetch_time': 0.0,
            'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
            'filter_text_added': True,
            'filter_text_removed': True,
            'filter_text_replaced': True,
            'follow_price_changes': True,
            'has_ldjson_price_data': None,
            # Extra headers to send
            'headers': {},
            # List of text to ignore when calculating the comparison checksum
            'ignore_text': [],
            # Only trigger change on going to instock from out-of-stock
            'in_stock_only': True,
            'include_filters': [],
            'last_checked': 0,
            'last_error': False,
            # History key value of the last viewed via the [diff] link
            'last_viewed': 0,
            'method': 'GET',
            'notification_alert_count': 0,
            'notification_body': None,
            'notification_format': default_notification_format_for_watch,
            'notification_muted': False,
            # Include the latest screenshot if available and supported by the apprise URL
            'notification_screenshot': False,
            'notification_title': None,
            # List of URLs to add to the notification Queue (Usually AppRise)
            'notification_urls': [],
            'paused': False,
            'previous_md5': False,
            # Used for skipping changedetection entirely
            'previous_md5_before_filters': False,
            # Could be restock_diff or others from .processors
            'processor': 'text_json_diff',
            'price_change_threshold_percent': None,
            # Preferred proxy connection
            'proxy': None,
            # From 'server' reply header
            'remote_server_reply': None,
            'sort_text_alphabetically': False,
            'subtractive_selectors': [],
            # Old system of text name for a tag, to be removed
            'tag': '',
            # List of UUIDs to App.Tags
            'tags': [],
            # Text that should not be present
            'text_should_not_be_present': [],
            'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
            'time_between_check_use_default': True,
            'title': None,
            'track_ldjson_price_data': None,
            # List of text or regex to wait for until a change is detected
            'trigger_text': [],
            'url': '',
            'uuid': str(uuid.uuid4()),
            'webdriver_delay': None,
            # Run before change-detection
            'webdriver_js_execute_code': None,
        }
        self.update(defaults)

        # Caller-supplied values are layered over the defaults
        super().__init__(*arg, **kw)

        # A truthy 'default' entry is not kept on the instance
        if self.get('default'):
            self.pop('default')
|
@ -0,0 +1,10 @@
|
|||||||
|
class ProcessorException(Exception):
    """Error raised by a processor, carrying context about the failed check.

    All constructor arguments are stored unchanged as attributes of the same
    name (``message``, ``status_code``, ``url``, ``screenshot``,
    ``has_filters``, ``html_content``, ``xpath_data``).
    """

    def __init__(self, message=None, status_code=None, url=None, screenshot=None, has_filters=False, html_content='', xpath_data=None):
        # Forward the message to Exception so that str(exc), logging and tracebacks
        # show it even when it was passed as a keyword argument.
        if message is None:
            super().__init__()
        else:
            super().__init__(message)

        self.message = message
        self.status_code = status_code
        self.url = url
        self.screenshot = screenshot
        self.has_filters = has_filters
        self.html_content = html_content
        self.xpath_data = xpath_data
|
@ -1,62 +0,0 @@
|
|||||||
|
|
||||||
from . import difference_detection_processor
|
|
||||||
from loguru import logger
|
|
||||||
import hashlib
|
|
||||||
import urllib3
|
|
||||||
|
|
||||||
# Silence urllib3's InsecureRequestWarning
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Human-readable processor metadata (presumably shown where processors are listed - confirm against callers)
name = 'Re-stock detection for single product pages'
description = 'Detects if the product goes back to in-stock'
|
|
||||||
|
|
||||||
class UnableToExtractRestockData(Exception):
    """Raised when no restock data is available; records the given ``status_code``."""

    def __init__(self, status_code):
        # Kept on the instance so other parts of the app can use it
        self.status_code = status_code
|
|
||||||
|
|
||||||
class perform_site_check(difference_detection_processor):
    """Restock processor that compares a checksum of the scraped stock text with the previous one."""

    screenshot = None
    xpath_data = None

    def run_changedetection(self, watch, skip_when_checksum_same=True):
        """Run one restock check for ``watch``.

        Returns a tuple ``(changed_detected, update_obj, snapshot_bytes)``.
        Raises ``UnableToExtractRestockData`` when the fetcher has no
        ``instock_data``.
        """
        if not watch:
            raise Exception("Watch no longer exists.")

        fetcher = self.fetcher

        # Start with any existing notification error unset
        update_obj = {'last_notification_error': False, 'last_error': False}

        self.screenshot = fetcher.screenshot
        self.xpath_data = fetcher.xpath_data

        # Track the content type and status of this fetch
        update_obj['content_type'] = fetcher.headers.get('Content-Type', '')
        update_obj["last_check_status"] = fetcher.get_last_status_code()

        instock_data = fetcher.instock_data
        if not instock_data:
            raise UnableToExtractRestockData(status_code=fetcher.status_code)

        # Main detection method
        fetched_md5 = hashlib.md5(instock_data.encode('utf-8')).hexdigest()
        # 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold.
        update_obj["in_stock"] = instock_data == 'Possibly in stock'
        logger.debug(f"Watch UUID {watch.get('uuid')} restock check returned '{instock_data}' from JS scraper.")

        previous_md5 = watch.get('previous_md5')
        logger.debug(f"Watch UUID {watch.get('uuid')} restock check - Previous MD5: {previous_md5}, Fetched MD5 {fetched_md5}")

        # The main thing that all this at the moment comes down to :)
        changed_detected = False
        if previous_md5 and previous_md5 != fetched_md5:
            if watch.get('in_stock_only'):
                # Only going (back) to in-stock counts as a change
                changed_detected = update_obj["in_stock"]
            else:
                # Every difference counts
                changed_detected = True

        # Always record the new checksum
        update_obj["previous_md5"] = fetched_md5
        return changed_detected, update_obj, instock_data.encode('utf-8').strip()
|
|
@ -0,0 +1,65 @@
|
|||||||
|
|
||||||
|
from changedetectionio.model.Watch import model as BaseWatch
|
||||||
|
import re
|
||||||
|
from babel.numbers import parse_decimal
|
||||||
|
|
||||||
|
class Restock(dict):
    """Dict holding restock facts: ``in_stock``, ``price``, ``currency`` and ``original_price``.

    Assigning ``price`` through item assignment parses string values to a
    float and records the first truthy price as ``original_price``.
    """

    def parse_currency(self, raw_value: str) -> float:
        """Turn a price string such as ``1,400.00`` or ``1.400,00`` into a float."""
        # Even better would be to store the whole thing as an integer.
        text = raw_value
        last_dot = text.rfind('.')
        last_comma = text.rfind(',')

        if last_dot != -1 and last_comma != -1:
            # Both separators present: whichever comes last is the decimal separator
            if last_dot > last_comma:
                text = text.replace(',', '')
            else:
                text = text.replace('.', '').replace(',', '.')
        else:
            # At most one kind of separator: a comma is read as the decimal point
            text = text.replace(',', '.')

        # Keep only digits, dots and minus signs
        text = re.sub(r'[^\d.-]', '', text)

        return float(parse_decimal(text, locale='en'))

    def __init__(self, *args, **kwargs):
        """Start from the default keys, then merge an optional single dict argument.

        Keyword arguments are accepted but not used. Raises ``ValueError``
        when the positional arguments are anything other than one dict.
        """
        super().__init__({
            'in_stock': None,
            'price': None,
            'currency': None,
            'original_price': None,
        })

        if not args:
            return

        if len(args) != 1 or not isinstance(args[0], dict):
            raise ValueError("Only one positional argument of type 'dict' is allowed")

        self.update(args[0])

    def __setitem__(self, key, value):
        """Store ``value`` under ``key``, with extra handling when ``key`` is ``'price'``."""
        if key == 'price':
            if isinstance(value, str):
                value = self.parse_currency(raw_value=value)

            # Remember the first real price that was seen
            if value and not self.get('original_price'):
                self['original_price'] = value

        super().__setitem__(key, value)
|
||||||
|
|
||||||
|
class Watch(BaseWatch):
    """Watch model that always carries a ``restock`` entry holding a ``Restock``."""

    def __init__(self, *arg, **kw):
        super().__init__(*arg, **kw)

        # Re-use the restock data from the 'default' keyword when there is any
        supplied_defaults = kw.get('default')
        stored_restock = supplied_defaults.get('restock') if supplied_defaults else None
        if stored_restock:
            self['restock'] = Restock(stored_restock)
        else:
            self['restock'] = Restock()

    def clear_watch(self):
        """Clear the watch via the base class, then reset ``restock`` to a fresh ``Restock``."""
        super().clear_watch()
        fresh = {'restock': Restock()}
        self.update(fresh)
|
||||||
|
|
@ -0,0 +1,61 @@
|
|||||||
|
|
||||||
|
from wtforms import (
|
||||||
|
BooleanField,
|
||||||
|
validators,
|
||||||
|
FloatField
|
||||||
|
)
|
||||||
|
|
||||||
|
from changedetectionio.forms import processor_text_json_diff_form
|
||||||
|
|
||||||
|
class processor_settings_form(processor_text_json_diff_form):
    """Settings form for the restock & price processor, extending the text/json diff form."""

    in_stock_only = BooleanField('Only trigger when product goes BACK to in-stock', default=True)

    # Optional price limits; both fields may be left empty
    price_change_min = FloatField('Minimum amount to trigger notification', [validators.Optional()],
                                  render_kw={"placeholder": "No limit", "size": "10"})
    price_change_max = FloatField('Maximum amount to trigger notification', [validators.Optional()],
                                  render_kw={"placeholder": "No limit", "size": "10"})

    # Optional percentage, validated to lie between 0 and 100
    price_change_threshold_percent = FloatField('Threshold in % for price changes', validators=[
        validators.Optional(),
        validators.NumberRange(min=0, max=100, message="Should be between 0 and 100"),
    ], render_kw={"placeholder": "0%", "size": "5"})

    # NOTE(review): the form default here is False while watch_base defaults
    # 'follow_price_changes' to True - confirm which one is intended.
    follow_price_changes = BooleanField('Follow price changes', default=False)

    def extra_tab_content(self):
        """Return the title used for this processor's extra settings tab."""
        return 'Restock & Price Detection'

    def extra_form_content(self):
        """Return the template markup that renders this form's extra fields."""
        return """
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
<script>
$(document).ready(function () {
toggleOpacity('#follow_price_changes', '.price-change-minmax', true);
});
</script>


<fieldset>
<div class="pure-control-group">
<fieldset class="pure-group">
{{ render_checkbox_field(form.in_stock_only) }}
<span class="pure-form-message-inline">Only trigger notifications when page changes from <strong>out of stock</strong> to <strong>back in stock</strong></span>
</fieldset>
<fieldset class="pure-group">
{{ render_checkbox_field(form.follow_price_changes) }}
<span class="pure-form-message-inline">Changes in price should trigger a notification</span>
<span class="pure-form-message-inline">When OFF - only care about restock detection</span>
</fieldset>
<fieldset class="pure-group price-change-minmax">
{{ render_field(form.price_change_min, placeholder=watch['restock']['price']) }}
<span class="pure-form-message-inline">Minimum amount, only trigger a change when the price is less than this amount.</span>
</fieldset>
<fieldset class="pure-group price-change-minmax">
{{ render_field(form.price_change_max, placeholder=watch['restock']['price']) }}
<span class="pure-form-message-inline">Maximum amount, only trigger a change when the price is more than this amount.</span>
</fieldset>
<fieldset class="pure-group price-change-minmax">
{{ render_field(form.price_change_threshold_percent) }}
<span class="pure-form-message-inline">Price must change more than this % to trigger a change.</span><br>
<span class="pure-form-message-inline">For example, If the product is $1,000 USD, <strong>2%</strong> would mean it has to change more than $20 since the first check.</span><br>
</fieldset>
</div>
</fieldset>"""
|
@ -0,0 +1,247 @@
|
|||||||
|
from .. import difference_detection_processor
|
||||||
|
from ..exceptions import ProcessorException
|
||||||
|
from . import Restock
|
||||||
|
from loguru import logger
|
||||||
|
import hashlib
|
||||||
|
import re
|
||||||
|
import urllib3
|
||||||
|
import time
|
||||||
|
|
||||||
|
# Silence urllib3's InsecureRequestWarning
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Human-readable processor metadata (presumably shown where processors are listed - confirm against callers)
name = 'Re-stock & Price detection for single product pages'
# NOTE(review): the description only mentions restock although this processor also handles prices
description = 'Detects if the product goes back to in-stock'
|
||||||
|
|
||||||
|
class UnableToExtractRestockData(Exception):
    """Exception that records the ``status_code`` it was created with."""

    def __init__(self, status_code):
        # Kept on the instance so other parts of the app can use it
        self.status_code = status_code
|
||||||
|
|
||||||
|
class MoreThanOnePriceFound(Exception):
    """Raised when a page's metadata contains several different prices."""

    def __init__(self):
        """Take no arguments and store no state."""
|
||||||
|
|
||||||
|
def _search_prop_by_value(matches, value):
|
||||||
|
for properties in matches:
|
||||||
|
for prop in properties:
|
||||||
|
if value in prop[0]:
|
||||||
|
return prop[1] # Yield the desired value and exit the function
|
||||||
|
|
||||||
|
# should return Restock()
|
||||||
|
# add casting?
|
||||||
|
def get_itemprop_availability(html_content) -> Restock:
    """
    Find price, currency and availability in the metadata embedded in a page.

    'extruct' extracts the dublincore/json-ld/microdata/microformat/opengraph data from the
    HTML, which is then searched with jsonpath for price, currency and availability keys.
    Whatever is still missing afterwards is looked up in the OpenGraph-style 'properties'
    entries of the extracted data.

    :param html_content: HTML of the fetched page.
    :return: Restock with whichever of 'price', 'currency' and 'availability' were found.
    :raises MoreThanOnePriceFound: when more than one distinct price is present.
    """
    from jsonpath_ng import parse

    # Imported here (and timed) rather than at module level
    now = time.time()
    import extruct
    logger.trace(f"Imported extruct module in {time.time() - now:.3f}s")

    now = time.time()
    # Extruct is very slow, I'm wondering if some ML is going to be faster (800ms on my i7),
    # 'rdfa' seems to be the heaviest and is not in the list below.
    syntaxes = ['dublincore', 'json-ld', 'microdata', 'microformat', 'opengraph']

    data = extruct.extract(html_content, syntaxes=syntaxes)
    logger.trace(f"Extruct basic extract of all metadata done in {time.time() - now:.3f}s")

    # First phase, dead simple scanning of anything that looks useful
    value = Restock()
    if data:
        logger.debug(f"Using jsonpath to find price/availability/etc")
        price_parse = parse('$..(price|Price)')
        pricecurrency_parse = parse('$..(pricecurrency|currency|priceCurrency )')
        availability_parse = parse('$..(availability|Availability)')

        price_result = price_parse.find(data)
        if price_result:
            # Right now, we just support single product items, maybe we will store the whole
            # actual metadata separately in the future and parse that for the UI?
            # The same price may appear in several embedded data types, one might have
            # $121.95 and another 121.95, so compare with the '$' removed.
            prices_found = set(str(item.value).replace('$', '') for item in price_result)
            if len(price_result) > 1 and len(prices_found) > 1:
                logger.warning(f"More than one price found {prices_found}, throwing exception, cant use this plugin.")
                raise MoreThanOnePriceFound()

            value['price'] = price_result[0].value

        pricecurrency_result = pricecurrency_parse.find(data)
        if pricecurrency_result:
            value['currency'] = pricecurrency_result[0].value

        availability_result = availability_parse.find(data)
        if availability_result:
            value['availability'] = availability_result[0].value

        if value.get('availability'):
            # Normalise e.g. "https://schema.org/InStock" to "instock"
            value['availability'] = re.sub(r'(?i)^(https|http)://schema.org/', '',
                                           value.get('availability').strip(' "\'').lower())

        # Second, go dig OpenGraph which is something that jsonpath_ng cant do because of the tuples and double-dots (:)
        # Runs whenever any of the three values is still missing. Previously the check was
        # "not price or availability", which skipped this fallback exactly when availability
        # was the missing piece.
        if not (value.get('price') and value.get('availability') and value.get('currency')):
            logger.debug(f"Alternatively digging through OpenGraph properties for restock/price info..")
            jsonpath_expr = parse('$..properties')

            for match in jsonpath_expr.find(data):
                if not value.get('price'):
                    value['price'] = _search_prop_by_value([match.value], "price:amount")
                if not value.get('availability'):
                    value['availability'] = _search_prop_by_value([match.value], "product:availability")
                if not value.get('currency'):
                    value['currency'] = _search_prop_by_value([match.value], "price:currency")
    logger.trace(f"Processed with Extruct in {time.time()-now:.3f}s")

    return value
|
||||||
|
|
||||||
|
|
||||||
|
def is_between(number, lower=None, upper=None):
    """
    Tell whether ``number`` lies within the optional, inclusive bounds.

    Parameters:
    number (float): The number to check.
    lower (float or None): Inclusive lower bound, None for no lower bound.
    upper (float or None): Inclusive upper bound, None for no upper bound.

    Returns:
    bool: True when every bound that was given is satisfied, False otherwise.
    """
    if lower is not None and not (lower <= number):
        return False
    if upper is not None and not (number <= upper):
        return False
    return True
|
||||||
|
|
||||||
|
|
||||||
|
class perform_site_check(difference_detection_processor):
    """Restock & price processor: decides whether stock status or price changed since the last check."""

    screenshot = None
    xpath_data = None

    def run_changedetection(self, watch, skip_when_checksum_same=True):
        """Run one restock/price check for ``watch``.

        Stock and price information is taken from the page's embedded metadata
        (get_itemprop_availability); when that yields no availability, the
        fetcher's ``instock_data`` is used for the stock status instead.

        Returns a tuple ``(changed_detected, update_obj, snapshot_bytes)`` where
        the snapshot is the text "<in_stock> - <price>" encoded as UTF-8.

        Raises ProcessorException when more than one price was found, or when
        neither ``instock_data`` nor a metadata availability is available.
        """
        if not watch:
            raise Exception("Watch no longer exists.")

        # Unset any existing notification error
        update_obj = {'last_notification_error': False, 'last_error': False, 'restock': Restock()}

        self.screenshot = self.fetcher.screenshot
        self.xpath_data = self.fetcher.xpath_data

        # Track the content type
        update_obj['content_type'] = self.fetcher.headers.get('Content-Type', '')
        update_obj["last_check_status"] = self.fetcher.get_last_status_code()

        itemprop_availability = {}
        try:
            itemprop_availability = get_itemprop_availability(html_content=self.fetcher.content)
        except MoreThanOnePriceFound as e:
            # Add the real data
            raise ProcessorException(message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.",
                                     url=watch.get('url'),
                                     status_code=self.fetcher.get_last_status_code(),
                                     screenshot=self.fetcher.screenshot,
                                     xpath_data=self.fetcher.xpath_data
                                     )

        # Something valid in get_itemprop_availability() by scraping metadata ?
        if itemprop_availability.get('price') or itemprop_availability.get('availability'):
            # Store for other usage
            update_obj['restock'] = itemprop_availability

            if itemprop_availability.get('availability'):
                # In stock when the availability text contains any of these substrings
                # @todo: Configurable?
                if any(substring.lower() in itemprop_availability['availability'].lower() for substring in [
                    'instock',
                    'instoreonly',
                    'limitedavailability',
                    'onlineonly',
                    'presale']
                       ):
                    update_obj['restock']['in_stock'] = True
                else:
                    update_obj['restock']['in_stock'] = False

        # Main detection method
        fetched_md5 = None

        # Neither the scraper text nor the metadata gave anything to work with
        if not self.fetcher.instock_data and not itemprop_availability.get('availability'):
            raise ProcessorException(
                message=f"Unable to extract restock data for this page unfortunately. (Got code {self.fetcher.get_last_status_code()} from server), no embedded stock information was found and nothing interesting in the text, try using this watch with Chrome.",
                url=watch.get('url'),
                status_code=self.fetcher.get_last_status_code(),
                screenshot=self.fetcher.screenshot,
                xpath_data=self.fetcher.xpath_data
            )

        # Nothing automatic in microdata found, revert to scraping the page
        if self.fetcher.instock_data and itemprop_availability.get('availability') is None:
            # 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold.
            # Careful! this does not really come from chrome/js when the watch is set to plaintext
            update_obj['restock']["in_stock"] = True if self.fetcher.instock_data == 'Possibly in stock' else False
            logger.debug(f"Watch UUID {watch.get('uuid')} restock check returned '{self.fetcher.instock_data}' from JS scraper.")

        # What we store in the snapshot
        price = update_obj.get('restock').get('price') if update_obj.get('restock').get('price') else ""
        snapshot_content = f"{update_obj.get('restock').get('in_stock')} - {price}"

        # Main detection method
        fetched_md5 = hashlib.md5(snapshot_content.encode('utf-8')).hexdigest()

        # The main thing that all this at the moment comes down to :)
        changed_detected = False
        logger.debug(f"Watch UUID {watch.get('uuid')} restock check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")

        # out of stock -> back in stock only?
        if watch.get('restock') and watch['restock'].get('in_stock') != update_obj['restock'].get('in_stock'):
            # Yes if we only care about it going to instock, AND we are in stock
            if watch.get('in_stock_only') and update_obj['restock']['in_stock']:
                changed_detected = True

            if not watch.get('in_stock_only'):
                # All cases
                changed_detected = True

        # NOTE(review): the nesting of the price checks below was reconstructed; the min/max
        # and % sections are taken to belong to this 'follow_price_changes' branch - confirm.
        if watch.get('follow_price_changes') and watch.get('restock') and update_obj.get('restock') and update_obj['restock'].get('price'):
            price = float(update_obj['restock'].get('price'))
            # Compared against the stored 'original_price'; skipped when there is none
            if watch['restock'].get('original_price'):
                previous_price = float(watch['restock'].get('original_price'))
                # It was different, but negate it further down
                if price != previous_price:
                    changed_detected = True

            # Minimum/maximum price limit
            if update_obj.get('restock') and update_obj['restock'].get('price'):
                logger.debug(
                    f"{watch.get('uuid')} - Change was detected, 'price_change_max' is '{watch.get('price_change_max', '')}' 'price_change_min' is '{watch.get('price_change_min', '')}', price from website is '{update_obj['restock'].get('price', '')}'.")
                if update_obj['restock'].get('price'):
                    # A falsy limit (None, empty or 0) counts as "no limit"
                    min_limit = float(watch.get('price_change_min')) if watch.get('price_change_min') else None
                    max_limit = float(watch.get('price_change_max')) if watch.get('price_change_max') else None

                    price = float(update_obj['restock'].get('price'))
                    logger.debug(f"{watch.get('uuid')} after float conversion - Min limit: '{min_limit}' Max limit: '{max_limit}' Price: '{price}'")
                    if min_limit or max_limit:
                        # A price inside the min/max range cancels a detected change
                        if is_between(number=price, lower=min_limit, upper=max_limit):
                            logger.trace(f"{watch.get('uuid')} {price} is between {min_limit} and {max_limit}")
                            if changed_detected:
                                logger.debug(f"{watch.get('uuid')} Override change-detected to FALSE because price was inside threshold")
                                changed_detected = False
                        else:
                            logger.trace(f"{watch.get('uuid')} {price} is NOT between {min_limit} and {max_limit}")

            # Price comparison by %
            if watch['restock'].get('original_price') and changed_detected and watch.get('price_change_threshold_percent'):
                previous_price = float(watch['restock'].get('original_price'))
                pc = float(watch.get('price_change_threshold_percent'))
                # Absolute change relative to the original price, in percent
                change = abs((price - previous_price) / previous_price * 100)
                # A non-zero change at or below the threshold cancels the detected change
                if change and change <= pc:
                    logger.debug(f"{watch.get('uuid')} Override change-detected to FALSE because % threshold ({pc}%) was {change:.3f}%")
                    changed_detected = False
                else:
                    logger.debug(f"{watch.get('uuid')} Price change was {change:.3f}% , (threshold {pc}%)")

        # Always record the new checksum
        update_obj["previous_md5"] = fetched_md5

        return changed_detected, update_obj, snapshot_content.encode('utf-8').strip()
|
@ -0,0 +1,312 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
import time
|
||||||
|
|
||||||
|
from flask import url_for
|
||||||
|
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
|
||||||
|
|
||||||
|
# Markup fixtures that describe an in-stock product; '$$PRICE$$' is replaced with the
# price by set_original_response(). Every entry must end with a comma, otherwise
# adjacent string literals are silently concatenated into a single fixture.
instock_props = [
    # LD+JSON with non-standard list of 'type' https://github.com/dgtlmoon/changedetection.io/issues/1833
    '<script type=\'application/ld+json\'>{"@context": "http://schema.org","@type": ["Product", "SubType"],"name": "My test product","description":"","Offers": { "@type": "Offer", "offeredBy": { "@type": "Organization", "name":"Person", "telephone":"+1 999 999 999" }, "price": $$PRICE$$, "priceCurrency": "EUR", "url": "/some/url", "availability": "http://schema.org/InStock"} }</script>',
    # LD JSON
    '<script id="product-jsonld" type="application/ld+json">{"@context":"https://schema.org","@type":"Product","brand":{"@type":"Brand","name":"Ubiquiti"},"name":"UniFi Express","sku":"UX","description":"Impressively compact UniFi Cloud Gateway and WiFi 6 access point that runs UniFi Network. Powers an entire network or simply meshes as an access point.","url":"https://store.ui.com/us/en/products/ux","image":{"@type":"ImageObject","url":"https://cdn.ecomm.ui.com/products/4ed25b4c-db92-4b98-bbf3-b0989f007c0e/123417a2-895e-49c7-ba04-b6cd8f6acc03.png","width":"1500","height":"1500"},"offers":{"@type":"Offer","availability":"https://schema.org/InStock","priceSpecification":{"@type":"PriceSpecification","price":$$PRICE$$,"priceCurrency":"USD","valueAddedTaxIncluded":false}}}</script>',
    '<script id="product-schema" type="application/ld+json">{"@context": "https://schema.org","@type": "Product","itemCondition": "https://schema.org/NewCondition","image": "//1.com/hmgo","name": "Polo MuscleFit","color": "Beige","description": "Polo","sku": "0957102010","brand": {"@type": "Brand","name": "H&M"},"category": {"@type": "Thing","name": "Polo"},"offers": [{"@type": "Offer","url": "https:/www2.xxxxxx.com/fr_fr/productpage.0957102010.html","priceCurrency": "EUR","price": $$PRICE$$,"availability": "http://schema.org/InStock","seller": { "@type": "Organization", "name": "H&M"}}]}</script>',
    # Microdata
    '<div itemscope itemtype="https://schema.org/Product"><h1 itemprop="name">Example Product</h1><p itemprop="description">This is a sample product description.</p><div itemprop="offers" itemscope itemtype="https://schema.org/Offer"><p>Price: <span itemprop="price">$$$PRICE$$</span></p><link itemprop="availability" href="https://schema.org/InStock" /></div></div>',
]
|
||||||
|
|
||||||
|
# Markup fixtures that describe an out-of-stock product; '$$PRICE$$' is replaced with the
# price by set_original_response().
out_of_stock_props = [
    # out of stock AND contains multiples (two ld+json scripts: a WebSite and a Product)
    '<script type="application/ld+json">{"@context":"http://schema.org","@type":"WebSite","url":"https://www.medimops.de/","potentialAction":{"@type":"SearchAction","target":"https://www.medimops.de/produkte-C0/?fcIsSearch=1&searchparam={searchparam}","query-input":"required name=searchparam"}}</script><script type="application/ld+json">{"@context":"http://schema.org","@type":"Product","name":"Horsetrader: Robert Sangster and the Rise and Fall of the Sport of Kings","image":"https://images2.medimops.eu/product/43a982/M00002551322-large.jpg","productID":"isbn:9780002551328","gtin13":"9780002551328","category":"Livres en langue étrangère","offers":{"@type":"Offer","priceCurrency":"EUR","price":$$PRICE$$,"itemCondition":"UsedCondition","availability":"OutOfStock"},"brand":{"@type":"Thing","name":"Patrick Robinson","url":"https://www.momox-shop.fr/,patrick-robinson/"}}</script>'
]
|
||||||
|
|
||||||
|
def set_original_response(props_markup='', price="121.95"):
    """Write the page served by the test endpoint.

    ``$$PRICE$$`` inside ``props_markup`` is replaced with ``price``; the
    markup is embedded in a small HTML page that also shows the price as
    plain text. Sleeps for one second after writing.
    """
    markup = props_markup.replace('$$PRICE$$', price)
    page = f"""<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<br>
So let's see what happens. <br>
<div>price: ${price}</div>
{markup}
</body>
</html>
"""

    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(page)

    time.sleep(1)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def test_setup(client, live_server):
    """Set up the live server; the tests below have their own setup call commented out."""
    live_server_setup(live_server)
|
||||||
|
|
||||||
|
def test_restock_itemprop_basic(client, live_server):
    """Each in-stock fixture must list as in-stock and each out-of-stock fixture as not-in-stock."""

    #live_server_setup(live_server)

    test_url = url_for('test_endpoint', _external=True)

    # One fresh watch per in-stock fixture
    for p in instock_props:
        set_original_response(props_markup=p)
        client.post(
            url_for("form_quick_watch_add"),
            data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
            follow_redirects=True
        )
        wait_for_all_checks(client)
        res = client.get(url_for("index"))
        assert b'more than one price detected' not in res.data
        assert b'has-restock-info' in res.data
        assert b' in-stock' in res.data
        assert b' not-in-stock' not in res.data
        # Remove the watch again so the next fixture starts clean
        res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
        assert b'Deleted' in res.data

    # Same again for the out-of-stock fixtures
    for p in out_of_stock_props:
        set_original_response(props_markup=p)
        client.post(
            url_for("form_quick_watch_add"),
            data={"url": test_url, "tags": '', 'processor': 'restock_diff'},
            follow_redirects=True
        )
        wait_for_all_checks(client)
        res = client.get(url_for("index"))

        assert b'has-restock-info not-in-stock' in res.data

        res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
        assert b'Deleted' in res.data
|
||||||
|
|
||||||
|
def test_itemprop_price_change(client, live_server):
    """A price change is flagged by default, and is only displayed once price-following is off."""
    #live_server_setup(live_server)

    test_url = url_for('test_endpoint', _external=True)
    markup = instock_props[0]

    set_original_response(props_markup=markup, price="190.95")
    client.post(
        url_for("form_quick_watch_add"),
        data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
        follow_redirects=True,
    )

    # The first check records the starting price, which must be listed
    wait_for_all_checks(client)
    overview = client.get(url_for("index"))
    assert b'190.95' in overview.data

    # Plain price change: new price is shown and the watch is marked unviewed
    set_original_response(props_markup=markup, price='180.45')
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    overview = client.get(url_for("index"))
    assert b'180.45' in overview.data
    assert b'unviewed' in overview.data
    client.get(url_for("mark_all_viewed"), follow_redirects=True)

    # With the price-change trigger switched off the new price still shows,
    # but the watch must not be marked as changed
    set_original_response(props_markup=markup, price='120.45')
    edit_form = {
        "follow_price_changes": "",
        "url": test_url,
        "tags": "",
        "headers": "",
        'fetch_backend': "html_requests",
    }
    saved = client.post(
        url_for("edit_page", uuid="first"),
        data=edit_form,
        follow_redirects=True,
    )
    assert b"Updated watch." in saved.data

    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    overview = client.get(url_for("index"))
    assert b'120.45' in overview.data
    assert b'unviewed' not in overview.data

    cleanup = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in cleanup.data
|
||||||
|
|
||||||
|
def test_itemprop_price_minmax_limit(client, live_server):
    """Lower/upper price limits decide whether a price change is flagged.

    With ``price_change_min`` 900.0 and ``price_change_max`` 1100.10 set on
    the watch, a new price inside that range must be displayed without
    marking the watch unviewed, while a price below the minimum or above the
    maximum must mark it unviewed.
    """
    #live_server_setup(live_server)

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data

    test_url = url_for('test_endpoint', _external=True)

    set_original_response(props_markup=instock_props[0], price="950.95")
    client.post(
        url_for("form_quick_watch_add"),
        data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
        follow_redirects=True
    )

    # Let the first check finish before the limits are configured
    wait_for_all_checks(client)

    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"follow_price_changes": "y",
              "price_change_min": 900.0,
              "price_change_max": 1100.10,
              "url": test_url,
              "tags": "",
              "headers": "",
              'fetch_backend': "html_requests"
              },
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
    wait_for_all_checks(client)

    client.get(url_for("mark_all_viewed"))

    # price changed to something greater than min (900), and less than max (1100).. should be no change
    set_original_response(props_markup=instock_props[0], price='1000.45')
    client.get(url_for("form_watch_checknow"))
    wait_for_all_checks(client)
    res = client.get(url_for("index"))

    assert b'more than one price detected' not in res.data
    # BUT the new price should show, even though it is within limits.
    # Each locale variant needs its own membership test: `b'x' or b'y' in data`
    # parses as `b'x' or (b'y' in data)` and is always truthy.
    assert b'1,000.45' in res.data or b'1000.45' in res.data  # depending on locale
    assert b'unviewed' not in res.data

    # price changed to something LESS than min (900), SHOULD be a change
    set_original_response(props_markup=instock_props[0], price='890.45')
    # let previous runs wait
    time.sleep(1)
    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
    assert b'1 watches queued for rechecking.' in res.data
    wait_for_all_checks(client)
    res = client.get(url_for("index"))
    assert b'890.45' in res.data
    assert b'unviewed' in res.data

    client.get(url_for("mark_all_viewed"))

    # price changed to something MORE than max (1100.10), SHOULD be a change
    set_original_response(props_markup=instock_props[0], price='1890.45')
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    res = client.get(url_for("index"))
    assert b'1,890.45' in res.data or b'1890.45' in res.data  # depending on locale
    assert b'unviewed' in res.data

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
|
||||||
|
|
||||||
|
|
||||||
|
def test_itemprop_percent_threshold(client, live_server):
    """The percentage threshold decides whether a price change is flagged.

    With ``price_change_threshold_percent`` set to 5.0, small moves
    (950.95 -> 960.45, 1960.45 -> 1950.45) must update the displayed price
    without marking the watch unviewed, while a large jump
    (960.45 -> 1960.45) must mark it unviewed.
    """
    #live_server_setup(live_server)

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data

    test_url = url_for('test_endpoint', _external=True)

    set_original_response(props_markup=instock_props[0], price="950.95")
    client.post(
        url_for("form_quick_watch_add"),
        data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
        follow_redirects=True
    )

    # Let the first check finish before the threshold is configured
    wait_for_all_checks(client)

    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"follow_price_changes": "y",
              "price_change_threshold_percent": 5.0,
              "url": test_url,
              "tags": "",
              "headers": "",
              'fetch_backend': "html_requests"
              },
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
    wait_for_all_checks(client)

    # Basic change should not trigger
    set_original_response(props_markup=instock_props[0], price='960.45')
    client.get(url_for("form_watch_checknow"))
    wait_for_all_checks(client)
    res = client.get(url_for("index"))
    assert b'960.45' in res.data
    assert b'unviewed' not in res.data

    # Bigger INCREASE change than the threshold should trigger
    set_original_response(props_markup=instock_props[0], price='1960.45')
    client.get(url_for("form_watch_checknow"))
    wait_for_all_checks(client)
    res = client.get(url_for("index"))
    # Each locale variant needs its own membership test: `b'x' or b'y' in data`
    # parses as `b'x' or (b'y' in data)` and is always truthy.
    assert b'1,960.45' in res.data or b'1960.45' in res.data  # depending on locale
    assert b'unviewed' in res.data

    # Small decrease should NOT trigger
    client.get(url_for("mark_all_viewed"))
    set_original_response(props_markup=instock_props[0], price='1950.45')
    client.get(url_for("form_watch_checknow"))
    wait_for_all_checks(client)
    res = client.get(url_for("index"))
    assert b'1,950.45' in res.data or b'1950.45' in res.data  # depending on locale
    assert b'unviewed' not in res.data

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
|
||||||
|
|
||||||
|
def test_data_sanity(client, live_server):
    """Price data must not leak between watches, and the edit page must load for a second-endpoint watch."""
    #live_server_setup(live_server)

    cleanup = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in cleanup.data

    test_url = url_for('test_endpoint', _external=True)
    test_url2 = url_for('test_endpoint2', _external=True)

    set_original_response(props_markup=instock_props[0], price="950.95")
    client.post(
        url_for("form_quick_watch_add"),
        data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
        follow_redirects=True,
    )

    wait_for_all_checks(client)
    overview = client.get(url_for("index"))
    assert b'950.95' in overview.data

    # Check the restock model object doesn't store the value by mistake and reuse it in a new watch
    client.post(
        url_for("form_quick_watch_add"),
        data={"url": test_url2, "tags": 'restock tests', 'processor': 'restock_diff'},
        follow_redirects=True,
    )
    wait_for_all_checks(client)
    overview = client.get(url_for("index"))
    page_text = overview.data.decode()
    assert page_text.count("950.95") == 1, "Price should only show once (for the watch added, no other watches yet)"

    ## different test, check the edit page works on an empty request result
    cleanup = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in cleanup.data

    client.post(
        url_for("form_quick_watch_add"),
        data={"url": test_url2, "tags": 'restock tests', 'processor': 'restock_diff'},
        follow_redirects=True,
    )
    wait_for_all_checks(client)

    edit_view = client.get(url_for("edit_page", uuid="first"))
    assert test_url2.encode('utf-8') in edit_view.data
|
@ -0,0 +1,21 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
# run from dir above changedetectionio/ dir
|
||||||
|
# python3 -m unittest changedetectionio.tests.unit.test_restock_logic
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
import os
|
||||||
|
|
||||||
|
from changedetectionio.processors import restock_diff
|
||||||
|
|
||||||
|
# mostly
|
||||||
|
class TestDiffBuilder(unittest.TestCase):
    """Unit checks for the ``restock_diff.is_between`` range helper."""

    def test_logic(self):
        """``is_between`` accepts numeric or ``None`` lower bounds and rejects values above ``upper``."""
        # unittest assertion methods are used instead of bare `assert` so the
        # checks still run under `python -O` and report the offending value.
        self.assertTrue(restock_diff.is_between(number=10, lower=9, upper=11), "Between 9 and 11")
        # A lower bound of 0 is passed here, distinct from the lower=None case below
        self.assertTrue(restock_diff.is_between(number=10, lower=0, upper=11), "Between 0 and 11")
        self.assertTrue(restock_diff.is_between(number=10, lower=None, upper=11), "Between None and 11")
        self.assertFalse(restock_diff.is_between(number=12, lower=None, upper=11), "12 is not between None and 11")
|
||||||
|
|
||||||
|
# Allow running this file directly as a script in addition to `python3 -m unittest`.
if __name__ == "__main__":
    unittest.main()
|
Loading…
Reference in new issue