diff --git a/.github/workflows/test-only.yml b/.github/workflows/test-only.yml index 96fdd0d3..e06e619f 100644 --- a/.github/workflows/test-only.yml +++ b/.github/workflows/test-only.yml @@ -50,10 +50,13 @@ jobs: run: | # Selenium fetch - docker run -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py' + docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py' # Playwright/Browserless fetch - docker run -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py && pytest tests/visualselector/test_fetch_data.py' + docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py && pytest tests/visualselector/test_fetch_data.py' + + # restock detection via playwright - added name=changedet here so that playwright/browserless can connect to it + docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py' - name: Test proxy interaction run: | diff --git a/README.md b/README.md index f02e2045..eb4d1ac1 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -## Web Site Change Detection, Monitoring and Notification. +## Web Site Change Detection, Restock monitoring and notifications. 
**_Detect website content changes and perform meaningful actions - trigger notifications via Discord, Email, Slack, Telegram, API calls and many more._** diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index ace5e487..9f185b2b 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -64,6 +64,9 @@ app.config.exit = Event() app.config['NEW_VERSION_AVAILABLE'] = False +if os.getenv('FLASK_SERVER_NAME'): + app.config['SERVER_NAME'] = os.getenv('FLASK_SERVER_NAME') + #app.config["EXPLAIN_TEMPLATE_LOADING"] = True # Disables caching of the templates @@ -511,8 +514,9 @@ def changedetection_app(config=None, datastore_o=None): # https://wtforms.readthedocs.io/en/3.0.x/forms/#wtforms.form.Form.populate_obj ? def edit_page(uuid): - from changedetectionio import forms - from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config + from . import forms + from .blueprint.browser_steps.browser_steps import browser_step_ui_config + from . import processors using_default_check_time = True # More for testing, possible to return the first/only @@ -527,6 +531,15 @@ def changedetection_app(config=None, datastore_o=None): flash("No watch with the UUID %s found." 
% (uuid), "error") return redirect(url_for('index')) + switch_processor = request.args.get('switch_processor') + if switch_processor: + for p in processors.available_processors(): + if p[0] == switch_processor: + datastore.data['watching'][uuid]['processor'] = switch_processor + flash(f"Switched to mode - {p[1]}.") + datastore.clear_watch_history(uuid) + return redirect(url_for('edit_page', uuid=uuid))  # must be returned, otherwise ?switch_processor= stays in the URL and a reload clears the history again + # be sure we update with a copy instead of accidently editing the live object by reference default = deepcopy(datastore.data['watching'][uuid]) @@ -633,6 +646,7 @@ def changedetection_app(config=None, datastore_o=None): visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and is_html_webdriver output = render_template("edit.html", + available_processors=processors.available_processors(), browser_steps_config=browser_step_ui_config, current_base_url=datastore.data['settings']['application']['base_url'], emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False), @@ -735,6 +749,8 @@ def changedetection_app(config=None, datastore_o=None): @login_optionally_required def import_page(): remaining_urls = [] + from . 
import forms + if request.method == 'POST': from .importer import import_url_list, import_distill_io_json @@ -742,7 +758,7 @@ def changedetection_app(config=None, datastore_o=None): if request.values.get('urls') and len(request.values.get('urls').strip()): # Import and push into the queue for immediate update check importer = import_url_list() - importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore) + importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor')) for uuid in importer.new_uuids: update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) @@ -760,9 +776,12 @@ def changedetection_app(config=None, datastore_o=None): update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) - + form = forms.importForm(formdata=request.form if request.method == 'POST' else None, +# data=default, + ) # Could be some remaining, or we could be on GET output = render_template("import.html", + form=form, import_url_list_remaining="\n".join(remaining_urls), original_distill_json='' ) @@ -1126,7 +1145,8 @@ def changedetection_app(config=None, datastore_o=None): return redirect(url_for('index')) add_paused = request.form.get('edit_and_watch_submit_button') != None - new_uuid = datastore.add_watch(url=url, tag=request.form.get('tag').strip(), extras={'paused': add_paused}) + processor = request.form.get('processor', 'text_json_diff') + new_uuid = datastore.add_watch(url=url, tag=request.form.get('tag').strip(), extras={'paused': add_paused, 'processor': processor}) if new_uuid: if add_paused: diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py index 475e90c5..1b31aef3 100644 --- a/changedetectionio/content_fetcher.py +++ b/changedetectionio/content_fetcher.py @@ -78,18 +78,18 @@ class ReplyWithContentButNoText(Exception): return class Fetcher(): - 
error = None - status_code = None - content = None - headers = None browser_steps = None browser_steps_screenshot_path = None - + content = None + error = None fetcher_description = "No description" + headers = None + status_code = None webdriver_js_execute_code = None - xpath_element_js = "" - xpath_data = None + xpath_element_js = "" + instock_data = None + instock_data_js = "" # Will be needed in the future by the VisualSelector, always get this where possible. screenshot = False @@ -103,6 +103,7 @@ class Fetcher(): from pkg_resources import resource_string # The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector self.xpath_element_js = resource_string(__name__, "res/xpath_element_scraper.js").decode('utf-8') + self.instock_data_js = resource_string(__name__, "res/stock-not-in-stock.js").decode('utf-8') @abstractmethod @@ -373,7 +374,6 @@ class base_html_playwright(Fetcher): raise EmptyReply(url=url, status_code=response.status) self.status_code = response.status - self.content = self.page.content() self.headers = response.all_headers() # So we can find an element on the page where its selector was entered manually (maybe not xPath etc) @@ -383,6 +383,7 @@ class base_html_playwright(Fetcher): self.page.evaluate("var include_filters=''") self.xpath_data = self.page.evaluate("async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}") + self.instock_data = self.page.evaluate("async () => {" + self.instock_data_js + "}") # Bug 3 in Playwright screenshot handling # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it diff --git a/changedetectionio/fetchers/__init__.py b/changedetectionio/fetchers/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py index be0c78cc..9c086e37 100644 --- a/changedetectionio/forms.py +++ 
b/changedetectionio/forms.py @@ -344,13 +344,15 @@ class ValidateCSSJSONXPATHInput(object): raise ValidationError("A system-error occurred when validating your jq expression") class quickWatchForm(Form): + from . import processors + url = fields.URLField('URL', validators=[validateURL()]) tag = StringField('Group tag', [validators.Optional()]) watch_submit_button = SubmitField('Watch', render_kw={"class": "pure-button pure-button-primary"}) + processor = RadioField(u'Processor', choices=processors.available_processors(), default="text_json_diff") edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"}) - # Common to a single watch and the global settings class commonSettingsForm(Form): notification_urls = StringListField('Notification URL List', validators=[validators.Optional(), ValidateAppRiseServers()]) @@ -361,6 +363,10 @@ class commonSettingsForm(Form): extract_title_as_title = BooleanField('Extract from document and use as watch title', default=False) webdriver_delay = IntegerField('Wait seconds before extracting text', validators=[validators.Optional(), validators.NumberRange(min=1, message="Should contain one or more seconds")]) +class importForm(Form): + from . 
import processors + processor = RadioField(u'Processor', choices=processors.available_processors(), default="text_json_diff") + urls = TextAreaField('URLs') class SingleBrowserStep(Form): @@ -394,6 +400,8 @@ class watchForm(commonSettingsForm): method = SelectField('Request method', choices=valid_method, default=default_method) ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False) check_unique_lines = BooleanField('Only trigger when new lines appear', default=False) + in_stock_only = BooleanField('Only trigger when product goes BACK to in-stock', default=True) + trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()]) if os.getenv("PLAYWRIGHT_DRIVER_URL"): browser_steps = FieldList(FormField(SingleBrowserStep), min_entries=10) diff --git a/changedetectionio/importer.py b/changedetectionio/importer.py index 3668b356..d49706a1 100644 --- a/changedetectionio/importer.py +++ b/changedetectionio/importer.py @@ -29,6 +29,7 @@ class import_url_list(Importer): data, flash, datastore, + processor=None ): urls = data.split("\n") @@ -52,7 +53,11 @@ class import_url_list(Importer): # Flask wtform validators wont work with basic auth, use validators package # Up to 5000 per batch so we dont flood the server if len(url) and validators.url(url.replace('source:', '')) and good < 5000: - new_uuid = datastore.add_watch(url=url.strip(), tag=tags, write_to_disk_now=False) + extras = None + if processor: + extras = {'processor': processor} + new_uuid = datastore.add_watch(url=url.strip(), tag=tags, write_to_disk_now=False, extras=extras) + if new_uuid: # Straight into the queue. 
self.new_uuids.append(new_uuid) diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py index d25837e9..1901e658 100644 --- a/changedetectionio/model/Watch.py +++ b/changedetectionio/model/Watch.py @@ -23,12 +23,14 @@ base_config = { 'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine. 'extract_text': [], # Extract text by regex after filters 'extract_title_as_title': False, - 'fetch_backend': 'system', + 'fetch_backend': 'system', # plaintext, playwright etc + 'processor': 'text_json_diff', # could be restock_diff or others from .processors 'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')), 'has_ldjson_price_data': None, 'track_ldjson_price_data': None, 'headers': {}, # Extra headers to send 'ignore_text': [], # List of text to ignore when calculating the comparison checksum + 'in_stock_only' : True, # Only trigger change on going to instock from out-of-stock 'include_filters': [], 'last_checked': 0, 'last_error': False, diff --git a/changedetectionio/processors/README.md b/changedetectionio/processors/README.md new file mode 100644 index 00000000..547ae4e8 --- /dev/null +++ b/changedetectionio/processors/README.md @@ -0,0 +1,11 @@ +# Change detection post-processors + +The concept here is to be able to switch between different domain specific problems to solve. + +- `text_json_diff` The traditional text and JSON comparison handler +- `restock_diff` Only cares about detecting if a product looks like it has some text that suggests that it's out of stock, otherwise assumes that it's in stock. 
+ +Some suggestions for the future + +- `graphical` +- `restock_and_price` - extract price AND stock text \ No newline at end of file diff --git a/changedetectionio/processors/__init__.py b/changedetectionio/processors/__init__.py new file mode 100644 index 00000000..0d90e4c9 --- /dev/null +++ b/changedetectionio/processors/__init__.py @@ -0,0 +1,24 @@ +from abc import abstractmethod +import hashlib + + +class difference_detection_processor(): + + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + @abstractmethod + def run(self, uuid, skip_when_checksum_same=True): + update_obj = {'last_notification_error': False, 'last_error': False} + some_data = 'xxxxx' + update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest() + changed_detected = False + return changed_detected, update_obj, ''.encode('utf-8') + + +def available_processors(): + from . import restock_diff, text_json_diff + x=[('text_json_diff', text_json_diff.name), ('restock_diff', restock_diff.name)] + # @todo Make this smarter with introspection of sorts. + return x diff --git a/changedetectionio/processors/restock_diff.py b/changedetectionio/processors/restock_diff.py new file mode 100644 index 00000000..34b41d62 --- /dev/null +++ b/changedetectionio/processors/restock_diff.py @@ -0,0 +1,125 @@ + +import hashlib +import os +import re +import urllib3 +from . 
import difference_detection_processor +from changedetectionio import content_fetcher +from copy import deepcopy + +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + +name = 'Re-stock detection' +description = 'Detects if the product goes back to in-stock' + +class perform_site_check(difference_detection_processor): + screenshot = None + xpath_data = None + + def __init__(self, *args, datastore, **kwargs): + super().__init__(*args, **kwargs) + self.datastore = datastore + + def run(self, uuid, skip_when_checksum_same=True): + + # DeepCopy so we can be sure we don't accidently change anything by reference + watch = deepcopy(self.datastore.data['watching'].get(uuid)) + + if not watch: + raise Exception("Watch no longer exists.") + + # Protect against file:// access + if re.search(r'^file', watch.get('url', ''), re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False): + raise Exception( + "file:// type access is denied for security reasons." + ) + + # Unset any existing notification error + update_obj = {'last_notification_error': False, 'last_error': False} + extra_headers = watch.get('headers', []) + + # Tweak the base config with the per-watch ones + request_headers = deepcopy(self.datastore.data['settings']['headers']) + request_headers.update(extra_headers) + + # https://github.com/psf/requests/issues/4525 + # Requests doesnt yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot + # do this by accident. 
+ if 'Accept-Encoding' in request_headers and "br" in request_headers['Accept-Encoding']: + request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '') + + timeout = self.datastore.data['settings']['requests'].get('timeout') + + url = watch.link + + request_body = self.datastore.data['watching'][uuid].get('body') + request_method = self.datastore.data['watching'][uuid].get('method') + ignore_status_codes = self.datastore.data['watching'][uuid].get('ignore_status_codes', False) + + # Pluggable content fetcher + prefer_backend = watch.get_fetch_backend + if not prefer_backend or prefer_backend == 'system': + prefer_backend = self.datastore.data['settings']['application']['fetch_backend'] + + if hasattr(content_fetcher, prefer_backend): + klass = getattr(content_fetcher, prefer_backend) + else: + # If the klass doesnt exist, just use a default + klass = getattr(content_fetcher, "html_requests") + + proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=uuid) + proxy_url = None + if proxy_id: + proxy_url = self.datastore.proxy_list.get(proxy_id).get('url') + print("UUID {} Using proxy {}".format(uuid, proxy_url)) + + fetcher = klass(proxy_override=proxy_url) + + # Configurable per-watch or global extra delay before extracting text (for webDriver types) + system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None) + if watch['webdriver_delay'] is not None: + fetcher.render_extract_delay = watch.get('webdriver_delay') + elif system_webdriver_delay is not None: + fetcher.render_extract_delay = system_webdriver_delay + + # Could be removed if requests/plaintext could also return some info? 
+ if prefer_backend != 'html_webdriver': + raise Exception("Re-stock detection requires Chrome or compatible webdriver/playwright fetcher to work") + + if watch.get('webdriver_js_execute_code') is not None and watch.get('webdriver_js_execute_code').strip(): + fetcher.webdriver_js_execute_code = watch.get('webdriver_js_execute_code') + + fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch.get('include_filters')) + fetcher.quit() + + self.screenshot = fetcher.screenshot + self.xpath_data = fetcher.xpath_data + + # Track the content type + update_obj['content_type'] = fetcher.headers.get('Content-Type', '') + update_obj["last_check_status"] = fetcher.get_last_status_code() + + # Main detection method - instock_data keeps its None default unless the fetcher evaluated stock-not-in-stock.js, so fail with a clear error instead of an AttributeError on the final .encode() + fetched_md5 = None + if fetcher.instock_data: + fetched_md5 = hashlib.md5(fetcher.instock_data.encode('utf-8')).hexdigest() + # 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold. + update_obj["in_stock"] = True if fetcher.instock_data == 'Possibly in stock' else False + else: + raise Exception("Re-stock detection did not receive any stock information from the fetcher") + # The main thing that all this at the moment comes down to :) + changed_detected = False + + if watch.get('previous_md5') and watch.get('previous_md5') != fetched_md5: + # Yes if we only care about it going to instock, AND we are in stock + if watch.get('in_stock_only') and update_obj["in_stock"]: + changed_detected = True + + if not watch.get('in_stock_only'): + # All cases + changed_detected = True + + # Always record the new checksum + update_obj["previous_md5"] = fetched_md5 + + return changed_detected, update_obj, fetcher.instock_data.encode('utf-8') diff --git a/changedetectionio/fetchers/text_json_diff.py b/changedetectionio/processors/text_json_diff.py similarity index 98% rename from changedetectionio/fetchers/text_json_diff.py rename to changedetectionio/processors/text_json_diff.py index 9faea305..14ce14f3 100644 --- a/changedetectionio/fetchers/text_json_diff.py +++ 
b/changedetectionio/processors/text_json_diff.py @@ -10,10 +10,14 @@ import urllib3 from changedetectionio import content_fetcher, html_tools from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT from copy import deepcopy +from . import difference_detection_processor urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) +name = 'Webpage Text/HTML, JSON and PDF changes' +description = 'Detects all text changes where possible' + class FilterNotFoundInResponse(ValueError): def __init__(self, msg): ValueError.__init__(self, msg) @@ -25,7 +29,7 @@ class PDFToHTMLToolNotFound(ValueError): # Some common stuff here that can be moved to a base class # (set_proxy_from_list) -class perform_site_check(): +class perform_site_check(difference_detection_processor): screenshot = None xpath_data = None @@ -55,7 +59,7 @@ class perform_site_check(): watch = deepcopy(self.datastore.data['watching'].get(uuid)) if not watch: - return + raise Exception("Watch no longer exists.") # Protect against file:// access if re.search(r'^file', watch.get('url', ''), re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False): diff --git a/changedetectionio/res/stock-not-in-stock.js b/changedetectionio/res/stock-not-in-stock.js new file mode 100644 index 00000000..8973e7da --- /dev/null +++ b/changedetectionio/res/stock-not-in-stock.js @@ -0,0 +1,56 @@ +function isItemInStock() { + // @todo Pass these in so the same list can be used in non-JS fetchers + const outOfStockTexts = [ + '0 in stock', + 'agotado', + 'artikel zurzeit vergriffen', + 'available for back order', + 'backordered', + 'brak w magazynie', + 'brak na stanie', + 'coming soon', + 'currently unavailable', + 'en rupture de stock', + 'as soon as stock is available', + 'message if back in stock', + 'nachricht bei', + 'nicht auf lager', + 'nicht lieferbar', + 'nicht zur verfügung', + 'no disponible temporalmente', + 'not in stock', + 'out of stock', + 'out-of-stock', + 'não 
estamos a aceitar encomendas', + 'produkt niedostępny', + 'no longer in stock', + 'sold out', + 'temporarily out of stock', + 'temporarily unavailable', + 'we do not currently have an estimate of when this product will be back in stock.', + 'zur zeit nicht an lager', + ]; + + const elementsWithZeroChildren = Array.from(document.getElementsByTagName('*')).filter(element => element.children.length === 0); + for (let i = elementsWithZeroChildren.length - 1; i >= 0; i--) { + const element = elementsWithZeroChildren[i]; + if (element.offsetWidth > 0 || element.offsetHeight > 0 || element.getClientRects().length > 0) { + var elementText=""; + if (element.tagName.toLowerCase() === "input") { + elementText = element.value.toLowerCase(); + } else { + elementText = element.textContent.toLowerCase(); + } + + for (const outOfStockText of outOfStockTexts) { + if (elementText.includes(outOfStockText)) { + return elementText; // item is out of stock + } + } + } + } + return 'Possibly in stock'; // possibly in stock, cant decide otherwise. 
+} + +// returns the element text that makes it think it's out of stock +return isItemInStock(); \ No newline at end of file diff --git a/changedetectionio/static/styles/scss/styles.scss b/changedetectionio/static/styles/scss/styles.scss index 213e8c5f..4ed3412f 100644 --- a/changedetectionio/static/styles/scss/styles.scss +++ b/changedetectionio/static/styles/scss/styles.scss @@ -241,6 +241,10 @@ body:before { font-size: 85%; } +.button-xsmall { + font-size: 70%; +} + .fetch-error { padding-top: 1em; font-size: 80%; @@ -1044,3 +1048,27 @@ ul { vertical-align: middle; } + +#quick-watch-processor-type { + color: #fff; + ul { + padding: 0.3rem; + li { + list-style: none; +} + } + +} +.restock-label { + &.in-stock { + background-color: var(--color-background-button-green); + color: #fff; + } + &.not-in-stock { + background-color: var(--color-background-button-cancel); + color: #777; + } + padding: 3px; + border-radius: 3px; + white-space: nowrap; +} diff --git a/changedetectionio/static/styles/styles.css b/changedetectionio/static/styles/styles.css index d942f6f7..c8497847 100644 --- a/changedetectionio/static/styles/styles.css +++ b/changedetectionio/static/styles/styles.css @@ -432,6 +432,9 @@ body:before { .button-small { font-size: 85%; } +.button-xsmall { + font-size: 70%; } + .fetch-error { padding-top: 1em; font-size: 80%; @@ -980,3 +983,21 @@ ul { display: inline-block; height: 0.8rem; vertical-align: middle; } + +#quick-watch-processor-type { + color: #fff; } + #quick-watch-processor-type ul { + padding: 0.3rem; } + #quick-watch-processor-type ul li { + list-style: none; } + +.restock-label { + padding: 3px; + border-radius: 3px; + white-space: nowrap; } + .restock-label.in-stock { + background-color: var(--color-background-button-green); + color: #fff; } + .restock-label.not-in-stock { + background-color: var(--color-background-button-cancel); + color: #777; } diff --git a/changedetectionio/store.py b/changedetectionio/store.py index ad16de93..9ffadf9c 
100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -287,6 +287,7 @@ class ChangeDetectionStore: 'method', 'paused', 'previous_md5', + 'processor', 'subtractive_selectors', 'tag', 'text_should_not_be_present', diff --git a/changedetectionio/templates/_common_fields.jinja b/changedetectionio/templates/_common_fields.jinja index 5c2abf29..fe88741a 100644 --- a/changedetectionio/templates/_common_fields.jinja +++ b/changedetectionio/templates/_common_fields.jinja @@ -20,11 +20,11 @@ </ul> </div> <div class="notifications-wrapper"> - <a id="send-test-notification" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Send test notification</a> + <a id="send-test-notification" class="pure-button button-secondary button-xsmall" >Send test notification</a> {% if emailprefix %} - <a id="add-email-helper" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Add email</a> + <a id="add-email-helper" class="pure-button button-secondary button-xsmall" >Add email</a> {% endif %} - <a href="{{url_for('notification_logs')}}" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Notification debug logs</a> + <a href="{{url_for('notification_logs')}}" class="pure-button button-secondary button-xsmall" >Notification debug logs</a> </div> </div> <div id="notification-customisation" class="pure-control-group"> diff --git a/changedetectionio/templates/edit.html b/changedetectionio/templates/edit.html index cf9b1938..298b8c90 100644 --- a/changedetectionio/templates/edit.html +++ b/changedetectionio/templates/edit.html @@ -34,8 +34,15 @@ {% if playwright_enabled %} <li class="tab"><a id="browsersteps-tab" href="#browser-steps">Browser Steps</a></li> {% endif %} + + {% if watch['processor'] == 'text_json_diff' %} <li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li> <li class="tab"><a href="#filters-and-triggers">Filters & Triggers</a></li> + {% endif %} + + {% 
if watch['processor'] == 'restock_diff' %} + <li class="tab"><a href="#restock">Restock Detection</a></li> + {% endif %} <li class="tab"><a href="#notifications">Notifications</a></li> </ul> </div> @@ -51,6 +58,16 @@ {{ render_field(form.url, placeholder="https://...", required=true, class="m-d") }} <span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span><br> <span class="pure-form-message-inline">You can use variables in the URL, perfect for inserting the current date and other logic, <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a></span><br> + <span class="pure-form-message-inline"> + {% if watch['processor'] == 'text_json_diff' %} + Current mode: <strong>Webpage Text/HTML, JSON and PDF changes.</strong><br> + <a href="{{url_for('edit_page', uuid=uuid)}}?switch_processor=restock_diff" class="pure-button button-xsmall">Switch to re-stock detection mode.</a> + {% else %} + Current mode: <strong>Re-stock detection.</strong><br> + <a href="{{url_for('edit_page', uuid=uuid)}}?switch_processor=text_json_diff" class="pure-button button-xsmall">Switch to Webpage Text/HTML, JSON and PDF changes mode.</a> + {% endif %} + </span> + </div> <div class="pure-control-group"> {{ render_field(form.title, class="m-d") }} @@ -214,6 +231,7 @@ User-Agent: wonderbra 1.0") }} </fieldset> </div> + {% if watch['processor'] == 'text_json_diff' %} <div class="tab-pane-inner" id="filters-and-triggers"> <div class="pure-control-group"> <strong>Pro-tips:</strong><br> @@ -345,7 +363,20 @@ Unavailable") }} </div> </fieldset> </div> + {% endif %} + {% if watch['processor'] == 'restock_diff' %} + <div class="tab-pane-inner" id="restock"> + <fieldset> + <div class="pure-control-group"> + {{ render_checkbox_field(form.in_stock_only) }} + 
<span class="pure-form-message-inline">Only trigger notifications when page changes from <strong>out of stock</strong> to <strong>back in stock</strong></span> + </div> + </fieldset> + </div> + {% endif %} + + {% if watch['processor'] == 'text_json_diff' %} <div class="tab-pane-inner visual-selector-ui" id="visualselector"> <img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}"> @@ -378,6 +409,7 @@ Unavailable") }} </div> </fieldset> </div> + {% endif %} <div id="actions"> <div class="pure-control-group"> diff --git a/changedetectionio/templates/import.html b/changedetectionio/templates/import.html index 6034f1ce..d82ef0c4 100644 --- a/changedetectionio/templates/import.html +++ b/changedetectionio/templates/import.html @@ -1,5 +1,6 @@ {% extends 'base.html' %} {% block content %} +{% from '_helpers.jinja' import render_field %} <script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script> <div class="edit-form monospaced-textarea"> @@ -14,7 +15,6 @@ <form class="pure-form pure-form-aligned" action="{{url_for('import_page')}}" method="POST"> <input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/> <div class="tab-pane-inner" id="url-list"> - <fieldset class="pure-group"> <legend> Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma (,): @@ -23,7 +23,7 @@ <br> URLs which do not pass validation will stay in the textarea. 
</legend> - + {{ render_field(form.processor, class="processor") }} <textarea name="urls" class="pure-input-1-2" placeholder="https://" style="width: 100%; @@ -31,15 +31,17 @@ white-space: pre; overflow-wrap: normal; overflow-x: scroll;" rows="25">{{ import_url_list_remaining }}</textarea> - </fieldset> +<div id="quick-watch-processor-type"> + + </div> </div> <div class="tab-pane-inner" id="distill-io"> - <fieldset class="pure-group"> + <legend> Copy and Paste your Distill.io watch 'export' file, this should be a JSON file.<br> This is <i>experimental</i>, supported fields are <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, the rest (including <code>schedule</code>) are ignored. @@ -75,7 +77,7 @@ ] } " rows="25">{{ original_distill_json }}</textarea> - </fieldset> + </div> <button type="submit" class="pure-button pure-input-1-2 pure-button-primary">Import</button> </form> diff --git a/changedetectionio/templates/watch-overview.html b/changedetectionio/templates/watch-overview.html index 8d6e70f9..24027ff4 100644 --- a/changedetectionio/templates/watch-overview.html +++ b/changedetectionio/templates/watch-overview.html @@ -21,6 +21,10 @@ {{ render_simple_field(form.edit_and_watch_submit_button, title="Edit first then Watch") }} </div> </div> + <div id="quick-watch-processor-type"> + {{ render_simple_field(form.processor, title="Edit first then Watch") }} + </div> + </fieldset> <span style="color:#eee; font-size: 80%;"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" /> Tip: You can also add 'shared' watches. 
<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></a></span> </form> @@ -28,12 +32,12 @@ <form class="pure-form" action="{{ url_for('form_watch_list_checkbox_operations') }}" method="POST" id="watch-list-form"> <input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/> <div id="checkbox-operations"> - <button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="pause">Pause</button> - <button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="unpause">UnPause</button> - <button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="mute">Mute</button> - <button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="unmute">UnMute</button> - <button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="recheck">Recheck</button> - <button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="notification-default">Use default notification</button> + <button class="pure-button button-secondary button-xsmall" name="op" value="pause">Pause</button> + <button class="pure-button button-secondary button-xsmall" name="op" value="unpause">UnPause</button> + <button class="pure-button button-secondary button-xsmall" name="op" value="mute">Mute</button> + <button class="pure-button button-secondary button-xsmall" name="op" value="unmute">UnMute</button> + <button class="pure-button button-secondary button-xsmall" name="op" value="recheck">Recheck</button> + <button class="pure-button button-secondary button-xsmall" name="op" value="notification-default">Use default notification</button> <button class="pure-button button-secondary button-xsmall" style="background: #dd4242; font-size: 70%" name="op" value="delete">Delete</button> </div> <div> @@ -72,7 +76,7 @@ {% if not ( loop.index >= 3 and loop.index <=4) 
%}{% continue %}{% endif %} --> #} <tr id="{{ watch.uuid }}" - class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }} + class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }} processor-{{ watch['processor'] }} {% if watch.last_error is defined and watch.last_error != False %}error{% endif %} {% if watch.last_notification_error is defined and watch.last_notification_error != False %}error{% endif %} {% if watch.paused is defined and watch.paused != False %}paused{% endif %} @@ -113,12 +117,26 @@ {% if watch.last_notification_error is defined and watch.last_notification_error != False %} <div class="fetch-error notification-error"><a href="{{url_for('notification_logs')}}">{{ watch.last_notification_error }}</a></div> {% endif %} - {% if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] %} - <div class="ldjson-price-track-offer">Embedded price data detected, follow only price data? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div> + + {% if watch['processor'] == 'text_json_diff' %} + {% if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] %} + <div class="ldjson-price-track-offer">Embedded price data detected, follow only price data? 
<a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div> + {% endif %} + {% if watch['track_ldjson_price_data'] == 'accepted' %} + <span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}" class="status-icon price-follow-tag-icon"/> Price</span> + {% endif %} {% endif %} - {% if watch['track_ldjson_price_data'] == 'accepted' %} - <span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}" class="status-icon price-follow-tag-icon"/> Price</span> + + {% if watch['processor'] == 'restock_diff' %} + <span class="restock-label {{'in-stock' if watch['in_stock'] else 'not-in-stock' }}" title="detecting restock conditions"> + <!-- maybe some object watch['processor'][restock_diff] or.. --> + {% if watch['last_checked'] %} + {% if watch['in_stock'] %} In stock {% else %} Not in stock {% endif %} + {% else %} + Not yet checked + {% endif %} {% endif %} + {% if not active_tag %} <span class="watch-tag-list">{{ watch.tag}}</span> {% endif %} diff --git a/changedetectionio/tests/restock/__init__.py b/changedetectionio/tests/restock/__init__.py new file mode 100644 index 00000000..085b3d78 --- /dev/null +++ b/changedetectionio/tests/restock/__init__.py @@ -0,0 +1,2 @@ +"""Tests for the app.""" + diff --git a/changedetectionio/tests/restock/conftest.py b/changedetectionio/tests/restock/conftest.py new file mode 100644 index 00000000..430513d4 --- /dev/null +++ b/changedetectionio/tests/restock/conftest.py @@ -0,0 +1,3 @@ +#!/usr/bin/python3 + +from .. 
import conftest diff --git a/changedetectionio/tests/restock/test_restock.py b/changedetectionio/tests/restock/test_restock.py new file mode 100644 index 00000000..7711f247 --- /dev/null +++ b/changedetectionio/tests/restock/test_restock.py @@ -0,0 +1,106 @@ +#!/usr/bin/python3 +import os +import time +from flask import url_for +from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client +from changedetectionio.notification import ( + default_notification_body, + default_notification_format, + default_notification_title, + valid_notification_formats, +) + + +def set_original_response(): + test_return_data = """<html> + <body> + Some initial text<br> + <p>Which is across multiple lines</p> + <br> + So let's see what happens. <br> + <div>price: $10.99</div> + <div id="sametext">Out of stock</div> + </body> + </html> + """ + + with open("test-datastore/endpoint-content.txt", "w") as f: + f.write(test_return_data) + return None + + + +def set_back_in_stock_response(): + test_return_data = """<html> + <body> + Some initial text<br> + <p>Which is across multiple lines</p> + <br> + So let's see what happens. 
<br> + <div>price: $10.99</div> + <div id="sametext">Available!</div> + </body> + </html> + """ + + with open("test-datastore/endpoint-content.txt", "w") as f: + f.write(test_return_data) + return None + +# Add a watch using the restock_diff processor, then check the in-stock / not-in-stock status and notifications as the page changes +def test_restock_detection(client, live_server): + + set_original_response() + #assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test" + + time.sleep(1) + live_server_setup(live_server) + ##################### + notification_url = url_for('test_notification_endpoint', _external=True).replace('http://localhost', 'http://changedet').replace('http', 'json') + + + ##################### + # Set this up for when we remove the notification from the watch, it should fall back to these details + res = client.post( + url_for("settings_page"), + data={"application-notification_urls": notification_url, + "application-notification_title": "fallback-title "+default_notification_title, + "application-notification_body": "fallback-body "+default_notification_body, + "application-notification_format": default_notification_format, + "requests-time_between_check-minutes": 180, + 'application-fetch_backend': "html_webdriver"}, + follow_redirects=True + ) + # Add our URL using the http://changedet hostname, because the docker container (playwright/selenium) won't be able to connect to our usual test url + test_url = url_for('test_endpoint', _external=True).replace('http://localhost', 'http://changedet') + + + client.post( + url_for("form_quick_watch_add"), + data={"url": test_url, "tag": '', 'processor': 'restock_diff'}, + follow_redirects=True + ) + + # Is it correctly shown as NOT in stock? 
+ wait_for_all_checks(client) + res = client.get(url_for("index")) + assert b'not-in-stock' in res.data + + # Is it correctly shown as in stock + set_back_in_stock_response() + client.get(url_for("form_watch_checknow"), follow_redirects=True) + wait_for_all_checks(client) + res = client.get(url_for("index")) + assert b'not-in-stock' not in res.data + + # We should have a notification + time.sleep(2) + assert os.path.isfile("test-datastore/notification.txt") + os.unlink("test-datastore/notification.txt") + + # Default behaviour is to only fire notification when it goes OUT OF STOCK -> IN STOCK + # So here there should be no file, because we go IN STOCK -> OUT OF STOCK + set_original_response() + client.get(url_for("form_watch_checknow"), follow_redirects=True) + wait_for_all_checks(client) + assert not os.path.isfile("test-datastore/notification.txt") diff --git a/changedetectionio/tests/test_ignore_regex_text.py b/changedetectionio/tests/test_ignore_regex_text.py index e9b69a9f..e21ff050 100644 --- a/changedetectionio/tests/test_ignore_regex_text.py +++ b/changedetectionio/tests/test_ignore_regex_text.py @@ -9,7 +9,7 @@ def test_setup(live_server): # Unit test of the stripper # Always we are dealing in utf-8 def test_strip_regex_text_func(): - from ..fetchers import text_json_diff as fetch_site_status + from ..processors import text_json_diff as fetch_site_status test_content = """ but sometimes we want to remove the lines. 
diff --git a/changedetectionio/tests/test_ignore_text.py b/changedetectionio/tests/test_ignore_text.py index 288dffc9..2d64f369 100644 --- a/changedetectionio/tests/test_ignore_text.py +++ b/changedetectionio/tests/test_ignore_text.py @@ -11,7 +11,8 @@ def test_setup(live_server): # Unit test of the stripper # Always we are dealing in utf-8 def test_strip_text_func(): - from ..fetchers import text_json_diff as fetch_site_status + from ..processors import text_json_diff as fetch_site_status + test_content = """ Some content diff --git a/changedetectionio/update_worker.py b/changedetectionio/update_worker.py index 3dd1059c..0667525f 100644 --- a/changedetectionio/update_worker.py +++ b/changedetectionio/update_worker.py @@ -4,7 +4,8 @@ import queue import time from changedetectionio import content_fetcher -from .fetchers.text_json_diff import FilterNotFoundInResponse +from .processors.text_json_diff import FilterNotFoundInResponse + # A single update worker # @@ -152,9 +153,8 @@ class update_worker(threading.Thread): os.unlink(full_path) def run(self): - from .fetchers import text_json_diff as fetch_site_status - update_handler = fetch_site_status.perform_site_check(datastore=self.datastore) + from .processors import text_json_diff, restock_diff while not self.app.config.exit.is_set(): @@ -171,11 +171,21 @@ class update_worker(threading.Thread): changed_detected = False contents = b'' process_changedetection_results = True - update_obj= {} - print("> Processing UUID {} Priority {} URL {}".format(uuid, queued_item_data.priority, self.datastore.data['watching'][uuid]['url'])) + update_obj = {} + print("> Processing UUID {} Priority {} URL {}".format(uuid, queued_item_data.priority, + self.datastore.data['watching'][uuid]['url'])) now = time.time() try: + processor = self.datastore.data['watching'][uuid].get('processor','text_json_diff') + + # @todo some way to switch by name + if processor == 'restock_diff': + update_handler = 
restock_diff.perform_site_check(datastore=self.datastore) + else: + # Used as a default and also by some tests + update_handler = text_json_diff.perform_site_check(datastore=self.datastore) + changed_detected, update_obj, contents = update_handler.run(uuid, skip_when_checksum_same=queued_item_data.item.get('skip_when_checksum_same')) # Re #342 # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes. diff --git a/docs/screenshot.png b/docs/screenshot.png index 8ecabc4f..e6d35826 100644 Binary files a/docs/screenshot.png and b/docs/screenshot.png differ diff --git a/requirements.txt b/requirements.txt index 9b4a2baf..03528821 100644 --- a/requirements.txt +++ b/requirements.txt @@ -68,5 +68,5 @@ pillow # playwright is installed at Dockerfile build time because it's not available on all platforms # Include pytest, so if theres a support issue we can ask them to run these tests on their setup -pytest ~=6.2 +pytest ~=7.2 pytest-flask ~=1.2