From 1749c07750c3ae6edb0dad7a8174c733620e3b29 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Fri, 12 Jan 2024 23:11:56 +0100 Subject: [PATCH] Restock detection - Check all elements for text to get stock status from, only consider elements inside the viewport, only consider elements more than 100px from the top (avoid menu) , trim any text returned (#2040) --- changedetectionio/processors/restock_diff.py | 2 +- changedetectionio/res/stock-not-in-stock.js | 214 ++++++++++--------- 2 files changed, 115 insertions(+), 101 deletions(-) diff --git a/changedetectionio/processors/restock_diff.py b/changedetectionio/processors/restock_diff.py index 9751a195..a590f73d 100644 --- a/changedetectionio/processors/restock_diff.py +++ b/changedetectionio/processors/restock_diff.py @@ -61,4 +61,4 @@ class perform_site_check(difference_detection_processor): # Always record the new checksum update_obj["previous_md5"] = fetched_md5 - return changed_detected, update_obj, self.fetcher.instock_data.encode('utf-8') + return changed_detected, update_obj, self.fetcher.instock_data.encode('utf-8').strip() diff --git a/changedetectionio/res/stock-not-in-stock.js b/changedetectionio/res/stock-not-in-stock.js index 12ba53b3..681cbd58 100644 --- a/changedetectionio/res/stock-not-in-stock.js +++ b/changedetectionio/res/stock-not-in-stock.js @@ -1,117 +1,131 @@ function isItemInStock() { - // @todo Pass these in so the same list can be used in non-JS fetchers - const outOfStockTexts = [ - ' أخبرني عندما يتوفر', - '0 in stock', - 'agotado', - 'article épuisé', - 'artikel zurzeit vergriffen', - 'as soon as stock is available', - 'ausverkauft', // sold out - 'available for back order', - 'back-order or out of stock', - 'backordered', - 'benachrichtigt mich', // notify me - 'brak na stanie', - 'brak w magazynie', - 'coming soon', - 'currently have any tickets for this', - 'currently unavailable', - 'dostępne wkrótce', - 'en rupture de stock', - 'ist derzeit nicht auf lager', - 'item is no longer available', - 'let me know when it\'s available', - 'message if back in stock', - 'nachricht bei', - 'nicht auf lager', - 'nicht lieferbar', - 'nicht zur verfügung', - 'niet beschikbaar', - 'niet leverbaar', - 'no disponible temporalmente', - 'no longer in stock', - 'no tickets available', - 'not available', - 'not currently available', - 'not in stock', - 'notify me when available', - 'não estamos a aceitar encomendas', - 'out of stock', - 'out-of-stock', - 'produkt niedostępny', - 'sold out', - 'sold-out', - 'temporarily out of stock', - 'temporarily unavailable', - 'tickets unavailable', - 'tijdelijk uitverkocht', - 'unavailable tickets', - 'we do not currently have an estimate of when this product will be back in stock.', - 'zur zeit nicht an lager', - '品切れ', - '已售完', - '품절' - ]; + // @todo Pass these in so the same list can be used in non-JS fetchers + const outOfStockTexts = [ + ' أخبرني عندما يتوفر', + '0 in stock', + 'agotado', + 'article épuisé', + 'artikel zurzeit vergriffen', + 'as soon as stock is available', + 'ausverkauft', // sold out + 'available for back order', + 'back-order or out of stock', + 'backordered', + 'benachrichtigt mich', // notify me + 'brak na stanie', + 'brak w magazynie', + 'coming soon', + 'currently have any tickets for this', + 'currently unavailable', + 'dostępne wkrótce', + 'en rupture de stock', + 'ist derzeit nicht auf lager', + 'item is no longer available', + 'let me know when it\'s available', + 'message if back in stock', + 'nachricht bei', + 'nicht auf lager', + 'nicht lieferbar', + 'nicht zur verfügung', + 'niet beschikbaar', + 'niet leverbaar', + 'no disponible temporalmente', + 'no longer in stock', + 'no tickets available', + 'not available', + 'not currently available', + 'not in stock', + 'notify me when available', + 'não estamos a aceitar encomendas', + 'out of stock', + 'out-of-stock', + 'produkt niedostępny', + 'sold out', + 'sold-out', + 'temporarily out of stock', + 'temporarily unavailable', + 'tickets unavailable', + 'tijdelijk uitverkocht', + 'unavailable tickets', + 'we do not currently have an estimate of when this product will be back in stock.', + 'we don\'t know when or if this item will be back in stock.', + 'zur zeit nicht an lager', + '品切れ', + '已售完', + '품절' + ]; + function getElementBaseText(element) { + // .textContent can include text from children which may give the wrong results + // scan only immediate TEXT_NODEs, which will be a child of the element + var text = ""; + for (var i = 0; i < element.childNodes.length; ++i) + if (element.childNodes[i].nodeType === Node.TEXT_NODE) + text += element.childNodes[i].textContent; + return text.toLowerCase().trim(); + } - const negateOutOfStockRegexs = [ - '[0-9] in stock' - ] - var negateOutOfStockRegexs_r = []; - for (let i = 0; i < negateOutOfStockRegexs.length; i++) { - negateOutOfStockRegexs_r.push(new RegExp(negateOutOfStockRegexs[0], 'g')); - } + const negateOutOfStockRegexs = [ + '[0-9] in stock' + ] + var negateOutOfStockRegexs_r = []; + for (let i = 0; i < negateOutOfStockRegexs.length; i++) { + negateOutOfStockRegexs_r.push(new RegExp(negateOutOfStockRegexs[0], 'g')); + } + // The out-of-stock or in-stock-text is generally always above-the-fold + // and often below-the-fold is a list of related products that may or may not contain trigger text + // so it's good to filter to just the 'above the fold' elements + // and it should be atleast 100px from the top to ignore items in the toolbar, sometimes menu items like "Coming soon" exist + const elementsToScan = Array.from(document.getElementsByTagName('*')).filter(element => element.getBoundingClientRect().top + window.scrollY <= window.innerHeight && element.getBoundingClientRect().top + window.scrollY >= 100); - const elementsWithZeroChildren = Array.from(document.getElementsByTagName('*')).filter(element => element.children.length === 0); + var elementText = ""; - // REGEXS THAT REALLY MEAN IT'S IN STOCK - for (let i = elementsWithZeroChildren.length - 1; i >= 0; i--) { - const element = elementsWithZeroChildren[i]; - if (element.offsetWidth > 0 || element.offsetHeight > 0 || element.getClientRects().length > 0) { - var elementText=""; - if (element.tagName.toLowerCase() === "input") { - elementText = element.value.toLowerCase(); - } else { - elementText = element.textContent.toLowerCase(); - } + // REGEXS THAT REALLY MEAN IT'S IN STOCK + for (let i = elementsToScan.length - 1; i >= 0; i--) { + const element = elementsToScan[i]; + elementText = ""; + if (element.tagName.toLowerCase() === "input") { + elementText = element.value.toLowerCase(); + } else { + elementText = getElementBaseText(element); + } - if (elementText.length) { - // try which ones could mean its in stock - for (let i = 0; i < negateOutOfStockRegexs.length; i++) { - if (negateOutOfStockRegexs_r[i].test(elementText)) { - return 'Possibly in stock'; - } + if (elementText.length) { + // try which ones could mean its in stock + for (let i = 0; i < negateOutOfStockRegexs.length; i++) { + if (negateOutOfStockRegexs_r[i].test(elementText)) { + return 'Possibly in stock'; + } + } } - } } - } - // OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK - for (let i = elementsWithZeroChildren.length - 1; i >= 0; i--) { - const element = elementsWithZeroChildren[i]; - if (element.offsetWidth > 0 || element.offsetHeight > 0 || element.getClientRects().length > 0) { - var elementText=""; - if (element.tagName.toLowerCase() === "input") { - elementText = element.value.toLowerCase(); - } else { - elementText = element.textContent.toLowerCase(); - } + // OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK + for (let i = elementsToScan.length - 1; i >= 0; i--) { + const element = elementsToScan[i]; + if (element.offsetWidth > 0 || element.offsetHeight > 0 || element.getClientRects().length > 0) { + elementText = ""; + if (element.tagName.toLowerCase() === "input") { + elementText = element.value.toLowerCase(); + } else { + elementText = getElementBaseText(element); + } - if (elementText.length) { - // and these mean its out of stock - for (const outOfStockText of outOfStockTexts) { - if (elementText.includes(outOfStockText)) { - return elementText; // item is out of stock - } + if (elementText.length) { + // and these mean its out of stock + for (const outOfStockText of outOfStockTexts) { + if (elementText.includes(outOfStockText)) { + return outOfStockText; // item is out of stock + } + } + } } - } } - } - return 'Possibly in stock'; // possibly in stock, cant decide otherwise. + return 'Possibly in stock'; // possibly in stock, cant decide otherwise. } // returns the element text that makes it think it's out of stock -return isItemInStock(); +return isItemInStock().trim() +