Restock detection - Check all elements for text to get stock status from, only consider elements inside the viewport, only consider elements more than 100px from the top (avoid menu) , trim any text returned (#2040)

pull/2107/head
dgtlmoon 1 year ago committed by GitHub
parent 65428655b8
commit 1749c07750
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -61,4 +61,4 @@ class perform_site_check(difference_detection_processor):
# Always record the new checksum # Always record the new checksum
update_obj["previous_md5"] = fetched_md5 update_obj["previous_md5"] = fetched_md5
return changed_detected, update_obj, self.fetcher.instock_data.encode('utf-8') return changed_detected, update_obj, self.fetcher.instock_data.encode('utf-8').strip()

@ -1,117 +1,131 @@
function isItemInStock() { function isItemInStock() {
// @todo Pass these in so the same list can be used in non-JS fetchers // @todo Pass these in so the same list can be used in non-JS fetchers
const outOfStockTexts = [ const outOfStockTexts = [
' أخبرني عندما يتوفر', ' أخبرني عندما يتوفر',
'0 in stock', '0 in stock',
'agotado', 'agotado',
'article épuisé', 'article épuisé',
'artikel zurzeit vergriffen', 'artikel zurzeit vergriffen',
'as soon as stock is available', 'as soon as stock is available',
'ausverkauft', // sold out 'ausverkauft', // sold out
'available for back order', 'available for back order',
'back-order or out of stock', 'back-order or out of stock',
'backordered', 'backordered',
'benachrichtigt mich', // notify me 'benachrichtigt mich', // notify me
'brak na stanie', 'brak na stanie',
'brak w magazynie', 'brak w magazynie',
'coming soon', 'coming soon',
'currently have any tickets for this', 'currently have any tickets for this',
'currently unavailable', 'currently unavailable',
'dostępne wkrótce', 'dostępne wkrótce',
'en rupture de stock', 'en rupture de stock',
'ist derzeit nicht auf lager', 'ist derzeit nicht auf lager',
'item is no longer available', 'item is no longer available',
'let me know when it\'s available', 'let me know when it\'s available',
'message if back in stock', 'message if back in stock',
'nachricht bei', 'nachricht bei',
'nicht auf lager', 'nicht auf lager',
'nicht lieferbar', 'nicht lieferbar',
'nicht zur verfügung', 'nicht zur verfügung',
'niet beschikbaar', 'niet beschikbaar',
'niet leverbaar', 'niet leverbaar',
'no disponible temporalmente', 'no disponible temporalmente',
'no longer in stock', 'no longer in stock',
'no tickets available', 'no tickets available',
'not available', 'not available',
'not currently available', 'not currently available',
'not in stock', 'not in stock',
'notify me when available', 'notify me when available',
'não estamos a aceitar encomendas', 'não estamos a aceitar encomendas',
'out of stock', 'out of stock',
'out-of-stock', 'out-of-stock',
'produkt niedostępny', 'produkt niedostępny',
'sold out', 'sold out',
'sold-out', 'sold-out',
'temporarily out of stock', 'temporarily out of stock',
'temporarily unavailable', 'temporarily unavailable',
'tickets unavailable', 'tickets unavailable',
'tijdelijk uitverkocht', 'tijdelijk uitverkocht',
'unavailable tickets', 'unavailable tickets',
'we do not currently have an estimate of when this product will be back in stock.', 'we do not currently have an estimate of when this product will be back in stock.',
'zur zeit nicht an lager', 'we don\'t know when or if this item will be back in stock.',
'品切れ', 'zur zeit nicht an lager',
'已售完', '品切れ',
'품절' '已售完',
]; '품절'
];
function getElementBaseText(element) {
// .textContent can include text from children which may give the wrong results
// scan only immediate TEXT_NODEs, which will be a child of the element
var text = "";
for (var i = 0; i < element.childNodes.length; ++i)
if (element.childNodes[i].nodeType === Node.TEXT_NODE)
text += element.childNodes[i].textContent;
return text.toLowerCase().trim();
}
const negateOutOfStockRegexs = [ const negateOutOfStockRegexs = [
'[0-9] in stock' '[0-9] in stock'
] ]
var negateOutOfStockRegexs_r = []; var negateOutOfStockRegexs_r = [];
for (let i = 0; i < negateOutOfStockRegexs.length; i++) { for (let i = 0; i < negateOutOfStockRegexs.length; i++) {
negateOutOfStockRegexs_r.push(new RegExp(negateOutOfStockRegexs[0], 'g')); negateOutOfStockRegexs_r.push(new RegExp(negateOutOfStockRegexs[0], 'g'));
} }
// The out-of-stock or in-stock-text is generally always above-the-fold
// and often below-the-fold is a list of related products that may or may not contain trigger text
// so it's good to filter to just the 'above the fold' elements
// and it should be atleast 100px from the top to ignore items in the toolbar, sometimes menu items like "Coming soon" exist
const elementsToScan = Array.from(document.getElementsByTagName('*')).filter(element => element.getBoundingClientRect().top + window.scrollY <= window.innerHeight && element.getBoundingClientRect().top + window.scrollY >= 100);
const elementsWithZeroChildren = Array.from(document.getElementsByTagName('*')).filter(element => element.children.length === 0); var elementText = "";
// REGEXS THAT REALLY MEAN IT'S IN STOCK // REGEXS THAT REALLY MEAN IT'S IN STOCK
for (let i = elementsWithZeroChildren.length - 1; i >= 0; i--) { for (let i = elementsToScan.length - 1; i >= 0; i--) {
const element = elementsWithZeroChildren[i]; const element = elementsToScan[i];
if (element.offsetWidth > 0 || element.offsetHeight > 0 || element.getClientRects().length > 0) { elementText = "";
var elementText=""; if (element.tagName.toLowerCase() === "input") {
if (element.tagName.toLowerCase() === "input") { elementText = element.value.toLowerCase();
elementText = element.value.toLowerCase(); } else {
} else { elementText = getElementBaseText(element);
elementText = element.textContent.toLowerCase(); }
}
if (elementText.length) { if (elementText.length) {
// try which ones could mean its in stock // try which ones could mean its in stock
for (let i = 0; i < negateOutOfStockRegexs.length; i++) { for (let i = 0; i < negateOutOfStockRegexs.length; i++) {
if (negateOutOfStockRegexs_r[i].test(elementText)) { if (negateOutOfStockRegexs_r[i].test(elementText)) {
return 'Possibly in stock'; return 'Possibly in stock';
} }
}
} }
}
} }
}
// OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK // OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK
for (let i = elementsWithZeroChildren.length - 1; i >= 0; i--) { for (let i = elementsToScan.length - 1; i >= 0; i--) {
const element = elementsWithZeroChildren[i]; const element = elementsToScan[i];
if (element.offsetWidth > 0 || element.offsetHeight > 0 || element.getClientRects().length > 0) { if (element.offsetWidth > 0 || element.offsetHeight > 0 || element.getClientRects().length > 0) {
var elementText=""; elementText = "";
if (element.tagName.toLowerCase() === "input") { if (element.tagName.toLowerCase() === "input") {
elementText = element.value.toLowerCase(); elementText = element.value.toLowerCase();
} else { } else {
elementText = element.textContent.toLowerCase(); elementText = getElementBaseText(element);
} }
if (elementText.length) { if (elementText.length) {
// and these mean its out of stock // and these mean its out of stock
for (const outOfStockText of outOfStockTexts) { for (const outOfStockText of outOfStockTexts) {
if (elementText.includes(outOfStockText)) { if (elementText.includes(outOfStockText)) {
return elementText; // item is out of stock return outOfStockText; // item is out of stock
} }
}
}
} }
}
} }
}
return 'Possibly in stock'; // possibly in stock, cant decide otherwise. return 'Possibly in stock'; // possibly in stock, cant decide otherwise.
} }
// returns the element text that makes it think it's out of stock // returns the element text that makes it think it's out of stock
return isItemInStock(); return isItemInStock().trim()

Loading…
Cancel
Save