Price detection/scraping - Adding extra element training data (#2582)

pull/2602/head
dgtlmoon 5 months ago committed by GitHub
parent 6dd1fa2b88
commit 7071df061a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -164,6 +164,15 @@ visibleElementsArray.forEach(function (element) {
} }
} }
let label = "not-interesting" // A placeholder, the actual labels for training are done by hand for now
let text = element.textContent.trim().slice(0, 30).trim();
while (/\n{2,}|\t{2,}/.test(text)) {
text = text.replace(/\n{2,}/g, '\n').replace(/\t{2,}/g, '\t')
}
// Try to identify any possible currency amounts "Sale: 4000" or "Sale now 3000 Kc", can help with the training.
const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6)) ) && /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,)/.test(text) ;
size_pos.push({ size_pos.push({
xpath: xpath_result, xpath: xpath_result,
@ -171,9 +180,16 @@ visibleElementsArray.forEach(function (element) {
height: Math.round(bbox['height']), height: Math.round(bbox['height']),
left: Math.floor(bbox['left']), left: Math.floor(bbox['left']),
top: Math.floor(bbox['top']) + scroll_y, top: Math.floor(bbox['top']) + scroll_y,
// tagName used by Browser Steps
tagName: (element.tagName) ? element.tagName.toLowerCase() : '', tagName: (element.tagName) ? element.tagName.toLowerCase() : '',
// tagtype used by Browser Steps
tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '', tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '',
isClickable: window.getComputedStyle(element).cursor == "pointer" isClickable: window.getComputedStyle(element).cursor === "pointer",
// Used by the keras trainer
fontSize: window.getComputedStyle(element).getPropertyValue('font-size'),
fontWeight: window.getComputedStyle(element).getPropertyValue('font-weight'),
hasDigitCurrency: hasDigitCurrency,
label: label,
}); });
}); });

Loading…
Cancel
Save