more tweaks

pull/2185/head
dgtlmoon 11 months ago
parent 71a46130b4
commit 491715b9b9

@ -18,6 +18,7 @@ module.exports = async ({page, context}) => {
await page.setBypassCSP(true) await page.setBypassCSP(true)
await page.setExtraHTTPHeaders(req_headers); await page.setExtraHTTPHeaders(req_headers);
var total_size = 0;
if (user_agent) { if (user_agent) {
await page.setUserAgent(user_agent); await page.setUserAgent(user_agent);
@ -42,102 +43,89 @@ module.exports = async ({page, context}) => {
height: 768, height: 768,
deviceScaleFactor: 1, deviceScaleFactor: 1,
}); });
await page.setRequestInterception(true); await page.setRequestInterception(true);
if (disk_cache_dir) { await page.setCacheEnabled(false);
console.log(">>>>>>>>>>>>>>> LOCAL DISK CACHE ENABLED <<<<<<<<<<<<<<<<<<<<<");
}
const fs = require('fs');
const crypto = require('crypto');
function file_is_expired(file_path) {
if (!fs.existsSync(file_path)) {
return true;
}
var stats = fs.statSync(file_path);
const now_date = new Date();
const expire_seconds = 300;
if ((now_date / 1000) - (stats.mtime.getTime() / 1000) > expire_seconds) {
console.log("CACHE EXPIRED: " + file_path);
return true;
}
return false;
} await page.evaluateOnNewDocument('navigator.serviceWorker.register = () => { console.warn("Service Worker registration blocked by Playwright")}');
page.on('request', async (request) => { await page.evaluateOnNewDocument(`
// General blocking of requests that waste traffic
if (block_url_list.some(substring => request.url().toLowerCase().includes(substring))) return request.abort();
if (disk_cache_dir) { const toBlob = HTMLCanvasElement.prototype.toBlob;
const url = request.url(); const toDataURL = HTMLCanvasElement.prototype.toDataURL;
const key = crypto.createHash('md5').update(url).digest("hex");
const dir_path = disk_cache_dir + key.slice(0, 1) + '/' + key.slice(1, 2) + '/' + key.slice(2, 3) + '/';
// https://stackoverflow.com/questions/4482686/check-synchronously-if-file-directory-exists-in-node-js HTMLCanvasElement.prototype.manipulate = function() {
console.warn("ma");
const {width, height} = this;
const context = this.getContext('2d');
var dt = new Date();
if (fs.existsSync(dir_path + key)) { const shift = {
console.log("* CACHE HIT , using - " + dir_path + key + " - " + url); 'r': dt.getDay()-3,
const cached_data = fs.readFileSync(dir_path + key); 'g': dt.getDay()-3,
// @todo headers can come from dir_path+key+".meta" json file 'b': dt.getDay()-3
request.respond({ };
status: 200, console.log(shift);
//contentType: 'text/html', //@todo const matt = context.getImageData(0, 0, width, height);
body: cached_data for (let i = 0; i < height; i += Math.max(1, parseInt(height / 10))) {
}); for (let j = 0; j < width; j += Math.max(1, parseInt(width / 10))) {
return; const n = ((i * (width * 4)) + (j * 4));
matt.data[n + 0] = matt.data[n + 0] + shift.r;
matt.data[n + 1] = matt.data[n + 1] + shift.g;
matt.data[n + 2] = matt.data[n + 2] + shift.b;
} }
} }
request.continue(); context.putImageData(matt, 0, 0);
}); };
if (disk_cache_dir) { Object.defineProperty(HTMLCanvasElement.prototype, 'toBlob', {
page.on('response', async (response) => { value: function() {
const url = response.url(); console.warn("toblob");
// Basic filtering for sane responses if (true) {
if (response.request().method() != 'GET' || response.request().resourceType() == 'xhr' || response.request().resourceType() == 'document' || response.status() != 200) { try {
console.log("Skipping (not useful) - Status:" + response.status() + " Method:" + response.request().method() + " ResourceType:" + response.request().resourceType() + " " + url); this.manipulate();
return;
} }
if (no_cache_list.some(substring => url.toLowerCase().includes(substring))) { catch(e) {
console.log("Skipping (no_cache_list) - " + url); console.warn('manipulation failed', e);
return;
} }
if (url.toLowerCase().includes('data:')) {
console.log("Skipping (embedded-data) - " + url);
return;
} }
response.buffer().then(buffer => { return toBlob.apply(this, arguments);
if (buffer.length > 100) {
console.log("Cache - Saving " + response.request().method() + " - " + url + " - " + response.request().resourceType());
const key = crypto.createHash('md5').update(url).digest("hex");
const dir_path = disk_cache_dir + key.slice(0, 1) + '/' + key.slice(1, 2) + '/' + key.slice(2, 3) + '/';
if (!fs.existsSync(dir_path)) {
fs.mkdirSync(dir_path, {recursive: true})
} }
});
if (fs.existsSync(dir_path + key)) { Object.defineProperty(HTMLCanvasElement.prototype, 'toDataURL', {
if (file_is_expired(dir_path + key)) { value: function() {
fs.writeFileSync(dir_path + key, buffer); console.warn("todata");
if (true) {
try {
this.manipulate();
} }
} else { catch(e) {
fs.writeFileSync(dir_path + key, buffer); console.warn('manipulation failed', e);
} }
} }
}); return toDataURL.apply(this, arguments);
});
} }
});
const r = await page.goto(url, { Object.defineProperty(navigator, 'webdriver', {get: () => false});
waitUntil: 'load' `)
await page.emulateTimezone('America/Chicago');
var r = await page.goto(url, {
waitUntil: 'load', timeout: 0
}); });
await page.waitForTimeout(1000); // https://github.com/puppeteer/puppeteer/issues/2479#issuecomment-408263504
if (r === null) {
r = await page.waitForResponse(() => true);
}
await page.waitForTimeout(4000);
await page.waitForTimeout(extra_wait_ms); await page.waitForTimeout(extra_wait_ms);
if (execute_js) { if (execute_js) {
await page.evaluate(execute_js); await page.evaluate(execute_js);
await page.waitForTimeout(200); await page.waitForTimeout(200);
@ -176,6 +164,8 @@ module.exports = async ({page, context}) => {
} }
var html = await page.content(); var html = await page.content();
page.close();
return { return {
data: { data: {
'content': html, 'content': html,
@ -183,7 +173,8 @@ module.exports = async ({page, context}) => {
'instock_data': instock_data, 'instock_data': instock_data,
'screenshot': b64s, 'screenshot': b64s,
'status_code': r.status(), 'status_code': r.status(),
'xpath_data': xpath_data 'xpath_data': xpath_data,
'total_size': total_size
}, },
type: 'application/json', type: 'application/json',
}; };

@ -57,14 +57,17 @@ function isItemInStock() {
'tickets unavailable', 'tickets unavailable',
'tijdelijk uitverkocht', 'tijdelijk uitverkocht',
'unavailable tickets', 'unavailable tickets',
'vorbestellung ist bald möglich',
'we do not currently have an estimate of when this product will be back in stock.', 'we do not currently have an estimate of when this product will be back in stock.',
'we don\'t know when or if this item will be back in stock.', 'we don\'t know when or if this item will be back in stock.',
'zur zeit nicht an lager', 'zur zeit nicht an lager',
'品切れ', '品切れ',
'已售完', '已售完',
'已售',
'품절' '품절'
]; ];
const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0); const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0);
function getElementBaseText(element) { function getElementBaseText(element) {
@ -77,7 +80,7 @@ function isItemInStock() {
return text.toLowerCase().trim(); return text.toLowerCase().trim();
} }
const negateOutOfStockRegex = new RegExp('([0-9] in stock|add to cart)', 'ig'); const negateOutOfStockRegex = new RegExp('^([0-9] in stock|add to cart|in stock)', 'ig');
// The out-of-stock or in-stock-text is generally always above-the-fold // The out-of-stock or in-stock-text is generally always above-the-fold
// and often below-the-fold is a list of related products that may or may not contain trigger text // and often below-the-fold is a list of related products that may or may not contain trigger text
@ -93,18 +96,6 @@ function isItemInStock() {
function collectVisibleElements(parent, visibleElements) { function collectVisibleElements(parent, visibleElements) {
if (!parent) return; // Base case: if parent is null or undefined, return if (!parent) return; // Base case: if parent is null or undefined, return
// Check if the parent itself is visible
const parentComputedStyle = window.getComputedStyle(parent);
if (
parentComputedStyle.display === 'none' ||
parentComputedStyle.visibility === 'hidden' ||
parent.offsetWidth <= 0 ||
parent.offsetHeight <= 0 ||
parentComputedStyle.contentVisibility === 'hidden'
) {
return; // If parent is not visible, stop iteration
}
// Add the parent itself to the visible elements array if it's of the specified types // Add the parent itself to the visible elements array if it's of the specified types
visibleElements.push(parent); visibleElements.push(parent);
@ -143,7 +134,7 @@ function isItemInStock() {
elementText = ""; elementText = "";
if (element.tagName.toLowerCase() === "input") { if (element.tagName.toLowerCase() === "input") {
elementText = element.value.toLowerCase(); elementText = element.value.toLowerCase().trim();
} else { } else {
elementText = getElementBaseText(element); elementText = getElementBaseText(element);
} }
@ -167,7 +158,7 @@ function isItemInStock() {
} }
elementText = ""; elementText = "";
if (element.tagName.toLowerCase() === "input") { if (element.tagName.toLowerCase() === "input") {
elementText = element.value.toLowerCase(); elementText = element.value.toLowerCase().trim();
} else { } else {
elementText = getElementBaseText(element); elementText = getElementBaseText(element);
} }
@ -176,7 +167,7 @@ function isItemInStock() {
// and these mean its out of stock // and these mean its out of stock
for (const outOfStockText of outOfStockTexts) { for (const outOfStockText of outOfStockTexts) {
if (elementText.includes(outOfStockText)) { if (elementText.includes(outOfStockText)) {
console.log(`Selected 'Out of Stock' - found text "${outOfStockText}"`) console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}"`)
return outOfStockText; // item is out of stock return outOfStockText; // item is out of stock
} }
} }
@ -190,3 +181,4 @@ function isItemInStock() {
// returns the element text that makes it think it's out of stock // returns the element text that makes it think it's out of stock
return isItemInStock().trim() return isItemInStock().trim()

@ -91,17 +91,6 @@ console.log("Scanning %ELEMENTS%");
function collectVisibleElements(parent, visibleElements) { function collectVisibleElements(parent, visibleElements) {
if (!parent) return; // Base case: if parent is null or undefined, return if (!parent) return; // Base case: if parent is null or undefined, return
// Check if the parent itself is visible
const parentComputedStyle = window.getComputedStyle(parent);
if (
parentComputedStyle.display === 'none' ||
parentComputedStyle.visibility === 'hidden' ||
parent.offsetWidth <= 0 ||
parent.offsetHeight <= 0 ||
parentComputedStyle.contentVisibility === 'hidden'
) {
return; // If parent is not visible, stop iteration
}
// Add the parent itself to the visible elements array if it's of the specified types // Add the parent itself to the visible elements array if it's of the specified types
const tagName = parent.tagName.toLowerCase(); const tagName = parent.tagName.toLowerCase();
@ -184,7 +173,7 @@ visibleElementsArray.forEach(function (element) {
top: Math.floor(bbox['top']) + scroll_y, top: Math.floor(bbox['top']) + scroll_y,
tagName: (element.tagName) ? element.tagName.toLowerCase() : '', tagName: (element.tagName) ? element.tagName.toLowerCase() : '',
tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '', tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '',
isClickable: false isClickable: window.getComputedStyle(element).cursor == "pointer"
}); });
}); });

Loading…
Cancel
Save