more tweaks

pull/2185/head
dgtlmoon 11 months ago
parent 71a46130b4
commit 491715b9b9

@ -18,6 +18,7 @@ module.exports = async ({page, context}) => {
await page.setBypassCSP(true)
await page.setExtraHTTPHeaders(req_headers);
var total_size = 0;
if (user_agent) {
await page.setUserAgent(user_agent);
@ -42,102 +43,89 @@ module.exports = async ({page, context}) => {
height: 768,
deviceScaleFactor: 1,
});
await page.setRequestInterception(true);
if (disk_cache_dir) {
console.log(">>>>>>>>>>>>>>> LOCAL DISK CACHE ENABLED <<<<<<<<<<<<<<<<<<<<<");
await page.setCacheEnabled(false);
await page.evaluateOnNewDocument('navigator.serviceWorker.register = () => { console.warn("Service Worker registration blocked by Playwright")}');
await page.evaluateOnNewDocument(`
const toBlob = HTMLCanvasElement.prototype.toBlob;
const toDataURL = HTMLCanvasElement.prototype.toDataURL;
HTMLCanvasElement.prototype.manipulate = function() {
console.warn("ma");
const {width, height} = this;
const context = this.getContext('2d');
var dt = new Date();
const shift = {
'r': dt.getDay()-3,
'g': dt.getDay()-3,
'b': dt.getDay()-3
};
console.log(shift);
const matt = context.getImageData(0, 0, width, height);
for (let i = 0; i < height; i += Math.max(1, parseInt(height / 10))) {
for (let j = 0; j < width; j += Math.max(1, parseInt(width / 10))) {
const n = ((i * (width * 4)) + (j * 4));
matt.data[n + 0] = matt.data[n + 0] + shift.r;
matt.data[n + 1] = matt.data[n + 1] + shift.g;
matt.data[n + 2] = matt.data[n + 2] + shift.b;
}
}
const fs = require('fs');
const crypto = require('crypto');
context.putImageData(matt, 0, 0);
};
function file_is_expired(file_path) {
if (!fs.existsSync(file_path)) {
return true;
Object.defineProperty(HTMLCanvasElement.prototype, 'toBlob', {
value: function() {
console.warn("toblob");
if (true) {
try {
this.manipulate();
}
var stats = fs.statSync(file_path);
const now_date = new Date();
const expire_seconds = 300;
if ((now_date / 1000) - (stats.mtime.getTime() / 1000) > expire_seconds) {
console.log("CACHE EXPIRED: " + file_path);
return true;
catch(e) {
console.warn('manipulation failed', e);
}
return false;
}
return toBlob.apply(this, arguments);
}
page.on('request', async (request) => {
// General blocking of requests that waste traffic
if (block_url_list.some(substring => request.url().toLowerCase().includes(substring))) return request.abort();
if (disk_cache_dir) {
const url = request.url();
const key = crypto.createHash('md5').update(url).digest("hex");
const dir_path = disk_cache_dir + key.slice(0, 1) + '/' + key.slice(1, 2) + '/' + key.slice(2, 3) + '/';
// https://stackoverflow.com/questions/4482686/check-synchronously-if-file-directory-exists-in-node-js
if (fs.existsSync(dir_path + key)) {
console.log("* CACHE HIT , using - " + dir_path + key + " - " + url);
const cached_data = fs.readFileSync(dir_path + key);
// @todo headers can come from dir_path+key+".meta" json file
request.respond({
status: 200,
//contentType: 'text/html', //@todo
body: cached_data
});
return;
}
});
Object.defineProperty(HTMLCanvasElement.prototype, 'toDataURL', {
value: function() {
console.warn("todata");
if (true) {
try {
this.manipulate();
}
request.continue();
});
catch(e) {
console.warn('manipulation failed', e);
}
}
return toDataURL.apply(this, arguments);
}
});
if (disk_cache_dir) {
page.on('response', async (response) => {
const url = response.url();
// Basic filtering for sane responses
if (response.request().method() != 'GET' || response.request().resourceType() == 'xhr' || response.request().resourceType() == 'document' || response.status() != 200) {
console.log("Skipping (not useful) - Status:" + response.status() + " Method:" + response.request().method() + " ResourceType:" + response.request().resourceType() + " " + url);
return;
}
if (no_cache_list.some(substring => url.toLowerCase().includes(substring))) {
console.log("Skipping (no_cache_list) - " + url);
return;
}
if (url.toLowerCase().includes('data:')) {
console.log("Skipping (embedded-data) - " + url);
return;
}
response.buffer().then(buffer => {
if (buffer.length > 100) {
console.log("Cache - Saving " + response.request().method() + " - " + url + " - " + response.request().resourceType());
const key = crypto.createHash('md5').update(url).digest("hex");
const dir_path = disk_cache_dir + key.slice(0, 1) + '/' + key.slice(1, 2) + '/' + key.slice(2, 3) + '/';
if (!fs.existsSync(dir_path)) {
fs.mkdirSync(dir_path, {recursive: true})
}
if (fs.existsSync(dir_path + key)) {
if (file_is_expired(dir_path + key)) {
fs.writeFileSync(dir_path + key, buffer);
}
} else {
fs.writeFileSync(dir_path + key, buffer);
}
}
});
});
}
Object.defineProperty(navigator, 'webdriver', {get: () => false});
`)
await page.emulateTimezone('America/Chicago');
const r = await page.goto(url, {
waitUntil: 'load'
var r = await page.goto(url, {
waitUntil: 'load', timeout: 0
});
await page.waitForTimeout(1000);
// https://github.com/puppeteer/puppeteer/issues/2479#issuecomment-408263504
if (r === null) {
r = await page.waitForResponse(() => true);
}
await page.waitForTimeout(4000);
await page.waitForTimeout(extra_wait_ms);
if (execute_js) {
await page.evaluate(execute_js);
await page.waitForTimeout(200);
@ -176,6 +164,8 @@ module.exports = async ({page, context}) => {
}
var html = await page.content();
page.close();
return {
data: {
'content': html,
@ -183,8 +173,9 @@ module.exports = async ({page, context}) => {
'instock_data': instock_data,
'screenshot': b64s,
'status_code': r.status(),
'xpath_data': xpath_data
'xpath_data': xpath_data,
'total_size': total_size
},
type: 'application/json',
};
};
};

@ -57,14 +57,17 @@ function isItemInStock() {
'tickets unavailable',
'tijdelijk uitverkocht',
'unavailable tickets',
'vorbestellung ist bald möglich',
'we do not currently have an estimate of when this product will be back in stock.',
'we don\'t know when or if this item will be back in stock.',
'zur zeit nicht an lager',
'品切れ',
'已售完',
'已售',
'품절'
];
const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0);
function getElementBaseText(element) {
@ -77,7 +80,7 @@ function isItemInStock() {
return text.toLowerCase().trim();
}
const negateOutOfStockRegex = new RegExp('([0-9] in stock|add to cart)', 'ig');
const negateOutOfStockRegex = new RegExp('^([0-9] in stock|add to cart|in stock)', 'ig');
// The out-of-stock or in-stock-text is generally always above-the-fold
// and often below-the-fold is a list of related products that may or may not contain trigger text
@ -93,18 +96,6 @@ function isItemInStock() {
function collectVisibleElements(parent, visibleElements) {
if (!parent) return; // Base case: if parent is null or undefined, return
// Check if the parent itself is visible
const parentComputedStyle = window.getComputedStyle(parent);
if (
parentComputedStyle.display === 'none' ||
parentComputedStyle.visibility === 'hidden' ||
parent.offsetWidth <= 0 ||
parent.offsetHeight <= 0 ||
parentComputedStyle.contentVisibility === 'hidden'
) {
return; // If parent is not visible, stop iteration
}
// Add the parent itself to the visible elements array if it's of the specified types
visibleElements.push(parent);
@ -143,7 +134,7 @@ function isItemInStock() {
elementText = "";
if (element.tagName.toLowerCase() === "input") {
elementText = element.value.toLowerCase();
elementText = element.value.toLowerCase().trim();
} else {
elementText = getElementBaseText(element);
}
@ -167,7 +158,7 @@ function isItemInStock() {
}
elementText = "";
if (element.tagName.toLowerCase() === "input") {
elementText = element.value.toLowerCase();
elementText = element.value.toLowerCase().trim();
} else {
elementText = getElementBaseText(element);
}
@ -176,7 +167,7 @@ function isItemInStock() {
// and these mean it's out of stock
for (const outOfStockText of outOfStockTexts) {
if (elementText.includes(outOfStockText)) {
console.log(`Selected 'Out of Stock' - found text "${outOfStockText}"`)
console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}"`)
return outOfStockText; // item is out of stock
}
}
@ -190,3 +181,4 @@ function isItemInStock() {
// returns the element text that makes it think it's out of stock
return isItemInStock().trim()

@ -91,17 +91,6 @@ console.log("Scanning %ELEMENTS%");
function collectVisibleElements(parent, visibleElements) {
if (!parent) return; // Base case: if parent is null or undefined, return
// Check if the parent itself is visible
const parentComputedStyle = window.getComputedStyle(parent);
if (
parentComputedStyle.display === 'none' ||
parentComputedStyle.visibility === 'hidden' ||
parent.offsetWidth <= 0 ||
parent.offsetHeight <= 0 ||
parentComputedStyle.contentVisibility === 'hidden'
) {
return; // If parent is not visible, stop iteration
}
// Add the parent itself to the visible elements array if it's of the specified types
const tagName = parent.tagName.toLowerCase();
@ -184,7 +173,7 @@ visibleElementsArray.forEach(function (element) {
top: Math.floor(bbox['top']) + scroll_y,
tagName: (element.tagName) ? element.tagName.toLowerCase() : '',
tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '',
isClickable: false
isClickable: window.getComputedStyle(element).cursor == "pointer"
});
});

Loading…
Cancel
Save