@ -16,24 +16,23 @@ try {
}
}
// Include the getXpath script directly, easier than fetching
// Include the getXpath script directly, easier than fetching
/**
 * Build an XPath expression that locates the given DOM node.
 *
 * If the node has a truthy `id`, the result is an id lookup of the form
 * `//*[@id="..."]`. Otherwise the function walks up through `parentNode`
 * for as long as the current node is an element, emitting one step per
 * ancestor (`prefix:localName`), and adds a 1-based positional index only
 * when a sibling with the same `nodeName` exists.
 *
 * @param {Node|null|undefined} e - Node to describe.
 * @returns {string} The XPath expression, or '' when `e` is not an element.
 */
function getxpath(e) {
    // Quote a value as an XPath 1.0 string literal. XPath 1.0 has no escape
    // character, so a value containing both quote styles must be assembled
    // with concat().
    const toXPathLiteral = (value) => {
        if (!value.includes('"')) {
            return `"${value}"`;
        }
        if (!value.includes("'")) {
            return `'${value}'`;
        }
        return `concat("${value.split('"').join('", \'"\', "')}")`;
    };

    if (e && e.id) {
        return `//*[@id=${toXPathLiteral(String(e.id))}]`;
    }

    const steps = [];
    let node = e;
    while (node && node.nodeType === Node.ELEMENT_NODE) {
        // Count earlier siblings that share this node's name (doctype nodes excluded).
        let precedingSameName = 0;
        for (let sib = node.previousSibling; sib; sib = sib.previousSibling) {
            if (sib.nodeType !== Node.DOCUMENT_TYPE_NODE && sib.nodeName === node.nodeName) {
                precedingSameName++;
            }
        }

        // A single later sibling with the same name is enough to require an index.
        let hasFollowingSameName = false;
        for (let sib = node.nextSibling; sib; sib = sib.nextSibling) {
            if (sib.nodeName === node.nodeName) {
                hasFollowingSameName = true;
                break;
            }
        }

        const qualifiedName = (node.prefix ? `${node.prefix}:` : '') + node.localName;
        const needsIndex = precedingSameName > 0 || hasFollowingSameName;
        steps.push(needsIndex ? `${qualifiedName}[${precedingSameName + 1}]` : qualifiedName);

        node = node.parentNode;
    }

    // Steps were collected leaf-first; the path is written root-first.
    return steps.length ? `/${steps.reverse().join('/')}` : '';
}
const findUpTag = ( el ) => {
const findUpTag = ( el ) => {
let r = el
let r = el
@ -59,14 +58,14 @@ const findUpTag = (el) => {
// Strategy 2: Keep going up until we hit an ID tag, imagine it's like #list-widget div h4
// Strategy 2: Keep going up until we hit an ID tag, imagine it's like #list-widget div h4
while ( r . parentNode ) {
while ( r . parentNode ) {
if ( depth == 5 ) {
if ( depth == = 5 ) {
break ;
break ;
}
}
if ( '' !== r . id ) {
if ( '' !== r . id ) {
chained _css . unshift ( "#" + CSS . escape ( r . id ) ) ;
chained _css . unshift ( "#" + CSS . escape ( r . id ) ) ;
final _selector = chained _css . join ( ' > ' ) ;
final _selector = chained _css . join ( ' > ' ) ;
// Be sure theres only one, some sites have multiples of the same ID tag :-(
// Be sure theres only one, some sites have multiples of the same ID tag :-(
if ( window . document . querySelectorAll ( final _selector ) . length == 1 ) {
if ( window . document . querySelectorAll ( final _selector ) . length == = 1 ) {
return final _selector ;
return final _selector ;
}
}
return null ;
return null ;
@ -82,30 +81,60 @@ const findUpTag = (el) => {
// @todo - if it's SVG or IMG, go into image diff mode
// @todo - if it's SVG or IMG, go into image diff mode
// %ELEMENTS% replaced at injection time because different interfaces use it with different settings
// %ELEMENTS% replaced at injection time because different interfaces use it with different settings
var elements = window . document . querySelectorAll ( "%ELEMENTS%" ) ;
var size _pos = [ ] ;
var size _pos = [ ] ;
// after page fetch, inject this JS
// after page fetch, inject this JS
// build a map of all elements and their positions (maybe that only include text?)
// build a map of all elements and their positions (maybe that only include text?)
var bbox ;
var bbox ;
for ( var i = 0 ; i < elements . length ; i ++ ) {
console . log ( "Scanning %ELEMENTS%" ) ;
bbox = elements [ i ] . getBoundingClientRect ( ) ;
/**
 * Recursively collect elements whose tag name is listed in %ELEMENTS%,
 * descending only into children that pass the visibility checks below.
 *
 * `parent` itself is never visibility-checked; only its descendants are.
 * Matching elements are appended to `visibleElements` in document order.
 *
 * @param {Element|null|undefined} parent - Root of the subtree to scan.
 * @param {Element[]} visibleElements - Output array, mutated in place.
 * @returns {void}
 */
function collectVisibleElements(parent, visibleElements) {
    if (!parent) return; // Base case: nothing to scan

    // %ELEMENTS% is replaced at injection time with a comma separated list.
    // Parse it once per top-level call, and trim/lower-case each entry so a
    // list such as "a, BUTTON" still matches the lower-cased tag names.
    const wantedTags = new Set(
        "%ELEMENTS%".split(',').map((tag) => tag.trim().toLowerCase())
    );

    const isVisible = (child) => {
        if (child.nodeType !== Node.ELEMENT_NODE) {
            return false;
        }
        // One computed-style lookup per child instead of three.
        const style = window.getComputedStyle(child);
        return (
            style.display !== 'none' &&
            style.visibility !== 'hidden' &&
            // Kept from the original: `undefined >= 0` is false, so this rejects
            // nodes that do not expose numeric offsetWidth/offsetHeight.
            child.offsetWidth >= 0 &&
            child.offsetHeight >= 0 &&
            style.contentVisibility !== 'hidden'
        );
    };

    const walk = (node) => {
        // Add the node itself if it is one of the requested tag types
        if (wantedTags.has(node.tagName.toLowerCase())) {
            visibleElements.push(node);
        }

        // Only descend into children that are elements and are visible
        const children = node.children;
        for (let i = 0; i < children.length; i++) {
            const child = children[i];
            if (isVisible(child)) {
                walk(child);
            }
        }
    };

    walk(parent);
}
// Create an array to hold the visible elements
const visibleElementsArray = [ ] ;
// Call collectVisibleElements with the starting parent element
collectVisibleElements ( document . body , visibleElementsArray ) ;
visibleElementsArray . forEach ( function ( element ) {
bbox = element . getBoundingClientRect ( ) ;
// Skip really small ones, and where width or height ==0
// Skip really small ones, and where width or height ==0
if ( bbox [ 'width' ] * bbox [ 'height' ] < 100 ) {
if ( bbox [ 'width' ] * bbox [ 'height' ] < 10 ) {
continue ;
return
}
}
// Don't include elements that are offset from canvas
// Don't include elements that are offset from canvas
if ( bbox [ 'top' ] + scroll _y < 0 || bbox [ 'left' ] < 0 ) {
if ( bbox [ 'top' ] + scroll _y < 0 || bbox [ 'left' ] < 0 ) {
continue ;
return
}
}
// @todo the getXpath kind of sucks, it doesnt know when there is for example just one ID sometimes
// @todo the getXpath kind of sucks, it doesnt know when there is for example just one ID sometimes
@ -114,46 +143,41 @@ for (var i = 0; i < elements.length; i++) {
// 1st primitive - if it has class, try joining it all and select, if theres only one.. well thats us.
// 1st primitive - if it has class, try joining it all and select, if theres only one.. well thats us.
xpath _result = false ;
xpath _result = false ;
try {
try {
var d = findUpTag ( element s[ i ] ) ;
var d = findUpTag ( element ) ;
if ( d ) {
if ( d ) {
xpath _result = d ;
xpath _result = d ;
}
}
} catch ( e ) {
} catch ( e ) {
console . log ( e ) ;
console . log ( e ) ;
}
}
// You could swap it and default to getXpath and then try the smarter one
// You could swap it and default to getXpath and then try the smarter one
// default back to the less intelligent one
// default back to the less intelligent one
if ( ! xpath _result ) {
if ( ! xpath _result ) {
try {
try {
// I've seen on FB and eBay that this doesnt work
// I've seen on FB and eBay that this doesnt work
// ReferenceError: getXPath is not defined at eval (eval at evaluate (:152:29), <anonymous>:67:20) at UtilityScript.evaluate (<anonymous>:159:18) at UtilityScript.<anonymous> (<anonymous>:1:44)
// ReferenceError: getXPath is not defined at eval (eval at evaluate (:152:29), <anonymous>:67:20) at UtilityScript.evaluate (<anonymous>:159:18) at UtilityScript.<anonymous> (<anonymous>:1:44)
xpath _result = getxpath ( element s[ i ] ) ;
xpath _result = getxpath ( element ) ;
} catch ( e ) {
} catch ( e ) {
console . log ( e ) ;
console . log ( e ) ;
continue;
return
}
}
}
}
if ( window . getComputedStyle ( elements [ i ] ) . visibility === "hidden" ) {
continue ;
}
// @todo Possible to ONLY list where it's clickable to save JSON xfer size
size _pos . push ( {
size _pos . push ( {
xpath : xpath _result ,
xpath : xpath _result ,
width : Math . round ( bbox [ 'width' ] ) ,
width : Math . round ( bbox [ 'width' ] ) ,
height : Math . round ( bbox [ 'height' ] ) ,
height : Math . round ( bbox [ 'height' ] ) ,
left : Math . floor ( bbox [ 'left' ] ) ,
left : Math . floor ( bbox [ 'left' ] ) ,
top : Math . floor ( bbox [ 'top' ] ) + scroll _y ,
top : Math . floor ( bbox [ 'top' ] ) + scroll _y ,
tagName : ( element s[ i ] . tagName ) ? element s[ i ] . tagName . toLowerCase ( ) : '' ,
tagName : ( element . tagName ) ? element . tagName . toLowerCase ( ) : '' ,
tagtype : ( element s[ i ] . tagName == 'INPUT' && elements [ i ] . type ) ? element s[ i ] . type . toLowerCase ( ) : '' ,
tagtype : ( element . tagName . toLowerCase ( ) === 'input' && element . type ) ? element . type . toLowerCase ( ) : '' ,
isClickable : ( elements [ i ] . onclick ) || window . getComputedStyle ( element s[ i ] ) . cursor == "pointer"
isClickable : window . getComputedStyle ( element ) . cursor == "pointer"
} ) ;
} ) ;
}
} ) ;
// Inject the current one set in the include_filters, which may be a CSS rule
// Inject the current one set in the include_filters, which may be a CSS rule
// used for displaying the current one in VisualSelector, where its not one we generated.
// used for displaying the current one in VisualSelector, where its not one we generated.
@ -180,7 +204,7 @@ if (include_filters.length) {
}
}
} catch ( e ) {
} catch ( e ) {
// Maybe catch DOMException and alert?
// Maybe catch DOMException and alert?
console . log ( "xpath_element_scraper: Exception selecting element from filter " + f ) ;
console . log ( "xpath_element_scraper: Exception selecting element from filter " + f ) ;
console . log ( e ) ;
console . log ( e ) ;
}
}
@ -211,7 +235,7 @@ if (include_filters.length) {
}
}
}
}
if ( ! q ) {
if ( ! q ) {
console . log ( "xpath_element_scraper: filter element " + f + " was not found" ) ;
console . log ( "xpath_element_scraper: filter element " + f + " was not found" ) ;
}
}
@ -221,7 +245,7 @@ if (include_filters.length) {
width : parseInt ( bbox [ 'width' ] ) ,
width : parseInt ( bbox [ 'width' ] ) ,
height : parseInt ( bbox [ 'height' ] ) ,
height : parseInt ( bbox [ 'height' ] ) ,
left : parseInt ( bbox [ 'left' ] ) ,
left : parseInt ( bbox [ 'left' ] ) ,
top : parseInt ( bbox [ 'top' ] ) + scroll _y
top : parseInt ( bbox [ 'top' ] ) + scroll _y
} ) ;
} ) ;
}
}
}
}
@ -229,7 +253,7 @@ if (include_filters.length) {
// Sort ascending by area so the smallest element matching a given region comes
// first; otherwise a wrapping element could be selected by mistake, making the
// element we actually want unselectable.
// The comparator returns the area difference (0 for equal areas) so it stays
// consistent, as Array.prototype.sort requires.
size_pos.sort((a, b) => a.width * a.height - b.width * b.height);
// Window.width required for proper scaling in the frontend
// Window.width required for proper scaling in the frontend
return { 'size_pos' : size _pos , 'browser_width' : window . innerWidth } ;
return { 'size_pos' : size _pos , 'browser_width' : window . innerWidth } ;