VisualSelector & BrowserSteps - Scraper improvements, remove duplicate code

export-regex-csv
dgtlmoon 2 years ago
parent 326b7aacbb
commit 69756f20f2

@ -257,12 +257,10 @@ class browsersteps_live_ui(steppable_browser_interface):
self.page.evaluate("var include_filters=''")
from pkg_resources import resource_string
# The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector
# @todo dont duplicate these selectors, or just let them both use the same data?
xpath_element_js = resource_string(__name__, "../../res/xpath_element_scraper.js").decode('utf-8')
xpath_element_js = xpath_element_js.replace('%ELEMENTS%',
'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section')
from changedetectionio.content_fetcher import visualselector_xpath_selectors
xpath_element_js = xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors)
xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
return (screenshot, xpath_data)

@ -7,6 +7,8 @@ import requests
import sys
import time
visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section, summary'
class Non200ErrorCodeReceived(Exception):
def __init__(self, status_code, url, screenshot=None, xpath_data=None, page_html=None):
# Set this so we can use it in other parts of the app
@ -367,7 +369,7 @@ class base_html_playwright(Fetcher):
else:
self.page.evaluate("var include_filters=''")
self.xpath_data = self.page.evaluate("async () => {" + self.xpath_element_js.replace('%ELEMENTS%', 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section, summary') + "}")
self.xpath_data = self.page.evaluate("async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}")
# Bug 3 in Playwright screenshot handling
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it

@ -116,7 +116,7 @@ for (var i = 0; i < elements.length; i++) {
left: Math.floor(bbox['left']),
top: Math.floor(bbox['top']),
tagName: (elements[i].tagName) ? elements[i].tagName.toLowerCase() : '',
tagtype: (elements[i].type) ? elements[i].type.toLowerCase() : ''
tagtype: (elements[i].tagName == 'INPUT' && elements[i].type) ? elements[i].type.toLowerCase() : ''
});
}

Loading…
Cancel
Save