From 69756f20f218f56d5ddc5fbf4ec2ee7135d9a540 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Fri, 25 Nov 2022 10:45:38 +0100 Subject: [PATCH] VisualSelector & BrowserSteps - Scraper improvements, remove duplicate code --- changedetectionio/blueprint/browser_steps/browser_steps.py | 6 ++---- changedetectionio/content_fetcher.py | 4 +++- changedetectionio/res/xpath_element_scraper.js | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/changedetectionio/blueprint/browser_steps/browser_steps.py b/changedetectionio/blueprint/browser_steps/browser_steps.py index 1207d192..b6a7af08 100644 --- a/changedetectionio/blueprint/browser_steps/browser_steps.py +++ b/changedetectionio/blueprint/browser_steps/browser_steps.py @@ -257,12 +257,10 @@ class browsersteps_live_ui(steppable_browser_interface): self.page.evaluate("var include_filters=''") from pkg_resources import resource_string # The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector - # @todo dont duplicate these selectors, or just let them both use the same data? xpath_element_js = resource_string(__name__, "../../res/xpath_element_scraper.js").decode('utf-8') - xpath_element_js = xpath_element_js.replace('%ELEMENTS%', - 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section') + from changedetectionio.content_fetcher import visualselector_xpath_selectors + xpath_element_js = xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}") - screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72))) return (screenshot, xpath_data) diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py index 0d956049..1f86cdd0 100644 --- a/changedetectionio/content_fetcher.py +++ b/changedetectionio/content_fetcher.py @@ -7,6 +7,8 @@ import requests import sys import time +visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section, summary' + class Non200ErrorCodeReceived(Exception): def __init__(self, status_code, url, screenshot=None, xpath_data=None, page_html=None): # Set this so we can use it in other parts of the app @@ -367,7 +369,7 @@ class base_html_playwright(Fetcher): else: self.page.evaluate("var include_filters=''") - self.xpath_data = self.page.evaluate("async () => {" + self.xpath_element_js.replace('%ELEMENTS%', 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section, summary') + "}") + self.xpath_data = self.page.evaluate("async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}") # Bug 3 in Playwright screenshot handling # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it diff --git a/changedetectionio/res/xpath_element_scraper.js b/changedetectionio/res/xpath_element_scraper.js index 92641804..e1acc2cf 100644 --- a/changedetectionio/res/xpath_element_scraper.js +++ b/changedetectionio/res/xpath_element_scraper.js @@ -116,7 +116,7 @@ for (var i = 0; i < elements.length; i++) { left: Math.floor(bbox['left']), top: Math.floor(bbox['top']), tagName: (elements[i].tagName) ? elements[i].tagName.toLowerCase() : '', - tagtype: (elements[i].type) ? elements[i].type.toLowerCase() : '' + tagtype: (elements[i].tagName == 'INPUT' && elements[i].type) ? elements[i].type.toLowerCase() : '' }); }