From 69756f20f218f56d5ddc5fbf4ec2ee7135d9a540 Mon Sep 17 00:00:00 2001
From: dgtlmoon <dgtlmoon@gmail.com>
Date: Fri, 25 Nov 2022 10:45:38 +0100
Subject: [PATCH] VisualSelector & BrowserSteps - Scraper improvements, remove
 duplicate code

---
 changedetectionio/blueprint/browser_steps/browser_steps.py | 6 ++----
 changedetectionio/content_fetcher.py                       | 4 +++-
 changedetectionio/res/xpath_element_scraper.js             | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/changedetectionio/blueprint/browser_steps/browser_steps.py b/changedetectionio/blueprint/browser_steps/browser_steps.py
index 1207d192..b6a7af08 100644
--- a/changedetectionio/blueprint/browser_steps/browser_steps.py
+++ b/changedetectionio/blueprint/browser_steps/browser_steps.py
@@ -257,12 +257,10 @@ class browsersteps_live_ui(steppable_browser_interface):
         self.page.evaluate("var include_filters=''")
         from pkg_resources import resource_string
         # The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector
-        # @todo dont duplicate these selectors, or just let them both use the same data?
         xpath_element_js = resource_string(__name__, "../../res/xpath_element_scraper.js").decode('utf-8')
-        xpath_element_js = xpath_element_js.replace('%ELEMENTS%',
-                                                    'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section')
+        from changedetectionio.content_fetcher import visualselector_xpath_selectors
+        xpath_element_js = xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors)
         xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
-
         screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
 
         return (screenshot, xpath_data)
diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py
index 0d956049..1f86cdd0 100644
--- a/changedetectionio/content_fetcher.py
+++ b/changedetectionio/content_fetcher.py
@@ -7,6 +7,8 @@ import requests
 import sys
 import time
 
+visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, summary'  # substituted for %ELEMENTS% in xpath_element_scraper.js
+
 class Non200ErrorCodeReceived(Exception):
     def __init__(self, status_code, url, screenshot=None, xpath_data=None, page_html=None):
         # Set this so we can use it in other parts of the app
@@ -367,7 +369,7 @@ class base_html_playwright(Fetcher):
             else:
                 self.page.evaluate("var include_filters=''")
 
-            self.xpath_data = self.page.evaluate("async () => {" + self.xpath_element_js.replace('%ELEMENTS%', 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section, summary') + "}")
+            self.xpath_data = self.page.evaluate("async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}")
 
             # Bug 3 in Playwright screenshot handling
             # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
diff --git a/changedetectionio/res/xpath_element_scraper.js b/changedetectionio/res/xpath_element_scraper.js
index 92641804..e1acc2cf 100644
--- a/changedetectionio/res/xpath_element_scraper.js
+++ b/changedetectionio/res/xpath_element_scraper.js
@@ -116,7 +116,7 @@ for (var i = 0; i < elements.length; i++) {
         left: Math.floor(bbox['left']),
         top: Math.floor(bbox['top']),
         tagName: (elements[i].tagName) ? elements[i].tagName.toLowerCase() : '',
-        tagtype: (elements[i].type) ? elements[i].type.toLowerCase() : ''
+        tagtype: (elements[i].tagName == 'INPUT' && elements[i].type) ? elements[i].type.toLowerCase() : ''
     });
 
 }