Adding new Visual Selector for choosing the area of the webpage to monitor - playwright/browserless only (#566)

3 years ago · eef56e52c6
parent 8e3195f394
commit eef56e52c6
21 changed files with 670 additions and 47 deletions
--- a/changedetectionio/init.py
+++ b/changedetectionio/init.py
@ -626,6 +626,12 @@ def changedetection_app(config=None, datastore_o=None):
            if request.method == 'POST' and not form.validate():
                flash("An error occurred, please see below.", "error")

+            visualselector_data_is_ready = datastore.visualselector_data_is_ready(uuid)
+
+            # Only works reliably with Playwright
+            visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and default['fetch_backend'] == 'html_webdriver'
+
+
            output = render_template("edit.html",
                                     uuid=uuid,
                                     watch=datastore.data['watching'][uuid],
@ -633,7 +639,9 @@ def changedetection_app(config=None, datastore_o=None):
                                     has_empty_checktime=using_default_check_time,
                                     using_global_webdriver_wait=default['webdriver_delay'] is None,
                                     current_base_url=datastore.data['settings']['application']['base_url'],
-                                     emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False)
+                                     emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
+                                     visualselector_data_is_ready=visualselector_data_is_ready,
+                                     visualselector_enabled=visualselector_enabled
                                     )

        return output
@ -976,10 +984,9 @@ def changedetection_app(config=None, datastore_o=None):

    @app.route("/static/<string:group>/<string:filename>", methods=['GET'])
    def static_content(group, filename):
-        if group == 'screenshot':
-
        from flask import make_response

+        if group == 'screenshot':
            # Could be sensitive, follow password requirements
            if datastore.data['settings']['application']['password'] and not flask_login.current_user.is_authenticated:
                abort(403)
@ -998,6 +1005,26 @@ def changedetection_app(config=None, datastore_o=None):
            except FileNotFoundError:
                abort(404)

+
+        if group == 'visual_selector_data':
+            # Could be sensitive, follow password requirements
+            if datastore.data['settings']['application']['password'] and not flask_login.current_user.is_authenticated:
+                abort(403)
+
+            # These files should be in our subdirectory
+            try:
+                # set nocache, set content-type
+                watch_dir = datastore_o.datastore_path + "/" + filename
+                response = make_response(send_from_directory(filename="elements.json", directory=watch_dir, path=watch_dir + "/elements.json"))
+                response.headers['Content-type'] = 'application/json'
+                response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
+                response.headers['Pragma'] = 'no-cache'
+                response.headers['Expires'] = 0
+                return response
+
+            except FileNotFoundError:
+                abort(404)
+
        # These files should be in our subdirectory
        try:
            return send_from_directory("static/{}".format(group), path=filename)
@ -1150,7 +1177,6 @@ def changedetection_app(config=None, datastore_o=None):
        # paste in etc
        return redirect(url_for('index'))

-
    # @todo handle ctrl break
    ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start()

--- a/changedetectionio/content_fetcher.py
+++ b/changedetectionio/content_fetcher.py
@ -27,6 +27,117 @@ class Fetcher():
    status_code = None
    content = None
    headers = None
+
+    fetcher_description = "No description"
+    xpath_element_js = """               
+                // Include the getXpath script directly, easier than fetching
+                !function(e,n){"object"==typeof exports&&"undefined"!=typeof module?module.exports=n():"function"==typeof define&&define.amd?define(n):(e=e||self).getXPath=n()}(this,function(){return function(e){var n=e;if(n&&n.id)return'//*[@id="'+n.id+'"]';for(var o=[];n&&Node.ELEMENT_NODE===n.nodeType;){for(var i=0,r=!1,d=n.previousSibling;d;)d.nodeType!==Node.DOCUMENT_TYPE_NODE&&d.nodeName===n.nodeName&&i++,d=d.previousSibling;for(d=n.nextSibling;d;){if(d.nodeName===n.nodeName){r=!0;break}d=d.nextSibling}o.push((n.prefix?n.prefix+":":"")+n.localName+(i||r?"["+(i+1)+"]":"")),n=n.parentNode}return o.length?"/"+o.reverse().join("/"):""}});
+
+
+                const findUpTag = (el) => {
+                  let r = el
+                  chained_css = [];
+                  depth=0;
+            
+                // Strategy 1: Keep going up until we hit an ID tag, imagine it's like  #list-widget div h4
+                  while (r.parentNode) {
+                    if(depth==5) {
+                      break;
+                    }
+                    if('' !==r.id) {
+                      chained_css.unshift("#"+r.id);
+                      final_selector= chained_css.join('>');
+                      // Be sure theres only one, some sites have multiples of the same ID tag :-(
+                      if (window.document.querySelectorAll(final_selector).length ==1 ) {
+                        return final_selector;
+                      }
+                      return null;
+                    } else {
+                      chained_css.unshift(r.tagName.toLowerCase());
+                    }
+                    r=r.parentNode;
+                    depth+=1;
+                  }
+                  return null;
+                }
+
+
+                // @todo - if it's SVG or IMG, go into image diff mode
+                var elements = window.document.querySelectorAll("div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section, summary");
+                var size_pos=[];
+                // after page fetch, inject this JS
+                // build a map of all elements and their positions (maybe that only include text?)
+                var bbox;
+                for (var i = 0; i < elements.length; i++) {   
+                 bbox = elements[i].getBoundingClientRect();
+
+                 // forget really small ones
+                 if (bbox['width'] <20 && bbox['height'] < 20 ) {
+                   continue;
+                 }
+
+                 // @todo the getXpath kind of sucks, it doesnt know when there is for example just one ID sometimes
+                 // it should not traverse when we know we can anchor off just an ID one level up etc..
+                 // maybe, get current class or id, keep traversing up looking for only class or id until there is just one match 
+
+                 // 1st primitive - if it has class, try joining it all and select, if theres only one.. well thats us.
+                 xpath_result=false;
+                 
+                 try {
+                   var d= findUpTag(elements[i]);
+                   if (d) {
+                     xpath_result =d;
+                   }                
+                 } catch (e) {
+                   var x=1;
+                 }
+                 
+// You could swap it and default to getXpath and then try the smarter one
+                 // default back to the less intelligent one
+                 if (!xpath_result) {
+                   xpath_result = getXPath(elements[i]);                   
+                 }
+                 if(window.getComputedStyle(elements[i]).visibility === "hidden") {
+                   continue;
+                 }
+
+                 size_pos.push({
+                   xpath: xpath_result,
+                   width: Math.round(bbox['width']), 
+                   height: Math.round(bbox['height']), 
+                   left: Math.floor(bbox['left']), 
+                   top: Math.floor(bbox['top']), 
+                   childCount: elements[i].childElementCount
+                 });                 
+                }
+
+
+                // inject the current one set in the css_filter, which may be a CSS rule
+                // used for displaying the current one in VisualSelector, where its not one we generated.
+                if (css_filter.length) {
+                   // is it xpath?
+                   if (css_filter.startsWith('/') ) {
+                     q=document.evaluate(css_filter, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
+                   } else {
+                     q=document.querySelector(css_filter);
+                   }
+                   bbox = q.getBoundingClientRect();                
+                   if (bbox && bbox['width'] >0 && bbox['height']>0) {                       
+                       size_pos.push({
+                           xpath: css_filter,
+                           width: bbox['width'], 
+                           height: bbox['height'],
+                           left: bbox['left'],
+                           top: bbox['top'],
+                           childCount: q.childElementCount
+                         });
+                     }
+                }
+// https://stackoverflow.com/questions/1145850/how-to-get-height-of-entire-document-with-javascript
+                return {'size_pos':size_pos, 'browser_width': window.innerWidth, 'browser_height':document.body.scrollHeight};
+    """
+    xpath_data = None
+
    # Will be needed in the future by the VisualSelector, always get this where possible.
    screenshot = False
    fetcher_description = "No description"
@ -47,7 +158,8 @@ class Fetcher():
            request_headers,
            request_body,
            request_method,
-            ignore_status_codes=False):
+            ignore_status_codes=False,
+            current_css_filter=None):
        # Should set self.error, self.status_code and self.content
        pass

@ -128,7 +240,8 @@ class base_html_playwright(Fetcher):
            request_headers,
            request_body,
            request_method,
-            ignore_status_codes=False):
+            ignore_status_codes=False,
+            current_css_filter=None):

        from playwright.sync_api import sync_playwright
        import playwright._impl._api_types
@ -148,8 +261,8 @@ class base_html_playwright(Fetcher):
                proxy=self.proxy
            )
            page = context.new_page()
-            page.set_viewport_size({"width": 1280, "height": 1024})
            try:
+               # Bug - never set viewport size BEFORE page.goto
                response = page.goto(url, timeout=timeout * 1000, wait_until='commit')
                # Wait_until = commit
                # - `'commit'` - consider operation to be finished when network response is received and the document started loading.
@ -166,14 +279,27 @@ class base_html_playwright(Fetcher):
            if len(page.content().strip()) == 0:
                raise EmptyReply(url=url, status_code=None)

+            # Bug 2(?) Set the viewport size AFTER loading the page
+            page.set_viewport_size({"width": 1280, "height": 1024})
+            # Bugish - Let the page redraw/reflow
+            page.set_viewport_size({"width": 1280, "height": 1024})
+
            self.status_code = response.status
            self.content = page.content()
            self.headers = response.all_headers()

+            if current_css_filter is not None:
+                page.evaluate("var css_filter='{}'".format(current_css_filter))
+            else:
+                page.evaluate("var css_filter=''")
+
+            self.xpath_data = page.evaluate("async () => {" + self.xpath_element_js + "}")
+            # Bug 3 in Playwright screenshot handling
            # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
            # JPEG is better here because the screenshots can be very very large
            page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024})
-            self.screenshot = page.screenshot(type='jpeg', full_page=True, quality=90)
+            self.screenshot = page.screenshot(type='jpeg', full_page=True, quality=92)
+
            context.close()
            browser.close()

@ -225,7 +351,8 @@ class base_html_webdriver(Fetcher):
            request_headers,
            request_body,
            request_method,
-            ignore_status_codes=False):
+            ignore_status_codes=False,
+            current_css_filter=None):

        from selenium import webdriver
        from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
@ -245,6 +372,10 @@ class base_html_webdriver(Fetcher):
            self.quit()
            raise

+        self.driver.set_window_size(1280, 1024)
+        self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
+        self.screenshot = self.driver.get_screenshot_as_png()
+
        # @todo - how to check this? is it possible?
        self.status_code = 200
        # @todo somehow we should try to get this working for WebDriver
@ -254,8 +385,6 @@ class base_html_webdriver(Fetcher):
        time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
        self.content = self.driver.page_source
        self.headers = {}
-        self.screenshot = self.driver.get_screenshot_as_png()
-        self.quit()

    # Does the connection to the webdriver work? run a test connection.
    def is_ready(self):
@ -292,7 +421,8 @@ class html_requests(Fetcher):
            request_headers,
            request_body,
            request_method,
-            ignore_status_codes=False):
+            ignore_status_codes=False,
+            current_css_filter=None):

        proxies={}

--- a/changedetectionio/fetch_site_status.py
+++ b/changedetectionio/fetch_site_status.py
@ -94,6 +94,7 @@ class perform_site_check():
            # If the klass doesnt exist, just use a default
            klass = getattr(content_fetcher, "html_requests")

+
        proxy_args = self.set_proxy_from_list(watch)
        fetcher = klass(proxy_override=proxy_args)

@ -104,7 +105,8 @@ class perform_site_check():
        elif system_webdriver_delay is not None:
            fetcher.render_extract_delay = system_webdriver_delay

-        fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_code)
+        fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_code, watch['css_filter'])
+        fetcher.quit()

        # Fetching complete, now filters
        # @todo move to class / maybe inside of fetcher abstract base?
@ -236,4 +238,4 @@ class perform_site_check():
                if not watch['title'] or not len(watch['title']):
                    update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content)

-        return changed_detected, update_obj, text_content_before_ignored_filter, fetcher.screenshot
+        return changed_detected, update_obj, text_content_before_ignored_filter, fetcher.screenshot, fetcher.xpath_data
--- a/changedetectionio/run_all_tests.sh
+++ b/changedetectionio/run_all_tests.sh
@ -22,3 +22,26 @@ echo "RUNNING WITH BASE_URL SET"
 export BASE_URL="https://really-unique-domain.io"
 pytest tests/test_notification.py

+
+# Now for the selenium and playwright/browserless fetchers
+# Note - this is not UI functional tests - just checking that each one can fetch the content
+
+echo "TESTING WEBDRIVER FETCH > SELENIUM/WEBDRIVER..."
+docker run -d --name $$-test_selenium  -p 4444:4444 --rm --shm-size="2g"  selenium/standalone-chrome-debug:3.141.59
+# takes a while to spin up
+sleep 5
+export WEBDRIVER_URL=http://localhost:4444/wd/hub
+pytest tests/fetchers/test_content.py
+unset WEBDRIVER_URL
+docker kill $$-test_selenium
+
+echo "TESTING WEBDRIVER FETCH > PLAYWRIGHT/BROWSERLESS..."
+# Not all platforms support playwright (not ARM/rPI), so it's not packaged in requirements.txt
+pip3 install playwright~=1.22
+docker run -d --name $$-test_browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm  -p 3000:3000  --shm-size="2g"  browserless/chrome:1.53-chrome-stable
+# takes a while to spin up
+sleep 5
+export PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000
+pytest tests/fetchers/test_content.py
+unset PLAYWRIGHT_DRIVER_URL
+docker kill $$-test_browserless
--- a/changedetectionio/static/images/Playwright-icon.png
+++ b/changedetectionio/static/images/Playwright-icon.png
--- a/changedetectionio/static/images/beta-logo.png
+++ b/changedetectionio/static/images/beta-logo.png
--- a/changedetectionio/static/js/limit.js
+++ b/changedetectionio/static/js/limit.js
@ -0,0 +1,56 @@
+/**
+ * debounce
+ * @param {integer} milliseconds This param indicates the number of milliseconds
+ *     to wait after the last call before calling the original function.
+ * @param {object} What "this" refers to in the returned function.
+ * @return {function} This returns a function that when called will wait the
+ *     indicated number of milliseconds after the last call before
+ *     calling the original function.
+ */
+Function.prototype.debounce = function (milliseconds, context) {
+    var baseFunction = this,
+        timer = null,
+        wait = milliseconds;
+
+    return function () {
+        var self = context || this,
+            args = arguments;
+
+        function complete() {
+            baseFunction.apply(self, args);
+            timer = null;
+        }
+
+        if (timer) {
+            clearTimeout(timer);
+        }
+
+        timer = setTimeout(complete, wait);
+    };
+};
+
+/**
+* throttle
+* @param {integer} milliseconds This param indicates the number of milliseconds
+*     to wait between calls before calling the original function.
+* @param {object} What "this" refers to in the returned function.
+* @return {function} This returns a function that when called will wait the
+*     indicated number of milliseconds between calls before
+*     calling the original function.
+*/
+Function.prototype.throttle = function (milliseconds, context) {
+    var baseFunction = this,
+        lastEventTimestamp = null,
+        limit = milliseconds;
+
+    return function () {
+        var self = context || this,
+            args = arguments,
+            now = Date.now();
+
+        if (!lastEventTimestamp || now - lastEventTimestamp >= limit) {
+            lastEventTimestamp = now;
+            baseFunction.apply(self, args);
+        }
+    };
+};
--- a/changedetectionio/static/js/visual-selector.js
+++ b/changedetectionio/static/js/visual-selector.js
@ -0,0 +1,219 @@
+// Horrible proof of concept code :)
+// yes - this is really a hack, if you are a front-ender and want to help, please get in touch!
+
+$(document).ready(function() {
+
+    $('#visualselector-tab').click(function () {
+        $("img#selector-background").off('load');
+        bootstrap_visualselector();
+    });
+
+    $(document).on('keydown', function(event) {
+        if ($("img#selector-background").is(":visible")) {
+            if (event.key == "Escape") {
+                state_clicked=false;
+                ctx.clearRect(0, 0, c.width, c.height);
+            }
+        }
+    });
+
+    // For when the page loads
+    if(!window.location.hash || window.location.hash != '#visualselector') {
+        $("img#selector-background").attr('src','');
+        return;
+    }
+
+    // Handle clearing button/link
+    $('#clear-selector').on('click', function(event) {
+        if(!state_clicked) {
+            alert('Oops, Nothing selected!');
+        }
+        state_clicked=false;
+        ctx.clearRect(0, 0, c.width, c.height);
+    });
+
+
+    bootstrap_visualselector();
+
+    var current_selected_i;
+    var state_clicked=false;
+
+    var c;
+
+    // greyed out fill context
+    var xctx;
+    // redline highlight context
+    var ctx;
+
+    var current_default_xpath;
+    var x_scale=1;
+    var y_scale=1;
+    var selector_image;
+    var selector_image_rect;
+    var vh;
+    var selector_data;
+
+
+    function bootstrap_visualselector() {
+        if ( 1 ) {
+            // bootstrap it, this will trigger everything else
+            $("img#selector-background").bind('load', function () {
+                console.log("Loaded background...");
+               c = document.getElementById("selector-canvas");
+                // greyed out fill context
+               xctx = c.getContext("2d");
+                // redline highlight context
+               ctx = c.getContext("2d");
+               current_default_xpath =$("#css_filter").val();
+               fetch_data();
+               $('#selector-canvas').off("mousemove");
+               // screenshot_url defined in the edit.html template
+            }).attr("src", screenshot_url);
+        }
+    }
+
+    function fetch_data() {
+      // Image is ready
+      $('.fetching-update-notice').html("Fetching element data..");
+
+      $.ajax({
+        url: watch_visual_selector_data_url,
+        context: document.body
+      }).done(function (data) {
+        $('.fetching-update-notice').html("Rendering..");
+        selector_data = data;
+        console.log("Reported browser width from backend: "+data['browser_width']);
+        state_clicked=false;
+        set_scale();
+        reflow_selector();
+        $('.fetching-update-notice').fadeOut();
+      });
+    };
+
+
+
+    function set_scale() {
+
+      // some things to check if the scaling doesnt work
+      // - that the widths/sizes really are about the actual screen size cat elements.json |grep -o width......|sort|uniq
+      selector_image = $("img#selector-background")[0];
+      selector_image_rect = selector_image.getBoundingClientRect();
+
+      // make the canvas the same size as the image
+      $('#selector-canvas').attr('height', selector_image_rect.height);
+      $('#selector-canvas').attr('width', selector_image_rect.width);
+      $('#selector-wrapper').attr('width', selector_image_rect.width);
+      x_scale = selector_image_rect.width / selector_data['browser_width'];
+      y_scale = selector_image_rect.height / selector_image.naturalHeight;
+      ctx.strokeStyle = 'rgba(255,0,0, 0.9)';
+      ctx.fillStyle = 'rgba(255,0,0, 0.1)';
+      ctx.lineWidth = 3;
+      console.log("scaling set  x: "+x_scale+" by y:"+y_scale);
+      $("#selector-current-xpath").css('max-width', selector_image_rect.width);
+    }
+
+    function reflow_selector() {
+        $(window).resize(function() {
+            set_scale();
+            highlight_current_selected_i();
+        });
+      var selector_currnt_xpath_text=$("#selector-current-xpath span");
+
+      set_scale();
+
+      console.log(selector_data['size_pos'].length + " selectors found");
+
+      // highlight the default one if we can find it in the xPath list
+      // or the xpath matches the default one
+      found = false;
+      if(current_default_xpath.length) {
+          for (var i = selector_data['size_pos'].length; i!==0; i--) {
+            var sel = selector_data['size_pos'][i-1];
+            if(selector_data['size_pos'][i - 1].xpath == current_default_xpath) {
+            console.log("highlighting "+current_default_xpath);
+              current_selected_i = i-1;
+              highlight_current_selected_i();
+              found = true;
+              break;
+            }
+          }
+        if(!found) {
+          alert("unfortunately your existing CSS/xPath Filter was no longer found!");
+        }
+      }
+
+
+      $('#selector-canvas').bind('mousemove', function (e) {
+        if(state_clicked) {
+          return;
+        }
+        ctx.clearRect(0, 0, c.width, c.height);
+        current_selected_i=null;
+
+        // Reverse order - the most specific one should be deeper/"laster"
+        // Basically, find the most 'deepest'
+        var found=0;
+        ctx.fillStyle = 'rgba(205,0,0,0.35)';
+        for (var i = selector_data['size_pos'].length; i!==0; i--) {
+          // draw all of them? let them choose somehow?
+          var sel = selector_data['size_pos'][i-1];
+          // If we are in a bounding-box
+          if (e.offsetY > sel.top * y_scale && e.offsetY < sel.top * y_scale + sel.height * y_scale
+              &&
+              e.offsetX > sel.left * y_scale && e.offsetX < sel.left * y_scale + sel.width * y_scale
+
+          ) {
+
+            // FOUND ONE
+            set_current_selected_text(sel.xpath);
+            ctx.strokeRect(sel.left * x_scale, sel.top * y_scale, sel.width * x_scale, sel.height * y_scale);
+            ctx.fillRect(sel.left * x_scale, sel.top * y_scale, sel.width * x_scale, sel.height * y_scale);
+
+            // no need to keep digging
+            // @todo or, O to go out/up, I to go in
+            // or double click to go up/out the selector?
+            current_selected_i=i-1;
+            found+=1;
+            break;
+          }
+        }
+
+      }.debounce(5));
+
+      function set_current_selected_text(s) {
+        selector_currnt_xpath_text[0].innerHTML=s;
+      }
+
+      function highlight_current_selected_i() {
+        if(state_clicked) {
+          state_clicked=false;
+          xctx.clearRect(0,0,c.width, c.height);
+          return;
+        }
+
+        var sel = selector_data['size_pos'][current_selected_i];
+        if (sel[0] == '/') {
+        // @todo - not sure just checking / is right
+            $("#css_filter").val('xpath:'+sel.xpath);
+        } else {
+            $("#css_filter").val(sel.xpath);
+        }
+        xctx.fillStyle = 'rgba(205,205,205,0.95)';
+        xctx.strokeStyle = 'rgba(225,0,0,0.9)';
+        xctx.lineWidth = 3;
+        xctx.fillRect(0,0,c.width, c.height);
+        // Clear out what only should be seen (make a clear/clean spot)
+        xctx.clearRect(sel.left * x_scale, sel.top * y_scale, sel.width * x_scale, sel.height * y_scale);
+        xctx.strokeRect(sel.left * x_scale, sel.top * y_scale, sel.width * x_scale, sel.height * y_scale);
+        state_clicked=true;
+        set_current_selected_text(sel.xpath);
+
+      }
+
+
+      $('#selector-canvas').bind('mousedown', function (e) {
+        highlight_current_selected_i();
+      });
+    }
+
+});
--- a/changedetectionio/static/js/watch-overview.js
+++ b/changedetectionio/static/js/watch-overview.js
@ -4,6 +4,7 @@ $(function () {
    $(this).closest('.unviewed').removeClass('unviewed');
  });

+
  $('.with-share-link > *').click(function () {
      $("#copied-clipboard").remove();

@ -20,5 +21,6 @@ $(function () {
       $(this).remove();
      });
  });
+
 });

--- a/changedetectionio/static/styles/styles.css
+++ b/changedetectionio/static/styles/styles.css
@ -338,7 +338,8 @@ footer {
    padding-top: 110px; }
  div.tabs.collapsable ul li {
    display: block;
-    border-radius: 0px; }
+    border-radius: 0px;
+    margin-right: 0px; }
  input[type='text'] {
    width: 100%; }
  /*
@ -429,6 +430,15 @@ and also iPads specifically.
  .tab-pane-inner:target {
    display: block; }

+#beta-logo {
+  height: 50px;
+  right: -3px;
+  top: -3px;
+  position: absolute; }
+
+#selector-header {
+  padding-bottom: 1em; }
+
 .edit-form {
  min-width: 70%;
  /* so it cant overflow */
@ -454,6 +464,24 @@ ul {
  .time-check-widget tr input[type="number"] {
    width: 5em; }

+#selector-wrapper {
+  height: 600px;
+  overflow-y: scroll;
+  position: relative; }
+  #selector-wrapper > img {
+    position: absolute;
+    z-index: 4;
+    max-width: 100%; }
+  #selector-wrapper > canvas {
+    position: relative;
+    z-index: 5;
+    max-width: 100%; }
+    #selector-wrapper > canvas:hover {
+      cursor: pointer; }
+
+#selector-current-xpath {
+  font-size: 80%; }
+
 #webdriver-override-options input[type="number"] {
  width: 5em; }

--- a/changedetectionio/static/styles/styles.scss
+++ b/changedetectionio/static/styles/styles.scss
@ -469,6 +469,7 @@ footer {
  div.tabs.collapsable ul li {
    display: block;
    border-radius: 0px;
+    margin-right: 0px;
  }

  input[type='text'] {
@ -613,6 +614,18 @@ $form-edge-padding: 20px;
    padding: 0px;
 }

+#beta-logo {
+    height: 50px;
+    // looks better when it's hanging off a little
+    right: -3px;
+    top: -3px;
+    position: absolute;
+}
+
+#selector-header {
+    padding-bottom: 1em;
+}
+
 .edit-form {
  min-width: 70%;
  /* so it cant overflow */
@ -649,6 +662,30 @@ ul {
    }
 }

+#selector-wrapper {
+ height: 600px;
+ overflow-y: scroll;
+ position: relative;
+    //width: 100%;
+ > img {
+    position: absolute;
+    z-index: 4;
+    max-width: 100%;
+ }
+ >canvas {
+    position: relative;
+    z-index: 5;
+     max-width: 100%;
+     &:hover {
+     cursor: pointer;
+     }
+ }
+}
+
+#selector-current-xpath {
+  font-size: 80%;
+}
+
 #webdriver-override-options {
        input[type="number"] {
            width: 5em;
--- a/changedetectionio/store.py
+++ b/changedetectionio/store.py
@ -372,6 +372,15 @@ class ChangeDetectionStore:

        return False

+    def visualselector_data_is_ready(self, watch_uuid):
+        output_path = "{}/{}".format(self.datastore_path, watch_uuid)
+        screenshot_filename = "{}/last-screenshot.png".format(output_path)
+        elements_index_filename = "{}/elements.json".format(output_path)
+        if path.isfile(screenshot_filename) and  path.isfile(elements_index_filename) :
+            return True
+
+        return False
+
    # Save as PNG, PNG is larger but better for doing visual diff in the future
    def save_screenshot(self, watch_uuid, screenshot: bytes):
        output_path = "{}/{}".format(self.datastore_path, watch_uuid)
@ -380,6 +389,14 @@ class ChangeDetectionStore:
            f.write(screenshot)
            f.close()

+    def save_xpath_data(self, watch_uuid, data):
+        output_path = "{}/{}".format(self.datastore_path, watch_uuid)
+        fname = "{}/elements.json".format(output_path)
+        with open(fname, 'w') as f:
+            f.write(json.dumps(data))
+            f.close()
+
+
    def sync_to_json(self):
        logging.info("Saving JSON..")
        print("Saving JSON..")
--- a/changedetectionio/templates/diff.html
+++ b/changedetectionio/templates/diff.html
@ -39,9 +39,6 @@
 <div class="tabs">
    <ul>
        <li class="tab" id="default-tab"><a href="#text">Text</a></li>
-{% if screenshot %}
-        <li class="tab"><a href="#screenshot">Current screenshot</a></li>
-{% endif %}
    </ul>
 </div>

@ -63,18 +60,6 @@
         </table>
         Diff algorithm from the amazing <a href="https://github.com/kpdecker/jsdiff">github.com/kpdecker/jsdiff</a>
     </div>
-
-{% if screenshot %}
-     <div class="tab-pane-inner" id="screenshot">
-         <p>
-             <i>For now, only the most recent screenshot is saved and displayed.</i></br>
-             <strong>Note: No changedetection is performed on the image yet, but we are working on that in an upcoming release.</strong>
-         </p>
-
-         <img src="{{url_for('static_content', group='screenshot', filename=uuid)}}">
-     </div>
-{% endif %}
-
 </div>


--- a/changedetectionio/templates/edit.html
+++ b/changedetectionio/templates/edit.html
@ -5,12 +5,18 @@
 <script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
 <script>
    const notification_base_url="{{url_for('ajax_callback_send_notification_test')}}";
+    const watch_visual_selector_data_url="{{url_for('static_content', group='visual_selector_data', filename=uuid)}}";
+    const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}";
+
 {% if emailprefix %}
    const email_notification_prefix=JSON.parse('{{ emailprefix|tojson }}');
 {% endif %}
+
 </script>
 <script type="text/javascript" src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script>
 <script type="text/javascript" src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>
+<script type="text/javascript" src="{{url_for('static_content', group='js', filename='visual-selector.js')}}" defer></script>
+<script type="text/javascript" src="{{url_for('static_content', group='js', filename='limit.js')}}" defer></script>

 <div class="edit-form monospaced-textarea">

@ -18,6 +24,7 @@
        <ul>
            <li class="tab" id="default-tab"><a href="#general">General</a></li>
            <li class="tab"><a href="#request">Request</a></li>
+            <li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Selector</a></li>
            <li class="tab"><a href="#filters-and-triggers">Filters &amp; Triggers</a></li>
            <li class="tab"><a href="#notifications">Notifications</a></li>
        </ul>
@ -194,6 +201,46 @@ nav
                </fieldset>
            </div>

+            <div class="tab-pane-inner visual-selector-ui" id="visualselector">
+                <img id="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}">
+
+                <fieldset>
+                    <div class="pure-control-group">
+                        {% if visualselector_enabled %}
+                            {% if visualselector_data_is_ready %}
+                                <div id="selector-header">
+                                    <a id="clear-selector" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Clear selection</a>
+                                    <i class="fetching-update-notice" style="font-size: 80%;">One moment, fetching screenshot and element information..</i>
+                                </div>
+                                <div id="selector-wrapper">
+                                    <!-- request the screenshot and get the element offset info ready -->
+                                    <!-- use img src ready load to know everything is ready to map out -->
+                                    <!-- @todo: maybe something interesting like a field to select 'elements that contain text... and their parents n' -->
+                                    <img id="selector-background" />
+                                    <canvas id="selector-canvas"></canvas>
+
+                                </div>
+                                <div id="selector-current-xpath" style="overflow-x: hidden"><strong>Currently:</strong>&nbsp;<span class="text">Loading...</span></div>
+
+                            <span class="pure-form-message-inline">
+                                <p><span style="font-weight: bold">Beta!</span> The Visual Selector is new and there may be minor bugs; please report pages that don't work and help us improve this software!</p>
+                            </span>
+
+                            {% else %}
+                                <span class="pure-form-message-inline">Screenshot and element data is not available or not yet ready.</span>
+                            {% endif %}
+                        {% else %}
+                            <span class="pure-form-message-inline">
+                                <p>Sorry, this functionality only works with Playwright/Chrome enabled watches.</p>
+                                <p>Enable the Playwright Chrome fetcher, or alternatively try our <a href="https://lemonade.changedetection.io/start">very affordable subscription based service</a>.</p>
+                                <p>This is because Selenium/WebDriver cannot extract full-page screenshots reliably.</p>
+
+                            </span>
+                        {% endif %}
+                    </div>
+                </fieldset>
+            </div>
+
            <div id="actions">
                <div class="pure-control-group">

--- a/changedetectionio/templates/preview.html
+++ b/changedetectionio/templates/preview.html
@ -10,9 +10,6 @@
 <div class="tabs">
    <ul>
        <li class="tab" id="default-tab"><a href="#text">Text</a></li>
-{% if screenshot %}
-        <li class="tab"><a href="#screenshot">Current screenshot</a></li>
-{% endif %}
    </ul>
 </div>

@ -31,16 +28,5 @@
            </tbody>
        </table>
    </div>
-
-{% if screenshot %}
-     <div class="tab-pane-inner" id="screenshot">
-         <p>
-             <i>For now, only the most recent screenshot is saved and displayed.</i></br>
-             <strong>Note: No changedetection is performed on the image yet, but we are working on that in an upcoming release.</strong>
-         </p>
-
-        <img src="{{url_for('static_content', group='screenshot', filename=uuid)}}">
-     </div>
-{% endif %}
 </div>
 {% endblock %}
--- a/changedetectionio/templates/watch-overview.html
+++ b/changedetectionio/templates/watch-overview.html
@ -3,6 +3,7 @@
 {% from '_helpers.jinja' import render_simple_field %}
 <script type="text/javascript" src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
 <script type="text/javascript" src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script>
+
 <div class="box">

    <form class="pure-form" action="{{ url_for('form_watch_add') }}" method="POST" id="new-watch-form">
--- a/changedetectionio/tests/fetchers/init.py
+++ b/changedetectionio/tests/fetchers/init.py
@ -0,0 +1,2 @@
+"""Tests for the app."""
+
--- a/changedetectionio/tests/fetchers/conftest.py
+++ b/changedetectionio/tests/fetchers/conftest.py
@ -0,0 +1,3 @@
+#!/usr/bin/python3
+
+from .. import conftest
--- a/changedetectionio/tests/fetchers/test_content.py
+++ b/changedetectionio/tests/fetchers/test_content.py
@ -0,0 +1,48 @@
+#!/usr/bin/python3
+
+import time
+from flask import url_for
+from ..util import live_server_setup
+import logging
+
+
+def test_fetch_webdriver_content(client, live_server):
+    """Check that a page fetched with the `html_webdriver` backend shows up in the preview.
+
+    Steps: select `html_webdriver` as the application fetch backend on the settings
+    page, import a single URL, wait for the index page to stop showing
+    'Checking now', then assert the preview page contains the expected text.
+    """
+    live_server_setup(live_server)
+
+    #####################
+    # Select the webdriver fetcher via the global settings form
+    res = client.post(
+        url_for("settings_page"),
+        data={"application-empty_pages_are_a_change": "",
+              "requests-time_between_check-minutes": 180,
+              'application-fetch_backend': "html_webdriver"},
+        follow_redirects=True
+    )
+
+    assert b"Settings updated." in res.data
+
+    # Add our URL to the import page
+    res = client.post(
+        url_for("import_page"),
+        data={"urls": "https://changedetection.io/ci-test.html"},
+        follow_redirects=True
+    )
+
+    assert b"1 Imported" in res.data
+    time.sleep(3)
+
+    # Poll the index (at most 20 times, 3 seconds apart) until the watch is no
+    # longer reported as being checked. If it never finishes we fall through
+    # and let the final assertion decide the outcome.
+    for _ in range(20):
+        res = client.get(url_for("index"))
+        if b'Checking now' not in res.data:
+            break
+        logging.getLogger().info("Waiting for check to not say 'Checking now'..")
+        time.sleep(3)
+
+
+    # NOTE(review): uuid="first" presumably resolves to the first watch - confirm in the route handler
+    res = client.get(
+        url_for("preview_page", uuid="first"),
+        follow_redirects=True
+    )
+    logging.getLogger().info("Looking for correct fetched HTML (text) from server")
+
+    assert b'cool it works' in res.data
--- a/changedetectionio/tests/test_trigger.py
+++ b/changedetectionio/tests/test_trigger.py
@ -121,7 +121,7 @@ def test_trigger_functionality(client, live_server):
    res = client.get(url_for("index"))
    assert b'unviewed' not in res.data

-    # Just to be sure.. set a regular modified change..
+    # Now set the content which contains the trigger text
    time.sleep(sleep_time_for_fetch_thread)
    set_modified_with_trigger_text_response()

@ -130,6 +130,12 @@ def test_trigger_functionality(client, live_server):
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data

+    # https://github.com/dgtlmoon/changedetection.io/issues/616
+    # Apparently the actual snapshot that contains the trigger never shows
+    res = client.get(url_for("diff_history_page", uuid="first"))
+    assert b'foobar123' in res.data
+
+
    # Check the preview/highlighter, we should be able to see what we triggered on, but it should be highlighted
    res = client.get(url_for("preview_page", uuid="first"))
    # We should be able to see what we ignored
--- a/changedetectionio/update_worker.py
+++ b/changedetectionio/update_worker.py
@ -40,10 +40,11 @@ class update_worker(threading.Thread):
                    contents = ""
                    screenshot = False
                    update_obj= {}
+                    xpath_data = False
                    now = time.time()

                    try:
-                        changed_detected, update_obj, contents, screenshot = update_handler.run(uuid)
+                        changed_detected, update_obj, contents, screenshot, xpath_data = update_handler.run(uuid)

                        # Re #342
                        # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
@ -55,6 +56,7 @@ class update_worker(threading.Thread):
                    except content_fetcher.ReplyWithContentButNoText as e:
                        # Totally fine, it's by choice - just continue on, nothing more to care about
                        # Page had elements/content but no renderable text
+                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found."})
                        pass
                    except content_fetcher.EmptyReply as e:
                        # Some kind of custom to-str handler in the exception handler that does this?
@ -148,6 +150,9 @@ class update_worker(threading.Thread):
                        # Always save the screenshot if it's available
                        if screenshot:
                            self.datastore.save_screenshot(watch_uuid=uuid, screenshot=screenshot)
+                        if xpath_data:
+                            self.datastore.save_xpath_data(watch_uuid=uuid, data=xpath_data)
+

                self.current_uuid = None  # Done
                self.q.task_done()