diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index 425575f8..a5087e19 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -626,6 +626,12 @@ def changedetection_app(config=None, datastore_o=None): if request.method == 'POST' and not form.validate(): flash("An error occurred, please see below.", "error") + visualselector_data_is_ready = datastore.visualselector_data_is_ready(uuid) + + # Only works reliably with Playwright + visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and default['fetch_backend'] == 'html_webdriver' + + output = render_template("edit.html", uuid=uuid, watch=datastore.data['watching'][uuid], @@ -633,7 +639,9 @@ def changedetection_app(config=None, datastore_o=None): has_empty_checktime=using_default_check_time, using_global_webdriver_wait=default['webdriver_delay'] is None, current_base_url=datastore.data['settings']['application']['base_url'], - emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False) + emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False), + visualselector_data_is_ready=visualselector_data_is_ready, + visualselector_enabled=visualselector_enabled ) return output @@ -976,10 +984,9 @@ def changedetection_app(config=None, datastore_o=None): @app.route("/static//", methods=['GET']) def static_content(group, filename): - if group == 'screenshot': - - from flask import make_response + from flask import make_response + if group == 'screenshot': # Could be sensitive, follow password requirements if datastore.data['settings']['application']['password'] and not flask_login.current_user.is_authenticated: abort(403) @@ -998,6 +1005,26 @@ def changedetection_app(config=None, datastore_o=None): except FileNotFoundError: abort(404) + + if group == 'visual_selector_data': + # Could be sensitive, follow password requirements + if datastore.data['settings']['application']['password'] and not flask_login.current_user.is_authenticated: + abort(403) + + # 
These files should be in our subdirectory + try: + # set nocache, set content-type + watch_dir = datastore_o.datastore_path + "/" + filename + response = make_response(send_from_directory(filename="elements.json", directory=watch_dir, path=watch_dir + "/elements.json")) + response.headers['Content-type'] = 'application/json' + response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate' + response.headers['Pragma'] = 'no-cache' + response.headers['Expires'] = 0 + return response + + except FileNotFoundError: + abort(404) + # These files should be in our subdirectory try: return send_from_directory("static/{}".format(group), path=filename) @@ -1150,7 +1177,6 @@ def changedetection_app(config=None, datastore_o=None): # paste in etc return redirect(url_for('index')) - # @todo handle ctrl break ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start() diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py index 0deb8966..5ac95927 100644 --- a/changedetectionio/content_fetcher.py +++ b/changedetectionio/content_fetcher.py @@ -27,6 +27,117 @@ class Fetcher(): status_code = None content = None headers = None + + fetcher_description = "No description" + xpath_element_js = """ + // Include the getXpath script directly, easier than fetching + !function(e,n){"object"==typeof exports&&"undefined"!=typeof module?module.exports=n():"function"==typeof define&&define.amd?define(n):(e=e||self).getXPath=n()}(this,function(){return function(e){var n=e;if(n&&n.id)return'//*[@id="'+n.id+'"]';for(var o=[];n&&Node.ELEMENT_NODE===n.nodeType;){for(var i=0,r=!1,d=n.previousSibling;d;)d.nodeType!==Node.DOCUMENT_TYPE_NODE&&d.nodeName===n.nodeName&&i++,d=d.previousSibling;for(d=n.nextSibling;d;){if(d.nodeName===n.nodeName){r=!0;break}d=d.nextSibling}o.push((n.prefix?n.prefix+":":"")+n.localName+(i||r?"["+(i+1)+"]":"")),n=n.parentNode}return o.length?"/"+o.reverse().join("/"):""}}); + + + const findUpTag = (el) => { + 
let r = el + chained_css = []; + depth=0; + + // Strategy 1: Keep going up until we hit an ID tag, imagine it's like #list-widget div h4 + while (r.parentNode) { + if(depth==5) { + break; + } + if('' !==r.id) { + chained_css.unshift("#"+r.id); + final_selector= chained_css.join('>'); + // Be sure theres only one, some sites have multiples of the same ID tag :-( + if (window.document.querySelectorAll(final_selector).length ==1 ) { + return final_selector; + } + return null; + } else { + chained_css.unshift(r.tagName.toLowerCase()); + } + r=r.parentNode; + depth+=1; + } + return null; + } + + + // @todo - if it's SVG or IMG, go into image diff mode + var elements = window.document.querySelectorAll("div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section, summary"); + var size_pos=[]; + // after page fetch, inject this JS + // build a map of all elements and their positions (maybe that only include text?) + var bbox; + for (var i = 0; i < elements.length; i++) { + bbox = elements[i].getBoundingClientRect(); + + // forget really small ones + if (bbox['width'] <20 && bbox['height'] < 20 ) { + continue; + } + + // @todo the getXpath kind of sucks, it doesnt know when there is for example just one ID sometimes + // it should not traverse when we know we can anchor off just an ID one level up etc.. + // maybe, get current class or id, keep traversing up looking for only class or id until there is just one match + + // 1st primitive - if it has class, try joining it all and select, if theres only one.. well thats us. 
+ xpath_result=false; + + try { + var d= findUpTag(elements[i]); + if (d) { + xpath_result =d; + } + } catch (e) { + var x=1; + } + +// You could swap it and default to getXpath and then try the smarter one + // default back to the less intelligent one + if (!xpath_result) { + xpath_result = getXPath(elements[i]); + } + if(window.getComputedStyle(elements[i]).visibility === "hidden") { + continue; + } + + size_pos.push({ + xpath: xpath_result, + width: Math.round(bbox['width']), + height: Math.round(bbox['height']), + left: Math.floor(bbox['left']), + top: Math.floor(bbox['top']), + childCount: elements[i].childElementCount + }); + } + + + // inject the current one set in the css_filter, which may be a CSS rule + // used for displaying the current one in VisualSelector, where its not one we generated. + if (css_filter.length) { + // is it xpath? + if (css_filter.startsWith('/') ) { + q=document.evaluate(css_filter, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue; + } else { + q=document.querySelector(css_filter); + } + bbox = q.getBoundingClientRect(); + if (bbox && bbox['width'] >0 && bbox['height']>0) { + size_pos.push({ + xpath: css_filter, + width: bbox['width'], + height: bbox['height'], + left: bbox['left'], + top: bbox['top'], + childCount: q.childElementCount + }); + } + } +// https://stackoverflow.com/questions/1145850/how-to-get-height-of-entire-document-with-javascript + return {'size_pos':size_pos, 'browser_width': window.innerWidth, 'browser_height':document.body.scrollHeight}; + """ + xpath_data = None + # Will be needed in the future by the VisualSelector, always get this where possible. 
screenshot = False fetcher_description = "No description" @@ -47,7 +158,8 @@ class Fetcher(): request_headers, request_body, request_method, - ignore_status_codes=False): + ignore_status_codes=False, + current_css_filter=None): # Should set self.error, self.status_code and self.content pass @@ -128,7 +240,8 @@ class base_html_playwright(Fetcher): request_headers, request_body, request_method, - ignore_status_codes=False): + ignore_status_codes=False, + current_css_filter=None): from playwright.sync_api import sync_playwright import playwright._impl._api_types @@ -148,8 +261,8 @@ class base_html_playwright(Fetcher): proxy=self.proxy ) page = context.new_page() - page.set_viewport_size({"width": 1280, "height": 1024}) try: + # Bug - never set viewport size BEFORE page.goto response = page.goto(url, timeout=timeout * 1000, wait_until='commit') # Wait_until = commit # - `'commit'` - consider operation to be finished when network response is received and the document started loading. @@ -166,14 +279,27 @@ class base_html_playwright(Fetcher): if len(page.content().strip()) == 0: raise EmptyReply(url=url, status_code=None) + # Bug 2(?) 
Set the viewport size AFTER loading the page + page.set_viewport_size({"width": 1280, "height": 1024}) + # Bugish - Let the page redraw/reflow + page.set_viewport_size({"width": 1280, "height": 1024}) + self.status_code = response.status self.content = page.content() self.headers = response.all_headers() + if current_css_filter is not None: + page.evaluate("var css_filter='{}'".format(current_css_filter)) + else: + page.evaluate("var css_filter=''") + + self.xpath_data = page.evaluate("async () => {" + self.xpath_element_js + "}") + # Bug 3 in Playwright screenshot handling # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it # JPEG is better here because the screenshots can be very very large page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024}) - self.screenshot = page.screenshot(type='jpeg', full_page=True, quality=90) + self.screenshot = page.screenshot(type='jpeg', full_page=True, quality=92) + context.close() browser.close() @@ -225,7 +351,8 @@ class base_html_webdriver(Fetcher): request_headers, request_body, request_method, - ignore_status_codes=False): + ignore_status_codes=False, + current_css_filter=None): from selenium import webdriver from selenium.webdriver.common.desired_capabilities import DesiredCapabilities @@ -245,6 +372,10 @@ class base_html_webdriver(Fetcher): self.quit() raise + self.driver.set_window_size(1280, 1024) + self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))) + self.screenshot = self.driver.get_screenshot_as_png() + # @todo - how to check this? is it possible? 
self.status_code = 200 # @todo somehow we should try to get this working for WebDriver @@ -254,8 +385,6 @@ class base_html_webdriver(Fetcher): time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay) self.content = self.driver.page_source self.headers = {} - self.screenshot = self.driver.get_screenshot_as_png() - self.quit() # Does the connection to the webdriver work? run a test connection. def is_ready(self): @@ -292,7 +421,8 @@ class html_requests(Fetcher): request_headers, request_body, request_method, - ignore_status_codes=False): + ignore_status_codes=False, + current_css_filter=None): proxies={} diff --git a/changedetectionio/fetch_site_status.py b/changedetectionio/fetch_site_status.py index c8b95321..8629f454 100644 --- a/changedetectionio/fetch_site_status.py +++ b/changedetectionio/fetch_site_status.py @@ -94,6 +94,7 @@ class perform_site_check(): # If the klass doesnt exist, just use a default klass = getattr(content_fetcher, "html_requests") + proxy_args = self.set_proxy_from_list(watch) fetcher = klass(proxy_override=proxy_args) @@ -104,7 +105,8 @@ class perform_site_check(): elif system_webdriver_delay is not None: fetcher.render_extract_delay = system_webdriver_delay - fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_code) + fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_code, watch['css_filter']) + fetcher.quit() # Fetching complete, now filters # @todo move to class / maybe inside of fetcher abstract base? 
@@ -236,4 +238,4 @@ class perform_site_check(): if not watch['title'] or not len(watch['title']): update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content) - return changed_detected, update_obj, text_content_before_ignored_filter, fetcher.screenshot + return changed_detected, update_obj, text_content_before_ignored_filter, fetcher.screenshot, fetcher.xpath_data diff --git a/changedetectionio/run_all_tests.sh b/changedetectionio/run_all_tests.sh index 82b603f3..c2bbf9aa 100755 --- a/changedetectionio/run_all_tests.sh +++ b/changedetectionio/run_all_tests.sh @@ -22,3 +22,26 @@ echo "RUNNING WITH BASE_URL SET" export BASE_URL="https://really-unique-domain.io" pytest tests/test_notification.py + +# Now for the selenium and playwright/browserless fetchers +# Note - this is not UI functional tests - just checking that each one can fetch the content + +echo "TESTING WEBDRIVER FETCH > SELENIUM/WEBDRIVER..." +docker run -d --name $$-test_selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome-debug:3.141.59 +# takes a while to spin up +sleep 5 +export WEBDRIVER_URL=http://localhost:4444/wd/hub +pytest tests/fetchers/test_content.py +unset WEBDRIVER_URL +docker kill $$-test_selenium + +echo "TESTING WEBDRIVER FETCH > PLAYWRIGHT/BROWSERLESS..." 
+# Not all platforms support playwright (not ARM/rPI), so it's not packaged in requirements.txt +pip3 install playwright~=1.22 +docker run -d --name $$-test_browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm -p 3000:3000 --shm-size="2g" browserless/chrome:1.53-chrome-stable +# takes a while to spin up +sleep 5 +export PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000 +pytest tests/fetchers/test_content.py +unset PLAYWRIGHT_DRIVER_URL +docker kill $$-test_browserless \ No newline at end of file diff --git a/changedetectionio/static/images/Playwright-icon.png b/changedetectionio/static/images/Playwright-icon.png new file mode 100644 index 00000000..75db893b Binary files /dev/null and b/changedetectionio/static/images/Playwright-icon.png differ diff --git a/changedetectionio/static/images/beta-logo.png b/changedetectionio/static/images/beta-logo.png new file mode 100644 index 00000000..e5533ee2 Binary files /dev/null and b/changedetectionio/static/images/beta-logo.png differ diff --git a/changedetectionio/static/js/limit.js b/changedetectionio/static/js/limit.js new file mode 100644 index 00000000..bc190cde --- /dev/null +++ b/changedetectionio/static/js/limit.js @@ -0,0 +1,56 @@ +/** + * debounce + * @param {integer} milliseconds This param indicates the number of milliseconds + * to wait after the last call before calling the original function. + * @param {object} What "this" refers to in the returned function. + * @return {function} This returns a function that when called will wait the + * indicated number of milliseconds after the last call before + * calling the original function. 
+ */ +Function.prototype.debounce = function (milliseconds, context) { + var baseFunction = this, + timer = null, + wait = milliseconds; + + return function () { + var self = context || this, + args = arguments; + + function complete() { + baseFunction.apply(self, args); + timer = null; + } + + if (timer) { + clearTimeout(timer); + } + + timer = setTimeout(complete, wait); + }; +}; + +/** +* throttle +* @param {integer} milliseconds This param indicates the number of milliseconds +* to wait between calls before calling the original function. +* @param {object} What "this" refers to in the returned function. +* @return {function} This returns a function that when called will wait the +* indicated number of milliseconds between calls before +* calling the original function. +*/ +Function.prototype.throttle = function (milliseconds, context) { + var baseFunction = this, + lastEventTimestamp = null, + limit = milliseconds; + + return function () { + var self = context || this, + args = arguments, + now = Date.now(); + + if (!lastEventTimestamp || now - lastEventTimestamp >= limit) { + lastEventTimestamp = now; + baseFunction.apply(self, args); + } + }; +}; \ No newline at end of file diff --git a/changedetectionio/static/js/visual-selector.js b/changedetectionio/static/js/visual-selector.js new file mode 100644 index 00000000..e6fa9091 --- /dev/null +++ b/changedetectionio/static/js/visual-selector.js @@ -0,0 +1,219 @@ +// Horrible proof of concept code :) +// yes - this is really a hack, if you are a front-ender and want to help, please get in touch! 
+ +$(document).ready(function() { + + $('#visualselector-tab').click(function () { + $("img#selector-background").off('load'); + bootstrap_visualselector(); + }); + + $(document).on('keydown', function(event) { + if ($("img#selector-background").is(":visible")) { + if (event.key == "Escape") { + state_clicked=false; + ctx.clearRect(0, 0, c.width, c.height); + } + } + }); + + // For when the page loads + if(!window.location.hash || window.location.hash != '#visualselector') { + $("img#selector-background").attr('src',''); + return; + } + + // Handle clearing button/link + $('#clear-selector').on('click', function(event) { + if(!state_clicked) { + alert('Oops, Nothing selected!'); + } + state_clicked=false; + ctx.clearRect(0, 0, c.width, c.height); + }); + + + bootstrap_visualselector(); + + var current_selected_i; + var state_clicked=false; + + var c; + + // greyed out fill context + var xctx; + // redline highlight context + var ctx; + + var current_default_xpath; + var x_scale=1; + var y_scale=1; + var selector_image; + var selector_image_rect; + var vh; + var selector_data; + + + function bootstrap_visualselector() { + if ( 1 ) { + // bootstrap it, this will trigger everything else + $("img#selector-background").bind('load', function () { + console.log("Loaded background..."); + c = document.getElementById("selector-canvas"); + // greyed out fill context + xctx = c.getContext("2d"); + // redline highlight context + ctx = c.getContext("2d"); + current_default_xpath =$("#css_filter").val(); + fetch_data(); + $('#selector-canvas').off("mousemove"); + // screenshot_url defined in the edit.html template + }).attr("src", screenshot_url); + } + } + + function fetch_data() { + // Image is ready + $('.fetching-update-notice').html("Fetching element data.."); + + $.ajax({ + url: watch_visual_selector_data_url, + context: document.body + }).done(function (data) { + $('.fetching-update-notice').html("Rendering.."); + selector_data = data; + console.log("Reported browser 
width from backend: "+data['browser_width']); + state_clicked=false; + set_scale(); + reflow_selector(); + $('.fetching-update-notice').fadeOut(); + }); + }; + + + + function set_scale() { + + // some things to check if the scaling doesnt work + // - that the widths/sizes really are about the actual screen size cat elements.json |grep -o width......|sort|uniq + selector_image = $("img#selector-background")[0]; + selector_image_rect = selector_image.getBoundingClientRect(); + + // make the canvas the same size as the image + $('#selector-canvas').attr('height', selector_image_rect.height); + $('#selector-canvas').attr('width', selector_image_rect.width); + $('#selector-wrapper').attr('width', selector_image_rect.width); + x_scale = selector_image_rect.width / selector_data['browser_width']; + y_scale = selector_image_rect.height / selector_image.naturalHeight; + ctx.strokeStyle = 'rgba(255,0,0, 0.9)'; + ctx.fillStyle = 'rgba(255,0,0, 0.1)'; + ctx.lineWidth = 3; + console.log("scaling set x: "+x_scale+" by y:"+y_scale); + $("#selector-current-xpath").css('max-width', selector_image_rect.width); + } + + function reflow_selector() { + $(window).resize(function() { + set_scale(); + highlight_current_selected_i(); + }); + var selector_currnt_xpath_text=$("#selector-current-xpath span"); + + set_scale(); + + console.log(selector_data['size_pos'].length + " selectors found"); + + // highlight the default one if we can find it in the xPath list + // or the xpath matches the default one + found = false; + if(current_default_xpath.length) { + for (var i = selector_data['size_pos'].length; i!==0; i--) { + var sel = selector_data['size_pos'][i-1]; + if(selector_data['size_pos'][i - 1].xpath == current_default_xpath) { + console.log("highlighting "+current_default_xpath); + current_selected_i = i-1; + highlight_current_selected_i(); + found = true; + break; + } + } + if(!found) { + alert("unfortunately your existing CSS/xPath Filter was no longer found!"); + } + } + + + 
$('#selector-canvas').bind('mousemove', function (e) { + if(state_clicked) { + return; + } + ctx.clearRect(0, 0, c.width, c.height); + current_selected_i=null; + + // Reverse order - the most specific one should be deeper/"laster" + // Basically, find the most 'deepest' + var found=0; + ctx.fillStyle = 'rgba(205,0,0,0.35)'; + for (var i = selector_data['size_pos'].length; i!==0; i--) { + // draw all of them? let them choose somehow? + var sel = selector_data['size_pos'][i-1]; + // If we are in a bounding-box + if (e.offsetY > sel.top * y_scale && e.offsetY < sel.top * y_scale + sel.height * y_scale + && + e.offsetX > sel.left * y_scale && e.offsetX < sel.left * y_scale + sel.width * y_scale + + ) { + + // FOUND ONE + set_current_selected_text(sel.xpath); + ctx.strokeRect(sel.left * x_scale, sel.top * y_scale, sel.width * x_scale, sel.height * y_scale); + ctx.fillRect(sel.left * x_scale, sel.top * y_scale, sel.width * x_scale, sel.height * y_scale); + + // no need to keep digging + // @todo or, O to go out/up, I to go in + // or double click to go up/out the selector? 
+ current_selected_i=i-1; + found+=1; + break; + } + } + + }.debounce(5)); + + function set_current_selected_text(s) { + selector_currnt_xpath_text[0].innerHTML=s; + } + + function highlight_current_selected_i() { + if(state_clicked) { + state_clicked=false; + xctx.clearRect(0,0,c.width, c.height); + return; + } + + var sel = selector_data['size_pos'][current_selected_i]; + if (sel[0] == '/') { + // @todo - not sure just checking / is right + $("#css_filter").val('xpath:'+sel.xpath); + } else { + $("#css_filter").val(sel.xpath); + } + xctx.fillStyle = 'rgba(205,205,205,0.95)'; + xctx.strokeStyle = 'rgba(225,0,0,0.9)'; + xctx.lineWidth = 3; + xctx.fillRect(0,0,c.width, c.height); + // Clear out what only should be seen (make a clear/clean spot) + xctx.clearRect(sel.left * x_scale, sel.top * y_scale, sel.width * x_scale, sel.height * y_scale); + xctx.strokeRect(sel.left * x_scale, sel.top * y_scale, sel.width * x_scale, sel.height * y_scale); + state_clicked=true; + set_current_selected_text(sel.xpath); + + } + + + $('#selector-canvas').bind('mousedown', function (e) { + highlight_current_selected_i(); + }); + } + +}); diff --git a/changedetectionio/static/js/watch-overview.js b/changedetectionio/static/js/watch-overview.js index 1431b1b9..a06034b1 100644 --- a/changedetectionio/static/js/watch-overview.js +++ b/changedetectionio/static/js/watch-overview.js @@ -4,6 +4,7 @@ $(function () { $(this).closest('.unviewed').removeClass('unviewed'); }); + $('.with-share-link > *').click(function () { $("#copied-clipboard").remove(); @@ -20,5 +21,6 @@ $(function () { $(this).remove(); }); }); + }); diff --git a/changedetectionio/static/styles/styles.css b/changedetectionio/static/styles/styles.css index 26300bea..724be932 100644 --- a/changedetectionio/static/styles/styles.css +++ b/changedetectionio/static/styles/styles.css @@ -338,7 +338,8 @@ footer { padding-top: 110px; } div.tabs.collapsable ul li { display: block; - border-radius: 0px; } + border-radius: 0px; + 
margin-right: 0px; } input[type='text'] { width: 100%; } /* @@ -429,6 +430,15 @@ and also iPads specifically. .tab-pane-inner:target { display: block; } +#beta-logo { + height: 50px; + right: -3px; + top: -3px; + position: absolute; } + +#selector-header { + padding-bottom: 1em; } + .edit-form { min-width: 70%; /* so it cant overflow */ @@ -454,6 +464,24 @@ ul { .time-check-widget tr input[type="number"] { width: 5em; } +#selector-wrapper { + height: 600px; + overflow-y: scroll; + position: relative; } + #selector-wrapper > img { + position: absolute; + z-index: 4; + max-width: 100%; } + #selector-wrapper > canvas { + position: relative; + z-index: 5; + max-width: 100%; } + #selector-wrapper > canvas:hover { + cursor: pointer; } + +#selector-current-xpath { + font-size: 80%; } + #webdriver-override-options input[type="number"] { width: 5em; } diff --git a/changedetectionio/static/styles/styles.scss b/changedetectionio/static/styles/styles.scss index 6066bcde..ca97be4a 100644 --- a/changedetectionio/static/styles/styles.scss +++ b/changedetectionio/static/styles/styles.scss @@ -469,6 +469,7 @@ footer { div.tabs.collapsable ul li { display: block; border-radius: 0px; + margin-right: 0px; } input[type='text'] { @@ -613,6 +614,18 @@ $form-edge-padding: 20px; padding: 0px; } +#beta-logo { + height: 50px; + // looks better when it's hanging off a little + right: -3px; + top: -3px; + position: absolute; +} + +#selector-header { + padding-bottom: 1em; +} + .edit-form { min-width: 70%; /* so it cant overflow */ @@ -649,6 +662,30 @@ ul { } } +#selector-wrapper { + height: 600px; + overflow-y: scroll; + position: relative; + //width: 100%; + > img { + position: absolute; + z-index: 4; + max-width: 100%; + } + >canvas { + position: relative; + z-index: 5; + max-width: 100%; + &:hover { + cursor: pointer; + } + } +} + +#selector-current-xpath { + font-size: 80%; +} + #webdriver-override-options { input[type="number"] { width: 5em; diff --git a/changedetectionio/store.py 
b/changedetectionio/store.py index 2ef09c54..45d9a0a5 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -372,6 +372,15 @@ class ChangeDetectionStore: return False + def visualselector_data_is_ready(self, watch_uuid): + output_path = "{}/{}".format(self.datastore_path, watch_uuid) + screenshot_filename = "{}/last-screenshot.png".format(output_path) + elements_index_filename = "{}/elements.json".format(output_path) + if path.isfile(screenshot_filename) and path.isfile(elements_index_filename) : + return True + + return False + # Save as PNG, PNG is larger but better for doing visual diff in the future def save_screenshot(self, watch_uuid, screenshot: bytes): output_path = "{}/{}".format(self.datastore_path, watch_uuid) @@ -380,6 +389,14 @@ class ChangeDetectionStore: f.write(screenshot) f.close() + def save_xpath_data(self, watch_uuid, data): + output_path = "{}/{}".format(self.datastore_path, watch_uuid) + fname = "{}/elements.json".format(output_path) + with open(fname, 'w') as f: + f.write(json.dumps(data)) + f.close() + + def sync_to_json(self): logging.info("Saving JSON..") print("Saving JSON..") diff --git a/changedetectionio/templates/diff.html b/changedetectionio/templates/diff.html index c32da120..db8b7f73 100644 --- a/changedetectionio/templates/diff.html +++ b/changedetectionio/templates/diff.html @@ -39,9 +39,6 @@
@@ -63,18 +60,6 @@ Diff algorithm from the amazing github.com/kpdecker/jsdiff - -{% if screenshot %} -
-

- For now, only the most recent screenshot is saved and displayed.
- Note: No changedetection is performed on the image yet, but we are working on that in an upcoming release. -

- - -
-{% endif %} - diff --git a/changedetectionio/templates/edit.html b/changedetectionio/templates/edit.html index 17ad6eb7..bf5e7aa3 100644 --- a/changedetectionio/templates/edit.html +++ b/changedetectionio/templates/edit.html @@ -5,12 +5,18 @@ + +
@@ -18,6 +24,7 @@ @@ -194,6 +201,46 @@ nav
+
+ + +
+
+ {% if visualselector_enabled %} + {% if visualselector_data_is_ready %} +
+ Clear selection + One moment, fetching screenshot and element information.. +
+
+ + + + + + +
+
Currently: Loading...
+ + +

Beta! The Visual Selector is new and there may be minor bugs; please report pages that don't work and help us to improve this software!

+
+ + {% else %} + Screenshot and element data is not available or not yet ready. + {% endif %} + {% else %} + +

Sorry, this functionality only works with Playwright/Chrome enabled watches.

+

Enable the Playwright Chrome fetcher, or alternatively try our very affordable subscription-based service.

+

This is because Selenium/WebDriver cannot extract full page screenshots reliably.

+ +
+ {% endif %} +
+
+
+
diff --git a/changedetectionio/templates/preview.html b/changedetectionio/templates/preview.html index 25ff4986..7015d795 100644 --- a/changedetectionio/templates/preview.html +++ b/changedetectionio/templates/preview.html @@ -10,9 +10,6 @@
@@ -31,16 +28,5 @@
- -{% if screenshot %} -
-

- For now, only the most recent screenshot is saved and displayed.
- Note: No changedetection is performed on the image yet, but we are working on that in an upcoming release. -

- - -
-{% endif %}
{% endblock %} \ No newline at end of file diff --git a/changedetectionio/templates/watch-overview.html b/changedetectionio/templates/watch-overview.html index 1f37d5bc..f2d0c857 100644 --- a/changedetectionio/templates/watch-overview.html +++ b/changedetectionio/templates/watch-overview.html @@ -3,6 +3,7 @@ {% from '_helpers.jinja' import render_simple_field %} +
diff --git a/changedetectionio/tests/fetchers/__init__.py b/changedetectionio/tests/fetchers/__init__.py new file mode 100644 index 00000000..085b3d78 --- /dev/null +++ b/changedetectionio/tests/fetchers/__init__.py @@ -0,0 +1,2 @@ +"""Tests for the app.""" + diff --git a/changedetectionio/tests/fetchers/conftest.py b/changedetectionio/tests/fetchers/conftest.py new file mode 100644 index 00000000..430513d4 --- /dev/null +++ b/changedetectionio/tests/fetchers/conftest.py @@ -0,0 +1,3 @@ +#!/usr/bin/python3 + +from .. import conftest diff --git a/changedetectionio/tests/fetchers/test_content.py b/changedetectionio/tests/fetchers/test_content.py new file mode 100644 index 00000000..02c2c026 --- /dev/null +++ b/changedetectionio/tests/fetchers/test_content.py @@ -0,0 +1,48 @@ +#!/usr/bin/python3 + +import time +from flask import url_for +from ..util import live_server_setup +import logging + + +def test_fetch_webdriver_content(client, live_server): + live_server_setup(live_server) + + ##################### + res = client.post( + url_for("settings_page"), + data={"application-empty_pages_are_a_change": "", + "requests-time_between_check-minutes": 180, + 'application-fetch_backend': "html_webdriver"}, + follow_redirects=True + ) + + assert b"Settings updated." 
in res.data + + # Add our URL to the import page + res = client.post( + url_for("import_page"), + data={"urls": "https://changedetection.io/ci-test.html"}, + follow_redirects=True + ) + + assert b"1 Imported" in res.data + time.sleep(3) + attempt = 0 + while attempt < 20: + res = client.get(url_for("index")) + if not b'Checking now' in res.data: + break + logging.getLogger().info("Waiting for check to not say 'Checking now'..") + time.sleep(3) + attempt += 1 + + + res = client.get( + url_for("preview_page", uuid="first"), + follow_redirects=True + ) + logging.getLogger().info("Looking for correct fetched HTML (text) from server") + + assert b'cool it works' in res.data \ No newline at end of file diff --git a/changedetectionio/tests/test_trigger.py b/changedetectionio/tests/test_trigger.py index 7eacaff5..66b8121e 100644 --- a/changedetectionio/tests/test_trigger.py +++ b/changedetectionio/tests/test_trigger.py @@ -121,7 +121,7 @@ def test_trigger_functionality(client, live_server): res = client.get(url_for("index")) assert b'unviewed' not in res.data - # Just to be sure.. set a regular modified change.. 
+ # Now set the content which contains the trigger text time.sleep(sleep_time_for_fetch_thread) set_modified_with_trigger_text_response() @@ -130,6 +130,12 @@ def test_trigger_functionality(client, live_server): res = client.get(url_for("index")) assert b'unviewed' in res.data + # https://github.com/dgtlmoon/changedetection.io/issues/616 + # Apparently the actual snapshot that contains the trigger never shows + res = client.get(url_for("diff_history_page", uuid="first")) + assert b'foobar123' in res.data + + # Check the preview/highlighter, we should be able to see what we triggered on, but it should be highlighted res = client.get(url_for("preview_page", uuid="first")) # We should be able to see what we ignored diff --git a/changedetectionio/update_worker.py b/changedetectionio/update_worker.py index c23ae82a..0e2b344f 100644 --- a/changedetectionio/update_worker.py +++ b/changedetectionio/update_worker.py @@ -40,10 +40,11 @@ class update_worker(threading.Thread): contents = "" screenshot = False update_obj= {} + xpath_data = False now = time.time() try: - changed_detected, update_obj, contents, screenshot = update_handler.run(uuid) + changed_detected, update_obj, contents, screenshot, xpath_data = update_handler.run(uuid) # Re #342 # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes. @@ -55,6 +56,7 @@ class update_worker(threading.Thread): except content_fetcher.ReplyWithContentButNoText as e: # Totally fine, it's by choice - just continue on, nothing more to care about # Page had elements/content but no renderable text + self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found."}) pass except content_fetcher.EmptyReply as e: # Some kind of custom to-str handler in the exception handler that does this? 
@@ -148,6 +150,9 @@ class update_worker(threading.Thread): # Always save the screenshot if it's available if screenshot: self.datastore.save_screenshot(watch_uuid=uuid, screenshot=screenshot) + if xpath_data: + self.datastore.save_xpath_data(watch_uuid=uuid, data=xpath_data) + self.current_uuid = None # Done self.q.task_done()