just re-use the existing page fetch

550-visual-selector
dgtlmoon 3 years ago
parent a086991b54
commit 12aa77ee35

@ -1101,158 +1101,6 @@ def changedetection_app(config=None, datastore_o=None):
flash("{} watches are queued for rechecking.".format(i))
return redirect(url_for('index', tag=tag))
@app.route("/api/request-visual-selector-data/<string:uuid>", methods=['GET'])
@login_required
def visualselector_request_current_screenshot_and_metadata(uuid):
    """Fetch the watch's URL in a real browser, persist an element-position map
    (elements.json) for the Visual Selector UI, and return a full-page JPEG
    screenshot as the HTTP response.
    """
    import json
    watch = deepcopy(datastore.data['watching'][uuid])
    # Delete any stale element map first, so the UI can never pair a fresh
    # screenshot with outdated coordinates if the fetch below fails part-way.
    path_to_datafile = os.path.join(datastore_o.datastore_path, uuid, "elements.json")
    try:
        os.unlink(path_to_datafile)
    except FileNotFoundError:
        pass
    # docker run -p 3000:3000 browserless/chrome
    # @todo this needs abstracting out?
    from playwright.sync_api import sync_playwright
    with sync_playwright() as p:
        browser = p.chromium.connect_over_cdp("ws://127.0.0.1:3000")
        page = browser.new_page()
        try:
            # @todo handle timeouts for long pages >30sec
            try:
                page.goto(watch['url'])
            except Exception:
                # Best effort: still screenshot whatever rendered, rather than
                # failing the whole request on a navigation error.
                pass
            # https://github.com/microsoft/playwright/issues/620
            # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
            screenshot = page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024})
            screenshot = page.screenshot(type='jpeg', full_page=True, quality=90)
            # Could be made a lot faster
            # https://toruskit.com/blog/how-to-get-element-bounds-without-reflow/
            # json.dumps() emits a fully-escaped JS string literal (quotes,
            # backslashes, newlines) - safer than manual replace() quoting.
            page.evaluate("var css_filter={};".format(json.dumps(watch['css_filter'])))
            info = page.evaluate("""async () => {
// Include the getXpath script directly, easier than fetching
!function(e,n){"object"==typeof exports&&"undefined"!=typeof module?module.exports=n():"function"==typeof define&&define.amd?define(n):(e=e||self).getXPath=n()}(this,function(){return function(e){var n=e;if(n&&n.id)return'//*[@id="'+n.id+'"]';for(var o=[];n&&Node.ELEMENT_NODE===n.nodeType;){for(var i=0,r=!1,d=n.previousSibling;d;)d.nodeType!==Node.DOCUMENT_TYPE_NODE&&d.nodeName===n.nodeName&&i++,d=d.previousSibling;for(d=n.nextSibling;d;){if(d.nodeName===n.nodeName){r=!0;break}d=d.nextSibling}o.push((n.prefix?n.prefix+":":"")+n.localName+(i||r?"["+(i+1)+"]":"")),n=n.parentNode}return o.length?"/"+o.reverse().join("/"):""}});
//# sourceMappingURL=index.umd.js.map
const findUpTag = (el) => {
let r = el
chained_css = [];
while (r.parentNode) {
if(r.classList.length >0) {
// limit to just using 2 class names of each, stops from getting really huge selector strings
current_css='.'+Array.from(r.classList).slice(0, 2).join('.');
chained_css.unshift(current_css);
var f=chained_css.join(' ');
var q=document.querySelectorAll(f);
if(q.length==1) return current_css;
if(f.length >120) return null;
}
r = r.parentNode;
}
return null;
}
var elements = document.getElementsByTagName("*");
var size_pos=[];
// after page fetch, inject this JS
// build a map of all elements and their positions (maybe that only include text?)
var bbox;
for (var i = 0; i < elements.length; i++) {
bbox = elements[i].getBoundingClientRect();
// forget reallysmall ones
if (bbox['width'] <10 && bbox['height'] <10 ) {
continue;
}
// @todo the getXpath kind of sucks, it doesnt know when there is for example just one ID sometimes
// it should not traverse when we know we can anchor off just an ID one level up etc..
// maybe, get current class or id, keep traversing up looking for only class or id until there is just one match
// 1st primitive - if it has class, try joining it all and select, if theres only one.. well thats us.
xpath_result=false;
try {
var d= findUpTag(elements[i]);
if (d) {
xpath_result =d;
}
} catch (e) {
var x=1;
}
// default back to the less intelligent one
if (!xpath_result) {
xpath_result = getXPath(elements[i]);
}
size_pos.push({
xpath: xpath_result,
width: bbox['width'],
height: bbox['height'],
left: bbox['left'],
top: bbox['top'],
childCount: elements[i].childElementCount
});
}
// inject the current one set in the css_filter, which may be a CSS rule
// used for displaying the current one in VisualSelector, where its not one we generated.
if (css_filter.length) {
// is it xpath?
if (css_filter.startsWith('/') ) {
q=document.evaluate(css_filter, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
} else {
q=document.querySelector(css_filter);
}
if (q) {
bbox = q.getBoundingClientRect();
size_pos.push({
xpath: css_filter,
width: bbox['width'],
height: bbox['height'],
left: bbox['left'],
top: bbox['top'],
childCount: q.childElementCount
});
}
}
return size_pos;
}""")
        finally:
            # Always release the remote browser session, even when a
            # screenshot/evaluate call above raises - otherwise the
            # browserless/chrome container leaks a connection per request.
            browser.close()
    with open(path_to_datafile, 'w') as f:
        f.write(json.dumps(info, indent=4))
    response = make_response(screenshot)
    response.headers['Content-type'] = 'image/jpeg'
    response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
    response.headers['Pragma'] = 'no-cache'
    response.headers['Expires'] = 0
    return response
# @todo handle ctrl break
# Thread.start() returns None, so the old one-liner
# `threading.Thread(...).start()` always bound ticker_thread to None and the
# thread could never be joined or signalled later. Keep a real reference.
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks)
ticker_thread.start()

@ -7,6 +7,7 @@ from selenium.webdriver.common.proxy import Proxy as SeleniumProxy
from selenium.common.exceptions import WebDriverException
import requests
import time
import json
import urllib3.exceptions
@ -26,6 +27,102 @@ class Fetcher():
# Request headers to send when fetching (None = use the fetcher's defaults).
headers = None
# Human-readable description of this fetcher (shown in the UI fetcher choice).
fetcher_description ="No description"
# JavaScript injected into the fetched page: walks every element, builds a
# list of {xpath, width, height, left, top, childCount} records (skipping
# elements smaller than 10x10), and appends an entry for the currently-set
# filter. NOTE(review): this script reads a JS variable `css_filter` that the
# caller must define first by prepending "var css_filter='...';" - confirm
# every call site does so.
xpath_element_js="""
// Include the getXpath script directly, easier than fetching
!function(e,n){"object"==typeof exports&&"undefined"!=typeof module?module.exports=n():"function"==typeof define&&define.amd?define(n):(e=e||self).getXPath=n()}(this,function(){return function(e){var n=e;if(n&&n.id)return'//*[@id="'+n.id+'"]';for(var o=[];n&&Node.ELEMENT_NODE===n.nodeType;){for(var i=0,r=!1,d=n.previousSibling;d;)d.nodeType!==Node.DOCUMENT_TYPE_NODE&&d.nodeName===n.nodeName&&i++,d=d.previousSibling;for(d=n.nextSibling;d;){if(d.nodeName===n.nodeName){r=!0;break}d=d.nextSibling}o.push((n.prefix?n.prefix+":":"")+n.localName+(i||r?"["+(i+1)+"]":"")),n=n.parentNode}return o.length?"/"+o.reverse().join("/"):""}});
//# sourceMappingURL=index.umd.js.map
const findUpTag = (el) => {
let r = el
chained_css = [];
while (r.parentNode) {
if(r.classList.length >0) {
// limit to just using 2 class names of each, stops from getting really huge selector strings
current_css='.'+Array.from(r.classList).slice(0, 2).join('.');
chained_css.unshift(current_css);
var f=chained_css.join(' ');
var q=document.querySelectorAll(f);
if(q.length==1) return current_css;
if(f.length >120) return null;
}
r = r.parentNode;
}
return null;
}
var elements = document.getElementsByTagName("*");
var size_pos=[];
// after page fetch, inject this JS
// build a map of all elements and their positions (maybe that only include text?)
var bbox;
for (var i = 0; i < elements.length; i++) {
bbox = elements[i].getBoundingClientRect();
// forget reallysmall ones
if (bbox['width'] <10 && bbox['height'] <10 ) {
continue;
}
// @todo the getXpath kind of sucks, it doesnt know when there is for example just one ID sometimes
// it should not traverse when we know we can anchor off just an ID one level up etc..
// maybe, get current class or id, keep traversing up looking for only class or id until there is just one match
// 1st primitive - if it has class, try joining it all and select, if theres only one.. well thats us.
xpath_result=false;
try {
var d= findUpTag(elements[i]);
if (d) {
xpath_result =d;
}
} catch (e) {
var x=1;
}
// default back to the less intelligent one
if (!xpath_result) {
xpath_result = getXPath(elements[i]);
}
size_pos.push({
xpath: xpath_result,
width: bbox['width'],
height: bbox['height'],
left: bbox['left'],
top: bbox['top'],
childCount: elements[i].childElementCount
});
}
// inject the current one set in the css_filter, which may be a CSS rule
// used for displaying the current one in VisualSelector, where its not one we generated.
if (css_filter.length) {
// is it xpath?
if (css_filter.startsWith('/') ) {
q=document.evaluate(css_filter, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
} else {
q=document.querySelector(css_filter);
}
if (q) {
bbox = q.getBoundingClientRect();
size_pos.push({
xpath: css_filter,
width: bbox['width'],
height: bbox['height'],
left: bbox['left'],
top: bbox['top'],
childCount: q.childElementCount
});
}
}
return size_pos;
"""
@abstractmethod
def get_error(self):
@ -59,6 +156,11 @@ class Fetcher():
def is_ready(self):
    # Base implementation: a fetcher is always ready to accept a fetch job.
    return True
@abstractmethod
def get_xpath_data(self, current_css_xpath_filter):
    # Return the element/bounding-box map used by the Visual Selector, or
    # None for fetchers that have no live browser to extract it from.
    return None
# Maybe for the future, each fetcher provides its own diff output, could be used for text, image
# the current one would return javascript output (as we use JS to generate the diff)
#
@ -163,6 +265,15 @@ class html_webdriver(Fetcher):
self.quit()
return True
def get_xpath_data(self, current_css_xpath_filter):
    """Run the element-mapping JS in the live page and return its result.

    current_css_xpath_filter -- the watch's CSS/xpath filter; it is exposed
    to the injected xpath_element_js script as the JS variable `css_filter`.
    """
    import json
    # json.dumps() emits a valid, fully-escaped JS string literal - the
    # previous manual replace() of quote characters ("lazy quoting") broke
    # on backslashes and embedded newlines in the filter.
    script = "var css_filter={};".format(json.dumps(current_css_xpath_filter))
    info = self.driver.execute_script(script + self.xpath_element_js)
    return info
def quit(self):
if self.driver:
try:

@ -195,6 +195,7 @@ class perform_site_check():
if self.datastore.data['settings']['application'].get('real_browser_save_screenshot', True):
screenshot = fetcher.screenshot()
xpath_elements = fetcher.get_xpath_data(watch['css_filter'])
fetcher.quit()
return changed_detected, update_obj, text_content_before_ignored_filter, screenshot
return changed_detected, update_obj, text_content_before_ignored_filter, screenshot, xpath_elements

@ -357,6 +357,14 @@ class ChangeDetectionStore:
f.write(screenshot)
f.close()
def save_xpath_data(self, watch_uuid, data):
    """Persist the element position map for a watch as
    <datastore_path>/<watch_uuid>/elements.json (read by the Visual Selector).
    """
    output_path = "{}/{}".format(self.datastore_path, watch_uuid)
    fname = "{}/elements.json".format(output_path)
    # json.dump streams straight into the file object; the old explicit
    # f.close() inside the with-block was redundant (with already closes).
    with open(fname, 'w') as f:
        json.dump(data, f)
def sync_to_json(self):
logging.info("Saving JSON..")
print("Saving JSON..")

@ -189,7 +189,7 @@ nav
<!-- use img src ready load to know everything is ready to map out -->
<img id="selector-background" />
<script>
$("img#selector-background").attr("src", "{{ url_for('visualselector_request_current_screenshot_and_metadata', uuid=uuid) }}");
$("img#selector-background").attr("src", "{{url_for('static_content', group='screenshot', filename=uuid)}}");
$("img#selector-background").bind('load', function () {
fetch_data();
});
@ -199,7 +199,6 @@ nav
<div id="selector-current-xpath"><strong>Currently:</strong>&nbsp;<span class="text">Loading...</span></div>
</div>
</fieldset>
</div>

@ -40,10 +40,11 @@ class update_worker(threading.Thread):
contents = ""
screenshot = False
update_obj= {}
xpath_data = False
now = time.time()
try:
changed_detected, update_obj, contents, screenshot = update_handler.run(uuid)
changed_detected, update_obj, contents, screenshot, xpath_data = update_handler.run(uuid)
# Re #342
# In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
@ -144,6 +145,9 @@ class update_worker(threading.Thread):
# Always save the screenshot if it's available
if screenshot:
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=screenshot)
if xpath_data:
self.datastore.save_xpath_data(watch_uuid=uuid, data=xpath_data)
self.current_uuid = None # Done
self.q.task_done()

1532
f

File diff suppressed because one or more lines are too long
Loading…
Cancel
Save