From fae1164c0b1898efed4f336c150ef0c354c0b912 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Sun, 10 Jul 2022 13:56:01 +0200 Subject: [PATCH] Ability to specify JS before running change-detection (#744) --- changedetectionio/content_fetcher.py | 11 ++++++++++- changedetectionio/fetch_site_status.py | 3 +++ changedetectionio/forms.py | 2 ++ changedetectionio/model/Watch.py | 3 ++- changedetectionio/store.py | 3 ++- changedetectionio/templates/edit.html | 13 ++++++++----- 6 files changed, 27 insertions(+), 8 deletions(-) diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py index bb269455..ca43edc8 100644 --- a/changedetectionio/content_fetcher.py +++ b/changedetectionio/content_fetcher.py @@ -46,6 +46,7 @@ class Fetcher(): headers = None fetcher_description = "No description" + webdriver_js_execute_code = None xpath_element_js = """ // Include the getXpath script directly, easier than fetching !function(e,n){"object"==typeof exports&&"undefined"!=typeof module?module.exports=n():"function"==typeof define&&define.amd?define(n):(e=e||self).getXPath=n()}(this,function(){return function(e){var n=e;if(n&&n.id)return'//*[@id="'+n.id+'"]';for(var o=[];n&&Node.ELEMENT_NODE===n.nodeType;){for(var i=0,r=!1,d=n.previousSibling;d;)d.nodeType!==Node.DOCUMENT_TYPE_NODE&&d.nodeName===n.nodeName&&i++,d=d.previousSibling;for(d=n.nextSibling;d;){if(d.nodeName===n.nodeName){r=!0;break}d=d.nextSibling}o.push((n.prefix?n.prefix+":":"")+n.localName+(i||r?"["+(i+1)+"]":"")),n=n.parentNode}return o.length?"/"+o.reverse().join("/"):""}}); @@ -175,7 +176,6 @@ class Fetcher(): # Will be needed in the future by the VisualSelector, always get this where possible. 
screenshot = False - fetcher_description = "No description" system_http_proxy = os.getenv('HTTP_PROXY') system_https_proxy = os.getenv('HTTPS_PROXY') @@ -319,6 +319,9 @@ class base_html_playwright(Fetcher): with page.expect_navigation(): response = page.goto(url, wait_until='load') + if self.webdriver_js_execute_code is not None: + page.evaluate(self.webdriver_js_execute_code) + except playwright._impl._api_types.TimeoutError as e: context.close() browser.close() @@ -450,6 +453,12 @@ class base_html_webdriver(Fetcher): self.driver.set_window_size(1280, 1024) self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))) + + if self.webdriver_js_execute_code is not None: + self.driver.execute_script(self.webdriver_js_execute_code) + # Selenium doesn't automatically wait for actions as good as Playwright, so wait again + self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))) + self.screenshot = self.driver.get_screenshot_as_png() # @todo - how to check this? is it possible? 
diff --git a/changedetectionio/fetch_site_status.py b/changedetectionio/fetch_site_status.py index aca0fd41..ca9f419a 100644 --- a/changedetectionio/fetch_site_status.py +++ b/changedetectionio/fetch_site_status.py @@ -106,6 +106,9 @@ class perform_site_check(): elif system_webdriver_delay is not None: fetcher.render_extract_delay = system_webdriver_delay + if watch['webdriver_js_execute_code'] is not None and watch['webdriver_js_execute_code'].strip(): + fetcher.webdriver_js_execute_code = watch['webdriver_js_execute_code'] + fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_code, watch['css_filter']) fetcher.quit() diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py index c857b64c..4ad1b1a7 100644 --- a/changedetectionio/forms.py +++ b/changedetectionio/forms.py @@ -344,6 +344,8 @@ class watchForm(commonSettingsForm): trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()]) text_should_not_be_present = StringListField('Block change-detection if text matches', [validators.Optional(), ValidateListRegex()]) + webdriver_js_execute_code = TextAreaField('Execute JavaScript before change detection', render_kw={"rows": "5"}, validators=[validators.Optional()]) + save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"}) save_and_preview_button = SubmitField('Save & Preview', render_kw={"class": "pure-button pure-button-primary"}) proxy = RadioField('Proxy') diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py index 8debe4c9..acfd9117 100644 --- a/changedetectionio/model/Watch.py +++ b/changedetectionio/model/Watch.py @@ -47,7 +47,8 @@ class model(dict): # Requires setting to None on submit if it's the same as the default # Should be all None by default, so we use the system default in this case. 
'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None}, - 'webdriver_delay': None + 'webdriver_delay': None, + 'webdriver_js_execute_code': None, # Run before change-detection } jitter_seconds = 0 mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7} diff --git a/changedetectionio/store.py b/changedetectionio/store.py index 8eda5f14..fbafbe04 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -298,7 +298,8 @@ class ChangeDetectionStore: 'ignore_text', 'css_filter', 'subtractive_selectors', 'trigger_text', 'extract_title_as_title', 'extract_text', - 'text_should_not_be_present']: + 'text_should_not_be_present', + 'webdriver_js_execute_code']: if res.get(k): apply_extras[k] = res[k] diff --git a/changedetectionio/templates/edit.html b/changedetectionio/templates/edit.html index 0e85c5c7..b13afe46 100644 --- a/changedetectionio/templates/edit.html +++ b/changedetectionio/templates/edit.html @@ -88,14 +88,17 @@ If you're having trouble waiting for the page to be fully rendered (text missing etc), try increasing the 'wait' time here.
This will wait n seconds before extracting the text. + {% if using_global_webdriver_wait %} +
Using the current global default settings + {% endif %} - - {% if using_global_webdriver_wait %} -
- Using the current global default settings +
+ {{ render_field(form.webdriver_js_execute_code) }} +
+ Run this code before performing change detection, handy for filling in fields and other actions. More help and examples here +
- {% endif %}
{% if not playwright_enabled %}