diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index 2a97d7a9..cbd29567 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -605,12 +605,12 @@ def changedetection_app(config=None, datastore_o=None): if request.method == 'POST' and not form.validate(): flash("An error occurred, please see below.", "error") - output = render_template("edit.html", uuid=uuid, watch=datastore.data['watching'][uuid], form=form, has_empty_checktime=using_default_check_time, + using_global_webdriver_wait=default['webdriver_delay'] is None, current_base_url=datastore.data['settings']['application']['base_url'], emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False) ) diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py index 27e24e58..3797d8c9 100644 --- a/changedetectionio/content_fetcher.py +++ b/changedetectionio/content_fetcher.py @@ -28,6 +28,9 @@ class Fetcher(): system_http_proxy = os.getenv('HTTP_PROXY') system_https_proxy = os.getenv('HTTPS_PROXY') + # Time on top of the system-defined env minimum time + render_extract_delay=0 + @abstractmethod def get_error(self): return self.error @@ -147,7 +150,7 @@ class base_html_playwright(Fetcher): # - `'commit'` - consider operation to be finished when network response is received and the document started loading. # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds # This seemed to solve nearly all 'TimeoutErrors' - extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay page.wait_for_timeout(extra_wait * 1000) except playwright._impl._api_types.TimeoutError as e: raise EmptyReply(url=url, status_code=None) @@ -240,7 +243,7 @@ class base_html_webdriver(Fetcher): # raise EmptyReply(url=url, status_code=r.status_code) # @todo - dom wait loaded? 
- time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))) + time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay) self.content = self.driver.page_source self.headers = {} self.screenshot = self.driver.get_screenshot_as_png() diff --git a/changedetectionio/fetch_site_status.py b/changedetectionio/fetch_site_status.py index 93e21663..0770a362 100644 --- a/changedetectionio/fetch_site_status.py +++ b/changedetectionio/fetch_site_status.py @@ -97,7 +97,13 @@ class perform_site_check(): proxy_args = self.set_proxy_from_list(watch) fetcher = klass(proxy_override=proxy_args) - # Proxy List support + # Configurable per-watch or global extra delay before extracting text (for webDriver types) + system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None) + if watch['webdriver_delay'] is not None: + fetcher.render_extract_delay = watch['webdriver_delay'] + elif system_webdriver_delay is not None: + fetcher.render_extract_delay = system_webdriver_delay + fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_code) # Fetching complete, now filters diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py index 6d12267e..14d52cab 100644 --- a/changedetectionio/forms.py +++ b/changedetectionio/forms.py @@ -318,6 +318,7 @@ class commonSettingsForm(Form): notification_format = SelectField('Notification format', choices=valid_notification_formats.keys(), default=default_notification_format) fetch_backend = RadioField(u'Fetch method', choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()]) extract_title_as_title = BooleanField('Extract from document and use as watch title', default=False) + webdriver_delay = IntegerField('Wait seconds before extracting text', validators=[validators.Optional(), validators.NumberRange(min=1, message="Should contain one or more seconds")] ) class watchForm(commonSettingsForm): 
diff --git a/changedetectionio/model/App.py b/changedetectionio/model/App.py index 21d53f7d..c0c7b135 100644 --- a/changedetectionio/model/App.py +++ b/changedetectionio/model/App.py @@ -41,7 +41,8 @@ class model(dict): 'notification_body': default_notification_body, 'notification_format': default_notification_format, 'real_browser_save_screenshot': True, - 'schema_version' : 0 + 'schema_version' : 0, + 'webdriver_delay': None # Extra delay in seconds before extracting text } } } diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py index 43d6b979..672e0a3e 100644 --- a/changedetectionio/model/Watch.py +++ b/changedetectionio/model/Watch.py @@ -43,7 +43,8 @@ class model(dict): # Re #110, so then if this is set to None, we know to use the default value instead # Requires setting to None on submit if it's the same as the default # Should be all None by default, so we use the system default in this case. - 'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None} + 'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None}, + 'webdriver_delay': None } def __init__(self, *arg, **kw): diff --git a/changedetectionio/static/js/global-settings.js b/changedetectionio/static/js/global-settings.js new file mode 100644 index 00000000..9ef7c307 --- /dev/null +++ b/changedetectionio/static/js/global-settings.js @@ -0,0 +1,16 @@ +$(document).ready(function() { + function toggle() { + if ($('input[name="application-fetch_backend"]:checked').val() != 'html_requests') { + $('#requests-override-options').hide(); + $('#webdriver-override-options').show(); + } else { + $('#requests-override-options').show(); + $('#webdriver-override-options').hide(); + } + } + $('input[name="application-fetch_backend"]').click(function (e) { + toggle(); + }); + toggle(); + +}); diff --git a/changedetectionio/static/js/watch-settings.js b/changedetectionio/static/js/watch-settings.js index 
c7f070fe..429013f1 100644 --- a/changedetectionio/static/js/watch-settings.js +++ b/changedetectionio/static/js/watch-settings.js @@ -2,8 +2,10 @@ $(document).ready(function() { function toggle() { if ($('input[name="fetch_backend"]:checked').val() != 'html_requests') { $('#requests-override-options').hide(); + $('#webdriver-override-options').show(); } else { $('#requests-override-options').show(); + $('#webdriver-override-options').hide(); } } $('input[name="fetch_backend"]').click(function (e) { diff --git a/changedetectionio/templates/edit.html b/changedetectionio/templates/edit.html index 98dedfb4..18cf061a 100644 --- a/changedetectionio/templates/edit.html +++ b/changedetectionio/templates/edit.html @@ -73,6 +73,21 @@ </span> </div> {% endif %} + <fieldset class="pure-group" id="webdriver-override-options"> + <div class="pure-form-message-inline"> + <strong>If you're having trouble waiting for the page to be fully rendered (text missing etc), try increasing the 'wait' time here.</strong> + <br/> + This will wait <i>n</i> seconds before extracting the text. 
+ </div> + <div class="pure-control-group"> + {{ render_field(form.webdriver_delay) }} + </div> + {% if using_global_webdriver_wait %} + <div class="pure-form-message-inline"> + <strong>Using the current global default settings</strong> + </div> + {% endif %} + </fieldset> <fieldset class="pure-group" id="requests-override-options"> <div class="pure-form-message-inline"> <strong>Request override is currently only used by the <i>Basic fast Plaintext/HTTP Client</i> method.</strong> diff --git a/changedetectionio/templates/settings.html b/changedetectionio/templates/settings.html index 2b052985..bd3a7632 100644 --- a/changedetectionio/templates/settings.html +++ b/changedetectionio/templates/settings.html @@ -12,6 +12,7 @@ <script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script> <script type="text/javascript" src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script> +<script type="text/javascript" src="{{url_for('static_content', group='js', filename='global-settings.js')}}" defer></script> <div class="edit-form"> <div class="tabs collapsable"> <ul> @@ -87,6 +88,16 @@ <p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p> </span> </div> + <fieldset class="pure-group" id="webdriver-override-options"> + <div class="pure-form-message-inline"> + <strong>If you're having trouble waiting for the page to be fully rendered (text missing etc), try increasing the 'wait' time here.</strong> + <br/> + This will wait <i>n</i> seconds before extracting the text. + </div> + <div class="pure-control-group"> + {{ render_field(form.application.form.webdriver_delay) }} + </div> + </fieldset> </div>