From d4b52371034ccaed13a3996332bca1d22c6d6024 Mon Sep 17 00:00:00 2001
From: dgtlmoon
Date: Sat, 7 May 2022 17:14:40 +0200
Subject: [PATCH] Playwright fetcher - more reliable by just waiting arbitrary seconds after the last network IO

---
 changedetectionio/content_fetcher.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py
index 9e0b889a..eace751b 100644
--- a/changedetectionio/content_fetcher.py
+++ b/changedetectionio/content_fetcher.py
@@ -118,6 +118,7 @@ class base_html_playwright(Fetcher):
             ignore_status_codes=False):
 
         from playwright.sync_api import sync_playwright
+        from playwright._impl._api_types import Error, TimeoutError
 
         with sync_playwright() as p:
             browser_type = getattr(p, self.browser_type)
@@ -134,10 +135,16 @@ class base_html_playwright(Fetcher):
             )
             page = context.new_page()
             page.set_viewport_size({"width": 1280, "height": 1024})
-            response = page.goto(url, timeout=timeout * 1000)
-
-            extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))
-            page.wait_for_timeout(extra_wait * 1000)
+            try:
+                # 'commit': goto() returns once the network response is received and the document started loading.
+                response = page.goto(url, timeout=timeout * 1000, wait_until='commit')
+                # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds;
+                # this seemed to solve nearly all 'TimeoutErrors'.
+                extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))
+                page.wait_for_timeout(extra_wait * 1000)
+            # Use the TimeoutError imported above: `from ... import` does not bind the name `playwright` itself.
+            except TimeoutError as e:
+                raise EmptyReply(url=url, status_code=None) from e
 
             if response is None:
                 raise EmptyReply(url=url, status_code=None)