From d4b52371034ccaed13a3996332bca1d22c6d6024 Mon Sep 17 00:00:00 2001
From: dgtlmoon <dgtlmoon@gmail.com>
Date: Sat, 7 May 2022 17:14:40 +0200
Subject: [PATCH] Playwright fetcher - more reliable by just waiting arbitrary
 seconds after the last network IO

---
 changedetectionio/content_fetcher.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py
index 9e0b889a..eace751b 100644
--- a/changedetectionio/content_fetcher.py
+++ b/changedetectionio/content_fetcher.py
@@ -118,6 +118,7 @@ class base_html_playwright(Fetcher):
             ignore_status_codes=False):
 
         from playwright.sync_api import sync_playwright
+        from playwright._impl._api_types import Error, TimeoutError
 
         with sync_playwright() as p:
             browser_type = getattr(p, self.browser_type)
@@ -134,10 +135,16 @@ class base_html_playwright(Fetcher):
             )
             page = context.new_page()
             page.set_viewport_size({"width": 1280, "height": 1024})
-            response = page.goto(url, timeout=timeout * 1000)
-
-            extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))
-            page.wait_for_timeout(extra_wait * 1000)
+            try:
+                # wait_until='commit' - consider the operation finished when the network response is received
+                # and the document has started loading. Better to not use any smarts from Playwright and just
+                # wait an arbitrary number of seconds; this seemed to solve nearly all 'TimeoutErrors'.
+                response = page.goto(url, timeout=timeout * 1000, wait_until='commit')
+                extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))
+                page.wait_for_timeout(extra_wait * 1000)
+            # Playwright's TimeoutError: this method imports the class by name, not the `playwright` package
+            except TimeoutError as e:
+                raise EmptyReply(url=url, status_code=None) from e
 
             if response is None:
                 raise EmptyReply(url=url, status_code=None)