From 00fe4393513de41bf5ae4ce523e2d4c725ff9f8e Mon Sep 17 00:00:00 2001
From: dgtlmoon
Date: Mon, 8 May 2023 18:27:18 +0200
Subject: [PATCH] Fallback screenshot

---
 changedetectionio/content_fetcher.py | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py
index 262ac2b4..b0c76676 100644
--- a/changedetectionio/content_fetcher.py
+++ b/changedetectionio/content_fetcher.py
@@ -299,7 +299,7 @@ class base_html_playwright(Fetcher):
 
             // https://ourcodeworld.com/articles/read/1106/how-to-solve-puppeteer-timeouterror-navigation-timeout-of-30000-ms-exceeded
             await page.setDefaultNavigationTimeout(0);
-            
+
             if(proxy_username) {{
                 await page.authenticate({{
                     username: proxy_username,
@@ -325,7 +325,6 @@ class base_html_playwright(Fetcher):
                 await page.waitForTimeout(200);
             }}
 
-            var html = await page.content();
             var xpath_data;
             var instock_data;
             try {{
@@ -337,13 +336,26 @@ class base_html_playwright(Fetcher):
 
             // Protocol error (Page.captureScreenshot): Cannot take screenshot with 0 width can come from a proxy auth failure
             // Wrap it here (for now)
-            var b64s;
-            try {{
+
+            var b64s = false;
+            try {{
                 b64s = await page.screenshot({{ encoding: "base64", fullPage: true, quality: screenshot_quality, type: 'jpeg' }});
             }} catch (e) {{
                 console.log(e);
             }}
-
+
+            // May fail on very large pages with 'WARNING: tile memory limits exceeded, some content may not draw'
+            if (!b64s) {{
+                console.error('ERROR: content-fetcher page was maybe too large for a screenshot, reverting to viewport only screenshot');
+                try {{
+                    b64s = await page.screenshot({{ encoding: "base64", quality: screenshot_quality, type: 'jpeg' }});
+                }} catch (e) {{
+                    console.log(e);
+                }}
+            }}
+
+
+            var html = await page.content();
 
             return {{
                 data: {{
                     'content': html,
@@ -375,7 +387,7 @@ class base_html_playwright(Fetcher):
 
                 # Actual authentication handled by Puppeteer/node
                 o = urlparse(self.proxy.get('server'))
                 proxy_url = urllib.parse.quote(o._replace(netloc="{}:{}".format(o.hostname, o.port)).geturl())
-                browserless_function_url = f"{browserless_function_url}&--proxy-server={proxy_url}"
+                browserless_function_url = f"{browserless_function_url}&--proxy-server={proxy_url}&dumpio=true"
 
         try:
@@ -396,9 +408,9 @@ class base_html_playwright(Fetcher):
                     }
                 },
                 # @todo /function needs adding ws:// to http:// rebuild this
-                url=browserless_function_url+"&--disable-features=AudioServiceOutOfProcess&dumpio=true",
+                url=browserless_function_url+"&--disable-features=AudioServiceOutOfProcess&dumpio=true&--disable-remote-fonts",
                 timeout=wait_browserless_seconds)
-
+# 'ziparchive::addglob() will throw an instance of error instead of resulting in a fatal error if glob support is not available.'
         except ReadTimeout:
             raise PageUnloadable(url=url, status_code=None, message=f"No response from browserless in {wait_browserless_seconds}s")
         except ConnectTimeout:
@@ -410,6 +422,10 @@ class base_html_playwright(Fetcher):
         x = response.json()
 
         if not x.get('screenshot'):
+            # https://github.com/puppeteer/puppeteer/blob/v1.0.0/docs/troubleshooting.md#tips
+            # https://github.com/puppeteer/puppeteer/issues/1834
+            # https://github.com/puppeteer/puppeteer/issues/1834#issuecomment-381047051
+            # Check your memory is shared and big enough
            raise ScreenshotUnavailable(url=url, status_code=None)
 
         if not x.get('content', '').strip():
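
The patch above implements a two-step screenshot fallback: attempt a full-page JPEG capture first, and if that throws (for example on very long pages that hit Chromium's tile-memory limit), retry with a viewport-only capture so the fetcher still returns an image rather than failing outright. A minimal sketch of the same pattern using the Playwright Python sync API follows; this is an illustration only, not the project's code path (the patch itself drives Puppeteer through browserless), and the function name, viewport size, and default quality below are hypothetical:

    # Sketch: full-page screenshot with a viewport-only fallback (assumed names).
    from playwright.sync_api import sync_playwright, Error

    def capture_with_fallback(url, quality=72):
        with sync_playwright() as p:
            browser = p.chromium.launch()
            page = browser.new_page(viewport={"width": 1280, "height": 1024})
            page.goto(url)
            try:
                try:
                    # Full-page capture can fail on very large pages
                    # ("tile memory limits exceeded" in Chromium).
                    return page.screenshot(full_page=True, type="jpeg", quality=quality)
                except Error:
                    # Fall back to the visible viewport so something is still returned.
                    return page.screenshot(type="jpeg", quality=quality)
            finally:
                browser.close()

    # usage: jpeg_bytes = capture_with_fallback("https://example.com")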