From 1a608d0ae60510a0383c634e1475e4c31c6b0dd0 Mon Sep 17 00:00:00 2001
From: dgtlmoon
Date: Mon, 12 Feb 2024 12:40:31 +0100
Subject: [PATCH] Puppeteer - client fixes for proxy and caching (#2181)

---
 changedetectionio/content_fetchers/puppeteer.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/changedetectionio/content_fetchers/puppeteer.py b/changedetectionio/content_fetchers/puppeteer.py
index 9d455284..87f4985c 100644
--- a/changedetectionio/content_fetchers/puppeteer.py
+++ b/changedetectionio/content_fetchers/puppeteer.py
@@ -41,15 +41,13 @@ class fetcher(Fetcher):
                 self.proxy = {'username': parsed.username, 'password': parsed.password}
                 # Add the proxy server chrome start option, the username and password never gets added here
                 # (It always goes in via await self.page.authenticate(self.proxy))
-                import urllib.parse
+
                 # @todo filter some injection attack?
-                # check /somepath?thisandthat
                 # check scheme when no scheme
-                h = urllib.parse.quote(parsed.scheme + "://") if parsed.scheme else ''
-                h += urllib.parse.quote(f"{parsed.hostname}:{parsed.port}{parsed.path}?{parsed.query}", safe='')
-
+                proxy_url = parsed.scheme + "://" if parsed.scheme else ''
+                proxy_url += f"{parsed.hostname}:{parsed.port}{parsed.path}?{parsed.query}"
                 r = "?" if not '?' in self.browser_connection_url else '&'
-                self.browser_connection_url += f"{r}--proxy-server={h}"
+                self.browser_connection_url += f"{r}--proxy-server={proxy_override}"
 
     # def screenshot_step(self, step_n=''):
     #     screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=85)
@@ -88,7 +86,7 @@ class fetcher(Fetcher):
         # @todo timeout
         try:
             browser = await pyppeteer_instance.connect(browserWSEndpoint=self.browser_connection_url,
-                                                       defaultViewport={"width": 1024, "height": 768}
+                                                       ignoreHTTPSErrors=True
                                                        )
         except websockets.exceptions.InvalidStatusCode as e:
             raise BrowserConnectError(msg=f"Error while trying to connect the browser, Code {e.status_code} (check your access)")
@@ -107,7 +105,7 @@ class fetcher(Fetcher):
         # SOCKS5 with authentication is not supported (yet)
         # https://github.com/microsoft/playwright/issues/10567
         self.page.setDefaultNavigationTimeout(0)
-
+        self.page.setCacheEnabled(True)
         if self.proxy:
             # Setting Proxy-Authentication header is deprecated, and doing so can trigger header change errors from Puppeteer
             # https://github.com/puppeteer/puppeteer/issues/676 ?