diff --git a/changedetectionio/content_fetchers/puppeteer.py b/changedetectionio/content_fetchers/puppeteer.py index a497cb16..725be3b3 100644 --- a/changedetectionio/content_fetchers/puppeteer.py +++ b/changedetectionio/content_fetchers/puppeteer.py @@ -92,15 +92,39 @@ class fetcher(Fetcher): ignoreHTTPSErrors=True ) except websockets.exceptions.InvalidStatusCode as e: - raise BrowserConnectError(msg=f"Error while trying to connect the browser, Code {e.status_code} (check your access)") + raise BrowserConnectError(msg=f"Error while trying to connect the browser, Code {e.status_code} (check your access, whitelist IP, password etc)") except websockets.exceptions.InvalidURI: raise BrowserConnectError(msg=f"Error connecting to the browser, check your browser connection address (should be ws:// or wss://") except Exception as e: raise BrowserConnectError(msg=f"Error connecting to the browser {str(e)}") + + # Better is to launch chrome with the URL as arg + # non-headless - newPage() will launch an extra tab/window, .browser should already contain 1 page/tab + # headless - ask a new page + self.page = (pages := await browser.pages) and len(pages) or await browser.newPage() + + try: + from pyppeteerstealth import inject_evasions_into_page + except ImportError: + logger.debug("pyppeteerstealth module not available, skipping") + pass else: - self.page = await browser.newPage() + # I tried hooking events via self.page.on(Events.Page.DOMContentLoaded, inject_evasions_requiring_obj_to_page) + # But I could never get it to fire reliably, so we just inject it straight after + await inject_evasions_into_page(self.page) - await self.page.setUserAgent(manage_user_agent(headers=request_headers, current_ua=await self.page.evaluate('navigator.userAgent'))) + # This user agent is similar to what was used when tweaking the evasions in inject_evasions_into_page(..) + user_agent = None + if request_headers: + user_agent = next((value for key, value in request_headers.items() if key.lower().strip() == 'user-agent'), None) + if user_agent: + await self.page.setUserAgent(user_agent) + # Remove it so it's not sent again with headers after + [request_headers.pop(key) for key in list(request_headers) if key.lower().strip() == 'user-agent'.lower().strip()] + + if not user_agent: + # Attempt to strip 'HeadlessChrome' etc + await self.page.setUserAgent(manage_user_agent(headers=request_headers, current_ua=await self.page.evaluate('navigator.userAgent'))) await self.page.setBypassCSP(True) if request_headers: diff --git a/changedetectionio/flask_app.py b/changedetectionio/flask_app.py index 13d3dfb6..41f80a77 100644 --- a/changedetectionio/flask_app.py +++ b/changedetectionio/flask_app.py @@ -771,7 +771,7 @@ def changedetection_app(config=None, datastore_o=None): jq_support=jq_support, playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False), settings_application=datastore.data['settings']['application'], - using_global_webdriver_wait=default['webdriver_delay'] is None, + using_global_webdriver_wait=not default['webdriver_delay'], uuid=uuid, visualselector_enabled=visualselector_enabled, watch=watch diff --git a/requirements.txt b/requirements.txt index 88f2c3c4..d6267bfe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -77,8 +77,8 @@ jq~=1.3; python_version >= "3.8" and sys_platform == "linux" pillow # playwright is installed at Dockerfile build time because it's not available on all platforms -# experimental release pyppeteer-ng==2.0.0rc5 +pyppeteerstealth>=0.0.4 # Include pytest, so if theres a support issue we can ask them to run these tests on their setup pytest ~=7.2