|
|
@ -92,15 +92,39 @@ class fetcher(Fetcher):
|
|
|
|
ignoreHTTPSErrors=True
|
|
|
|
ignoreHTTPSErrors=True
|
|
|
|
)
|
|
|
|
)
|
|
|
|
except websockets.exceptions.InvalidStatusCode as e:
|
|
|
|
except websockets.exceptions.InvalidStatusCode as e:
|
|
|
|
raise BrowserConnectError(msg=f"Error while trying to connect the browser, Code {e.status_code} (check your access)")
|
|
|
|
raise BrowserConnectError(msg=f"Error while trying to connect the browser, Code {e.status_code} (check your access, whitelist IP, password etc)")
|
|
|
|
except websockets.exceptions.InvalidURI:
|
|
|
|
except websockets.exceptions.InvalidURI:
|
|
|
|
raise BrowserConnectError(msg=f"Error connecting to the browser, check your browser connection address (should be ws:// or wss://")
|
|
|
|
raise BrowserConnectError(msg=f"Error connecting to the browser, check your browser connection address (should be ws:// or wss://")
|
|
|
|
except Exception as e:
|
|
|
|
except Exception as e:
|
|
|
|
raise BrowserConnectError(msg=f"Error connecting to the browser {str(e)}")
|
|
|
|
raise BrowserConnectError(msg=f"Error connecting to the browser {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Better is to launch chrome with the URL as arg
|
|
|
|
|
|
|
|
# non-headless - newPage() will launch an extra tab/window, .browser should already contain 1 page/tab
|
|
|
|
|
|
|
|
# headless - ask a new page
|
|
|
|
|
|
|
|
self.page = (pages := await browser.pages) and len(pages) or await browser.newPage()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
|
|
from pyppeteerstealth import inject_evasions_into_page
|
|
|
|
|
|
|
|
except ImportError:
|
|
|
|
|
|
|
|
logger.debug("pyppeteerstealth module not available, skipping")
|
|
|
|
|
|
|
|
pass
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
self.page = await browser.newPage()
|
|
|
|
# I tried hooking events via self.page.on(Events.Page.DOMContentLoaded, inject_evasions_requiring_obj_to_page)
|
|
|
|
|
|
|
|
# But I could never get it to fire reliably, so we just inject it straight after
|
|
|
|
|
|
|
|
await inject_evasions_into_page(self.page)
|
|
|
|
|
|
|
|
|
|
|
|
await self.page.setUserAgent(manage_user_agent(headers=request_headers, current_ua=await self.page.evaluate('navigator.userAgent')))
|
|
|
|
# This user agent is similar to what was used when tweaking the evasions in inject_evasions_into_page(..)
|
|
|
|
|
|
|
|
user_agent = None
|
|
|
|
|
|
|
|
if request_headers:
|
|
|
|
|
|
|
|
user_agent = next((value for key, value in request_headers.items() if key.lower().strip() == 'user-agent'), None)
|
|
|
|
|
|
|
|
if user_agent:
|
|
|
|
|
|
|
|
await self.page.setUserAgent(user_agent)
|
|
|
|
|
|
|
|
# Remove it so it's not sent again with headers after
|
|
|
|
|
|
|
|
[request_headers.pop(key) for key in list(request_headers) if key.lower().strip() == 'user-agent'.lower().strip()]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if not user_agent:
|
|
|
|
|
|
|
|
# Attempt to strip 'HeadlessChrome' etc
|
|
|
|
|
|
|
|
await self.page.setUserAgent(manage_user_agent(headers=request_headers, current_ua=await self.page.evaluate('navigator.userAgent')))
|
|
|
|
|
|
|
|
|
|
|
|
await self.page.setBypassCSP(True)
|
|
|
|
await self.page.setBypassCSP(True)
|
|
|
|
if request_headers:
|
|
|
|
if request_headers:
|
|
|
|