Also make user-agent detect string case insensitive

1809-wrong-useragent-playwright-puppeteer
dgtlmoon 1 year ago
parent e1f19a3265
commit ce1d4b039c

@ -343,8 +343,8 @@ class base_html_playwright(Fetcher):
'req_headers': request_headers,
'screenshot_quality': int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)),
'url': url,
'user_agent': request_headers.get('User-Agent', None),
'proxy_username': self.proxy.get('username','') if self.proxy else False,
'user_agent': {k.lower(): v for k, v in request_headers.items()}.get('user-agent', None),
'proxy_username': self.proxy.get('username', '') if self.proxy else False,
'proxy_password': self.proxy.get('password', '') if self.proxy else False,
'no_cache_list': [
'twitter',
@ -443,7 +443,7 @@ class base_html_playwright(Fetcher):
# Set user agent to prevent Cloudflare from blocking the browser
# Use the default one configured in the App.py model that's passed from fetch_site_status.py
context = browser.new_context(
user_agent=request_headers.get('User-Agent', None),
user_agent={k.lower(): v for k, v in request_headers.items()}.get('user-agent', None),
proxy=self.proxy,
# This is needed to enable JavaScript execution on GitHub and others
bypass_csp=True,
@ -684,7 +684,7 @@ class html_requests(Fetcher):
is_binary=False):
# Make requests use a more modern looking user-agent
if not 'User-Agent' in request_headers:
if not {k.lower(): v for k, v in request_headers.items()}.get('user-agent', None):
request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT",
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36')

@ -1,7 +1,8 @@
version: '3.2'
services:
changedetection:
image: ghcr.io/dgtlmoon/changedetection.io
build: .
# image: ghcr.io/dgtlmoon/changedetection.io
container_name: changedetection
hostname: changedetection
volumes:
@ -9,7 +10,7 @@ services:
# Configurable proxy list support, see https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#proxy-list-support
# - ./proxies.json:/datastore/proxies.json
# environment:
environment:
# Default listening port, can also be changed with the -p option
# - PORT=5000
@ -26,7 +27,7 @@ services:
# https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy
#
# Alternative Playwright URL, do not use "'s or 's!
# - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000/?stealth=1&--disable-web-security=true
- PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000/?stealth=1&--disable-web-security=true
#
# Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password
#
@ -82,24 +83,24 @@ services:
# Used for fetching pages via Playwright+Chrome where you need Javascript support.
# playwright-chrome:
# hostname: playwright-chrome
# image: browserless/chrome
# restart: unless-stopped
# environment:
# - SCREEN_WIDTH=1920
# - SCREEN_HEIGHT=1024
# - SCREEN_DEPTH=16
# - ENABLE_DEBUGGER=false
# - PREBOOT_CHROME=true
# - CONNECTION_TIMEOUT=300000
# - MAX_CONCURRENT_SESSIONS=10
# - CHROME_REFRESH_TIME=600000
# - DEFAULT_BLOCK_ADS=true
# - DEFAULT_STEALTH=true
playwright-chrome:
hostname: playwright-chrome
image: browserless/chrome
restart: unless-stopped
environment:
- SCREEN_WIDTH=1920
- SCREEN_HEIGHT=1024
- SCREEN_DEPTH=16
- ENABLE_DEBUGGER=false
- PREBOOT_CHROME=true
- CONNECTION_TIMEOUT=300000
- MAX_CONCURRENT_SESSIONS=10
- CHROME_REFRESH_TIME=600000
- DEFAULT_BLOCK_ADS=true
- DEFAULT_STEALTH=true
#
# Ignore HTTPS errors, like for self-signed certs
# - DEFAULT_IGNORE_HTTPS_ERRORS=true
- DEFAULT_IGNORE_HTTPS_ERRORS=true
#
volumes:
changedetection-data:

Loading…
Cancel
Save