Also make detection of the User-Agent request header case-insensitive

1809-wrong-useragent-playwright-puppeteer
dgtlmoon 1 year ago
parent e1f19a3265
commit ce1d4b039c

@ -343,8 +343,8 @@ class base_html_playwright(Fetcher):
'req_headers': request_headers, 'req_headers': request_headers,
'screenshot_quality': int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)), 'screenshot_quality': int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)),
'url': url, 'url': url,
'user_agent': request_headers.get('User-Agent', None), 'user_agent': {k.lower(): v for k, v in request_headers.items()}.get('user-agent', None),
'proxy_username': self.proxy.get('username','') if self.proxy else False, 'proxy_username': self.proxy.get('username', '') if self.proxy else False,
'proxy_password': self.proxy.get('password', '') if self.proxy else False, 'proxy_password': self.proxy.get('password', '') if self.proxy else False,
'no_cache_list': [ 'no_cache_list': [
'twitter', 'twitter',
@ -443,7 +443,7 @@ class base_html_playwright(Fetcher):
# Set user agent to prevent Cloudflare from blocking the browser # Set user agent to prevent Cloudflare from blocking the browser
# Use the default one configured in the App.py model that's passed from fetch_site_status.py # Use the default one configured in the App.py model that's passed from fetch_site_status.py
context = browser.new_context( context = browser.new_context(
user_agent=request_headers.get('User-Agent', None), user_agent={k.lower(): v for k, v in request_headers.items()}.get('user-agent', None),
proxy=self.proxy, proxy=self.proxy,
# This is needed to enable JavaScript execution on GitHub and others # This is needed to enable JavaScript execution on GitHub and others
bypass_csp=True, bypass_csp=True,
@ -684,7 +684,7 @@ class html_requests(Fetcher):
is_binary=False): is_binary=False):
# Make requests use a more modern looking user-agent # Make requests use a more modern looking user-agent
if not 'User-Agent' in request_headers: if not {k.lower(): v for k, v in request_headers.items()}.get('user-agent', None):
request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT",
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36') 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36')

@ -1,7 +1,8 @@
version: '3.2' version: '3.2'
services: services:
changedetection: changedetection:
image: ghcr.io/dgtlmoon/changedetection.io build: .
# image: ghcr.io/dgtlmoon/changedetection.io
container_name: changedetection container_name: changedetection
hostname: changedetection hostname: changedetection
volumes: volumes:
@ -9,7 +10,7 @@ services:
# Configurable proxy list support, see https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#proxy-list-support # Configurable proxy list support, see https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#proxy-list-support
# - ./proxies.json:/datastore/proxies.json # - ./proxies.json:/datastore/proxies.json
# environment: environment:
# Default listening port, can also be changed with the -p option # Default listening port, can also be changed with the -p option
# - PORT=5000 # - PORT=5000
@ -26,7 +27,7 @@ services:
# https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy # https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy
# #
# Alternative Playwright URL, do not use "'s or 's! # Alternative Playwright URL, do not use "'s or 's!
# - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000/?stealth=1&--disable-web-security=true - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000/?stealth=1&--disable-web-security=true
# #
# Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password # Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password
# #
@ -82,24 +83,24 @@ services:
# Used for fetching pages via Playwright+Chrome where you need Javascript support. # Used for fetching pages via Playwright+Chrome where you need Javascript support.
# playwright-chrome: playwright-chrome:
# hostname: playwright-chrome hostname: playwright-chrome
# image: browserless/chrome image: browserless/chrome
# restart: unless-stopped restart: unless-stopped
# environment: environment:
# - SCREEN_WIDTH=1920 - SCREEN_WIDTH=1920
# - SCREEN_HEIGHT=1024 - SCREEN_HEIGHT=1024
# - SCREEN_DEPTH=16 - SCREEN_DEPTH=16
# - ENABLE_DEBUGGER=false - ENABLE_DEBUGGER=false
# - PREBOOT_CHROME=true - PREBOOT_CHROME=true
# - CONNECTION_TIMEOUT=300000 - CONNECTION_TIMEOUT=300000
# - MAX_CONCURRENT_SESSIONS=10 - MAX_CONCURRENT_SESSIONS=10
# - CHROME_REFRESH_TIME=600000 - CHROME_REFRESH_TIME=600000
# - DEFAULT_BLOCK_ADS=true - DEFAULT_BLOCK_ADS=true
# - DEFAULT_STEALTH=true - DEFAULT_STEALTH=true
# #
# Ignore HTTPS errors, like for self-signed certs # Ignore HTTPS errors, like for self-signed certs
# - DEFAULT_IGNORE_HTTPS_ERRORS=true - DEFAULT_IGNORE_HTTPS_ERRORS=true
# #
volumes: volumes:
changedetection-data: changedetection-data:

Loading…
Cancel
Save