diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py index db9e23dc..d7a4f835 100644 --- a/changedetectionio/content_fetcher.py +++ b/changedetectionio/content_fetcher.py @@ -343,8 +343,8 @@ class base_html_playwright(Fetcher): 'req_headers': request_headers, 'screenshot_quality': int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)), 'url': url, - 'user_agent': request_headers.get('User-Agent', 'Mozilla/5.0'), - 'proxy_username': self.proxy.get('username','') if self.proxy else False, + 'user_agent': {k.lower(): v for k, v in request_headers.items()}.get('user-agent', None), + 'proxy_username': self.proxy.get('username', '') if self.proxy else False, 'proxy_password': self.proxy.get('password', '') if self.proxy else False, 'no_cache_list': [ 'twitter', @@ -443,7 +443,7 @@ class base_html_playwright(Fetcher): # Set user agent to prevent Cloudflare from blocking the browser # Use the default one configured in the App.py model that's passed from fetch_site_status.py context = browser.new_context( - user_agent=request_headers.get('User-Agent', 'Mozilla/5.0'), + user_agent={k.lower(): v for k, v in request_headers.items()}.get('user-agent', None), proxy=self.proxy, # This is needed to enable JavaScript execution on GitHub and others bypass_csp=True, @@ -684,7 +684,7 @@ class html_requests(Fetcher): is_binary=False): # Make requests use a more modern looking user-agent - if not 'User-Agent' in request_headers: + if not {k.lower(): v for k, v in request_headers.items()}.get('user-agent', None): request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36') diff --git a/changedetectionio/res/puppeteer_fetch.js b/changedetectionio/res/puppeteer_fetch.js index d376729f..6015fbe8 100644 --- a/changedetectionio/res/puppeteer_fetch.js +++ b/changedetectionio/res/puppeteer_fetch.js @@ -18,7 +18,9 @@ module.exports = async ({page, context}) => { await page.setBypassCSP(true) await page.setExtraHTTPHeaders(req_headers); - await page.setUserAgent(user_agent); + if (user_agent) { + await page.setUserAgent(user_agent); + } // https://ourcodeworld.com/articles/read/1106/how-to-solve-puppeteer-timeouterror-navigation-timeout-of-30000-ms-exceeded await page.setDefaultNavigationTimeout(0);