Dont use default Requests `user-agent` and `accept` headers in playwright+selenium requests, breaks sites such as united.com. (#1004)

jq-arm7-build
dgtlmoon 2 years ago committed by GitHub
parent 17d37fb626
commit 669fd3ae0b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -575,6 +575,11 @@ class html_requests(Fetcher):
ignore_status_codes=False,
current_css_filter=None):
# Make requests use a more modern looking user-agent
if not 'User-Agent' in request_headers:
request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT",
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36')
proxies = {}
# Allows override the proxy on a per-request basis

@ -13,10 +13,6 @@ class model(dict):
'watching': {},
'settings': {
'headers': {
'User-Agent': getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'),
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate', # No support for brolti in python requests yet.
'Accept-Language': 'en-GB,en-US;q=0.9,en;'
},
'requests': {
'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")), # Default 45 seconds

@ -575,3 +575,11 @@ class ChangeDetectionStore:
continue
return
# We incorrectly used common header overrides that should only apply to Requests
# These are now handled in content_fetcher::html_requests and shouldnt be passed to Playwright/Selenium
def update_7(self):
# These were hard-coded in early versions
for v in ['User-Agent', 'Accept', 'Accept-Encoding', 'Accept-Language']:
if self.data['settings']['headers'].get(v):
del self.data['settings']['headers'][v]

@ -10,7 +10,10 @@ flask_restful
pytz
# Set these versions together to avoid a RequestsDependencyWarning
requests[socks] ~= 2.26
# >= 2.26 also adds Brotli support if brotli is installed
brotli ~= 1.0
requests[socks] ~= 2.28
urllib3 > 1.26
chardet > 2.3.0

Loading…
Cancel
Save