From dc6f76ba64530b4d2fcf123ce267551fde1a2e24 Mon Sep 17 00:00:00 2001
From: dgtlmoon
Date: Sat, 7 May 2022 16:37:56 +0200
Subject: [PATCH] Make proxy configuration more consistent - see https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration (#585)

---
 changedetectionio/content_fetcher.py | 23 +++++++++++++++++------
 docker-compose.yml                   |  4 ++--
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py
index e8dc057e..9e0b889a 100644
--- a/changedetectionio/content_fetcher.py
+++ b/changedetectionio/content_fetcher.py
@@ -25,6 +25,8 @@ class Fetcher():
     # Will be needed in the future by the VisualSelector, always get this where possible.
     screenshot = False
     fetcher_description = "No description"
+    system_http_proxy = os.getenv('HTTP_PROXY')
+    system_https_proxy = os.getenv('HTTPS_PROXY')
 
     @abstractmethod
     def get_error(self):
@@ -80,17 +82,12 @@ class base_html_playwright(Fetcher):
     if os.getenv("PLAYWRIGHT_DRIVER_URL"):
         fetcher_description += " via '{}'".format(os.getenv("PLAYWRIGHT_DRIVER_URL"))
 
-#    try:
-#        from playwright.sync_api import sync_playwright
-#    except ModuleNotFoundError:
-#        fetcher_enabled = False
-
     browser_type = ''
     command_executor = ''
 
     # Configs for Proxy setup
     # In the ENV vars, is prefixed with "playwright_proxy_", so it is for example "playwright_proxy_server"
-    playwright_proxy_settings_mappings = ['server', 'bypass', 'username', 'password']
+    playwright_proxy_settings_mappings = ['bypass', 'server', 'username', 'password']
 
     proxy = None
 
@@ -185,6 +182,12 @@ class base_html_webdriver(Fetcher):
             if v:
                 proxy_args[k] = v.strip('"')
 
+        # Map back standard HTTP_ and HTTPS_PROXY to webDriver httpProxy/sslProxy (explicit webdriver_* settings win)
+        if not proxy_args.get('httpProxy') and self.system_http_proxy:
+            proxy_args['httpProxy'] = self.system_http_proxy
+        if not proxy_args.get('sslProxy') and self.system_https_proxy:
+            proxy_args['sslProxy'] = self.system_https_proxy
+
         if proxy_args:
             self.proxy = SeleniumProxy(raw=proxy_args)
 
@@ -260,11 +263,19 @@ class html_requests(Fetcher):
             request_method,
             ignore_status_codes=False):
 
+        # Map back standard HTTP_ and HTTPS_PROXY to requests http/https proxy
+        proxies={}
+        if self.system_http_proxy:
+            proxies['http'] = self.system_http_proxy
+        if self.system_https_proxy:
+            proxies['https'] = self.system_https_proxy
+
         r = requests.request(method=request_method,
                              data=request_body,
                              url=url,
                              headers=request_headers,
                              timeout=timeout,
+                             proxies=proxies,
                              verify=False)
 
         # If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks.
diff --git a/docker-compose.yml b/docker-compose.yml
index 88ee8a76..d0d5547d 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -17,8 +17,8 @@ services:
   #        Alternative WebDriver/selenium URL, do not use "'s or 's!
   #      - WEBDRIVER_URL=http://browser-chrome:4444/wd/hub
   #
-  #        WebDriver proxy settings webdriver_proxyType, webdriver_ftpProxy, webdriver_httpProxy, webdriver_noProxy,
-  #        webdriver_proxyAutoconfigUrl, webdriver_sslProxy, webdriver_autodetect,
+  #        WebDriver proxy settings webdriver_proxyType, webdriver_ftpProxy, webdriver_noProxy,
+  #        webdriver_proxyAutoconfigUrl, webdriver_autodetect,
   #        webdriver_socksProxy, webdriver_socksUsername, webdriver_socksVersion, webdriver_socksPassword
   #
   #        https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy