diff --git a/README.md b/README.md index 0c132010..19f19483 100644 --- a/README.md +++ b/README.md @@ -136,24 +136,9 @@ When you enable a `json:` filter, you can even automatically extract and parse e `json:$.price` would give `23.50`, or you can extract the whole structure -### Proxy +### Proxy configuration -A proxy for ChangeDetection.io can be configured by setting environment the -`HTTP_PROXY`, `HTTPS_PROXY` variables, examples are also in the `docker-compose.yml` - -`NO_PROXY` exclude list can be specified by following `"localhost,192.168.0.0/24"` - -as `docker run` with `-e` - -``` -docker run -d --restart always -e HTTPS_PROXY="socks5h://10.10.1.10:1080" -p "127.0.0.1:5000:5000" -v datastore-volume:/datastore --name changedetection.io dgtlmoon/changedetection.io -``` - -With `docker-compose`, see the `Proxy support example` in docker-compose.yml. - -For more information see https://docs.python-requests.org/en/master/user/advanced/#proxies - -This proxy support also extends to the notifications https://github.com/caronc/apprise/issues/387#issuecomment-841718867 +See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration ### RaspberriPi support? 
diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py index 1f6ef14a..d1eb41b9 100644 --- a/changedetectionio/content_fetcher.py +++ b/changedetectionio/content_fetcher.py @@ -3,6 +3,7 @@ import time from abc import ABC, abstractmethod from selenium import webdriver from selenium.webdriver.common.desired_capabilities import DesiredCapabilities +from selenium.webdriver.common.proxy import Proxy as SeleniumProxy from selenium.common.exceptions import WebDriverException import urllib3.exceptions @@ -65,15 +66,34 @@ class html_webdriver(Fetcher): command_executor = '' + # Configs for Proxy setup + # In the ENV vars, each setting is prefixed with "webdriver_", so it is for example "webdriver_sslProxy" + selenium_proxy_settings_mappings = ['ftpProxy', 'httpProxy', 'noProxy', + 'proxyAutoconfigUrl', 'sslProxy', 'autodetect', + 'socksProxy', 'socksUsername', 'socksPassword'] + proxy=None + def __init__(self): - self.command_executor = os.getenv("WEBDRIVER_URL", 'http://browser-chrome:4444/wd/hub') + # .strip('"') is going to save someone a lot of time when they accidentally wrap the env value + self.command_executor = os.getenv("WEBDRIVER_URL", 'http://browser-chrome:4444/wd/hub').strip('"') + + # If any proxy settings are enabled, then we should setup the proxy object + proxy_args = {} + for k in self.selenium_proxy_settings_mappings: + v = os.getenv('webdriver_' + k, False) + if v: + proxy_args[k] = v.strip('"') + + if proxy_args: + self.proxy = SeleniumProxy(raw=proxy_args) def run(self, url, timeout, request_headers): # check env for WEBDRIVER_URL driver = webdriver.Remote( command_executor=self.command_executor, - desired_capabilities=DesiredCapabilities.CHROME) + desired_capabilities=DesiredCapabilities.CHROME, + proxy=self.proxy) try: driver.get(url) diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py index bc650165..1028c213 100644 --- a/changedetectionio/forms.py +++ b/changedetectionio/forms.py @@ -106,10 +106,12 @@ class 
ValidateContentFetcherIsReady(object): except urllib3.exceptions.MaxRetryError as e: driver_url = some_object.command_executor message = field.gettext('Content fetcher \'%s\' did not respond.' % (field.data)) - message += '
'+field.gettext('Be sure that the selenium/webdriver runner is running and accessible via network from this container/host.') + message += '
' + field.gettext( + 'Be sure that the selenium/webdriver runner is running and accessible via network from this container/host.') message += '
' + field.gettext('Did you follow the instructions in the wiki?') message += '

' + field.gettext('WebDriver Host: %s' % (driver_url)) message += '
Go here for more information' + message += '
'+field.gettext('Content fetcher did not respond properly, unable to use it.\n %s' % (str(e))) raise ValidationError(message) diff --git a/docker-compose.yml b/docker-compose.yml index d06de402..1060ed58 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,13 +13,23 @@ services: # - PUID=1000 # - PGID=1000 - # # Alternative WebDriver/selenium URL, do not use "'s or 's! + # + # Alternative WebDriver/selenium URL, do not use "'s or 's! # - WEBDRIVER_URL=http://browser-chrome:4444/wd/hub - # Proxy support example. + # + # WebDriver proxy settings webdriver_ftpProxy, webdriver_httpProxy, webdriver_noProxy, + # webdriver_proxyAutoconfigUrl, webdriver_sslProxy, webdriver_autodetect, + # webdriver_socksProxy, webdriver_socksUsername, webdriver_socksPassword + # + # https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy + # + # Plain requests - proxy support example. # - HTTP_PROXY=socks5h://10.10.1.10:1080 # - HTTPS_PROXY=socks5h://10.10.1.10:1080 + # # An exclude list (useful for notification URLs above) can be specified by with # - NO_PROXY="localhost,192.168.0.0/24" + # # Base URL of your changedetection.io install (Added to the notification alert) # - BASE_URL=https://mysite.com