diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index a9f66f37..eeddb8cc 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -712,7 +712,7 @@ def changedetection_app(config=None, datastore_o=None): system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver' is_html_webdriver = False - if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver': + if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'): is_html_webdriver = True # Only works reliably with Playwright @@ -975,7 +975,7 @@ def changedetection_app(config=None, datastore_o=None): system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver' is_html_webdriver = False - if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver': + if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'): is_html_webdriver = True password_enabled_and_share_is_off = False @@ -1029,7 +1029,7 @@ def changedetection_app(config=None, datastore_o=None): is_html_webdriver = False - if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver': + if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'): is_html_webdriver = True # Never requested successfully, but we detected a fetch error diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py index d9c14590..509731bd 100644 --- 
a/changedetectionio/content_fetcher.py +++ b/changedetectionio/content_fetcher.py @@ -96,6 +96,7 @@ class Fetcher(): content = None error = None fetcher_description = "No description" + browser_connection_url = None headers = {} status_code = None webdriver_js_execute_code = None @@ -251,14 +252,17 @@ class base_html_playwright(Fetcher): proxy = None - def __init__(self, proxy_override=None): + def __init__(self, proxy_override=None, browser_connection_url=None): super().__init__() # .strip('"') is going to save someone a lot of time when they accidently wrap the env value self.browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"') - self.command_executor = os.getenv( - "PLAYWRIGHT_DRIVER_URL", - 'ws://playwright-chrome:3000' - ).strip('"') + + self.browser_connection_url = browser_connection_url + if not browser_connection_url: + self.browser_connection_url = os.getenv( + "PLAYWRIGHT_DRIVER_URL", + 'ws://playwright-chrome:3000' + ).strip('"') # If any proxy settings are enabled, then we should setup the proxy object proxy_args = {} @@ -444,7 +448,7 @@ class base_html_playwright(Fetcher): # Seemed to cause a connection Exception even tho I can see it connect # self.browser = browser_type.connect(self.command_executor, timeout=timeout*1000) # 60,000 connection timeout only - browser = browser_type.connect_over_cdp(self.command_executor, timeout=60000) + browser = browser_type.connect_over_cdp(self.browser_connection_url, timeout=60000) # SOCKS5 with authentication is not supported (yet) # https://github.com/microsoft/playwright/issues/10567 @@ -504,7 +508,11 @@ class base_html_playwright(Fetcher): self.status_code = response.status if self.status_code != 200 and not ignore_status_codes: - raise Non200ErrorCodeReceived(url=url, status_code=self.status_code) + + screenshot=self.page.screenshot(type='jpeg', full_page=True, + quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72))) + + raise Non200ErrorCodeReceived(url=url, 
status_code=self.status_code, screenshot=screenshot) if len(self.page.content().strip()) == 0: context.close() @@ -555,8 +563,6 @@ class base_html_webdriver(Fetcher): else: fetcher_description = "WebDriver Chrome/Javascript" - command_executor = '' - # Configs for Proxy setup # In the ENV vars, is prefixed with "webdriver_", so it is for example "webdriver_sslProxy" selenium_proxy_settings_mappings = ['proxyType', 'ftpProxy', 'httpProxy', 'noProxy', @@ -564,12 +570,13 @@ class base_html_webdriver(Fetcher): 'socksProxy', 'socksVersion', 'socksUsername', 'socksPassword'] proxy = None - def __init__(self, proxy_override=None): + def __init__(self, proxy_override=None, browser_connection_url=None): super().__init__() from selenium.webdriver.common.proxy import Proxy as SeleniumProxy # .strip('"') is going to save someone a lot of time when they accidently wrap the env value - self.command_executor = os.getenv("WEBDRIVER_URL", 'http://browser-chrome:4444/wd/hub').strip('"') + if not self.browser_connection_url: + self.browser_connection_url = os.getenv("WEBDRIVER_URL", 'http://browser-chrome:4444/wd/hub').strip('"') # If any proxy settings are enabled, then we should setup the proxy object proxy_args = {} @@ -611,7 +618,7 @@ class base_html_webdriver(Fetcher): options.proxy = self.proxy self.driver = webdriver.Remote( - command_executor=self.command_executor, + command_executor=self.browser_connection_url, options=options) try: @@ -666,9 +673,10 @@ class base_html_webdriver(Fetcher): class html_requests(Fetcher): fetcher_description = "Basic fast Plaintext/HTTP Client" - def __init__(self, proxy_override=None): + def __init__(self, proxy_override=None, browser_connection_url=None): super().__init__() self.proxy_override = proxy_override + # browser_connection_url is none because its always 'launched locally' def run(self, url, diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py index 16ffd0b0..b3de842b 100644 --- a/changedetectionio/forms.py +++ 
b/changedetectionio/forms.py @@ -168,7 +168,9 @@ class ValidateContentFetcherIsReady(object): def __call__(self, form, field): import urllib3.exceptions from changedetectionio import content_fetcher + return +# AttributeError: module 'changedetectionio.content_fetcher' has no attribute 'extra_browser_unlocked<>ASDF213r123r' # Better would be a radiohandler that keeps a reference to each class if field.data is not None and field.data != 'system': klass = getattr(content_fetcher, field.data) diff --git a/changedetectionio/processors/__init__.py b/changedetectionio/processors/__init__.py index caed4a36..c719d7a2 100644 --- a/changedetectionio/processors/__init__.py +++ b/changedetectionio/processors/__init__.py @@ -40,6 +40,18 @@ class difference_detection_processor(): if not prefer_fetch_backend or prefer_fetch_backend == 'system': prefer_fetch_backend = self.datastore.data['settings']['application'].get('fetch_backend') + # In the case that the preferred fetcher was a browser config with custom connection URL.. + # @todo - on save watch, if its extra_browser_ then it should be obvious it will use playwright (like if its requests now..) + browser_connection_url = None + if prefer_fetch_backend.startswith('extra_browser_'): + (t, key) = prefer_fetch_backend.split('extra_browser_') + connection = list( + filter(lambda s: (s['browser_name'] == key), self.datastore.data['settings']['requests'].get('extra_browsers', []))) + if connection: + prefer_fetch_backend = 'base_html_playwright' + browser_connection_url = connection[0].get('browser_connection_url') + + # Grab the right kind of 'fetcher', (playwright, requests, etc) if hasattr(content_fetcher, prefer_fetch_backend): fetcher_obj = getattr(content_fetcher, prefer_fetch_backend) @@ -54,8 +66,9 @@ class difference_detection_processor(): print(f"Using proxy Key: {preferred_proxy_id} as Proxy URL {proxy_url}") # Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need. 
+ # When browser_connection_url is None, the fetcher should fall back to working out its own best defaults (OS env vars etc) self.fetcher = fetcher_obj(proxy_override=proxy_url, - #browser_url_extra/configurable browser url=... + browser_connection_url=browser_connection_url ) if self.watch.has_browser_steps: diff --git a/changedetectionio/templates/watch-overview.html b/changedetectionio/templates/watch-overview.html index 5e2cc090..4b04ead0 100644 --- a/changedetectionio/templates/watch-overview.html +++ b/changedetectionio/templates/watch-overview.html @@ -104,8 +104,9 @@ {% if watch.get_fetch_backend == "html_webdriver" or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver' ) + or "extra_browser_" in watch.get_fetch_backend %} - + {% endif %} {%if watch.is_pdf %}{% endif %}