diff --git a/changedetectionio/content_fetchers/puppeteer.py b/changedetectionio/content_fetchers/puppeteer.py index cad1b6b8..ebefabba 100644 --- a/changedetectionio/content_fetchers/puppeteer.py +++ b/changedetectionio/content_fetchers/puppeteer.py @@ -9,7 +9,10 @@ from loguru import logger from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, BrowserFetchTimedOut, BrowserConnectError +<<<<<<< HEAD +======= +>>>>>>> db8f2d3b (Ability to set default UA for either fetching types) class fetcher(Fetcher): fetcher_description = "Puppeteer/direct {}/Javascript".format( os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize() diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py index 2d64a227..673be9ca 100644 --- a/changedetectionio/forms.py +++ b/changedetectionio/forms.py @@ -526,6 +526,10 @@ class SingleExtraBrowser(Form): browser_connection_url = StringField('Browser connection URL', [validators.Optional()], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50}) # @todo do the validation here instead +class DefaultUAInputForm(Form): + html_requests = StringField('Plaintext requests', validators=[validators.Optional()], render_kw={"placeholder": ""}) + if os.getenv("PLAYWRIGHT_DRIVER_URL") or os.getenv("WEBDRIVER_URL"): + html_webdriver = StringField('Chrome requests', validators=[validators.Optional()], render_kw={"placeholder": ""}) # datastore.data['settings']['requests'].. 
class globalSettingsRequestForm(Form): @@ -537,6 +541,8 @@ class globalSettingsRequestForm(Form): extra_proxies = FieldList(FormField(SingleExtraProxy), min_entries=5) extra_browsers = FieldList(FormField(SingleExtraBrowser), min_entries=5) + default_ua = FormField(DefaultUAInputForm, label="Default User-Agent overrides") + def validate_extra_proxies(self, extra_validators=None): for e in self.data['extra_proxies']: if e.get('proxy_name') or e.get('proxy_url'): diff --git a/changedetectionio/model/App.py b/changedetectionio/model/App.py index 1202d5db..a8491890 100644 --- a/changedetectionio/model/App.py +++ b/changedetectionio/model/App.py @@ -22,6 +22,10 @@ class model(dict): 'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None}, 'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")), # Default 45 seconds 'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "10")), # Number of threads, lower is better for slow connections + 'default_ua': { + 'html_requests': None, + 'html_webdriver': None, + } }, 'application': { # Custom notification content diff --git a/changedetectionio/processors/__init__.py b/changedetectionio/processors/__init__.py index e2b54481..8702ee5d 100644 --- a/changedetectionio/processors/__init__.py +++ b/changedetectionio/processors/__init__.py @@ -97,6 +97,10 @@ class difference_detection_processor(): request_headers.update(self.datastore.get_all_base_headers()) request_headers.update(self.datastore.get_all_headers_in_textfile_for_watch(uuid=self.watch.get('uuid'))) + ua = self.datastore.data['settings']['requests'].get('default_ua') + if ua and ua.get(prefer_fetch_backend): + request_headers.update({'User-Agent': ua.get(prefer_fetch_backend)}) + # https://github.com/psf/requests/issues/4525 # Requests doesnt yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot # do this by accident. 
diff --git a/changedetectionio/store.py b/changedetectionio/store.py index 884c617a..afa6b2ae 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -554,7 +554,6 @@ class ChangeDetectionStore: return os.path.isfile(filepath) def get_all_base_headers(self): - from .model.App import parse_headers_from_text_file headers = {} # Global app settings headers.update(self.data['settings'].get('headers', {})) diff --git a/changedetectionio/templates/settings.html b/changedetectionio/templates/settings.html index e72c7818..c72a1fb6 100644 --- a/changedetectionio/templates/settings.html +++ b/changedetectionio/templates/settings.html @@ -108,8 +108,6 @@

Use the Basic method (default) where your watched sites don't need Javascript to render.

The Chrome/Javascript method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'.

-
- Tip: Connect using Bright Data and Oxylabs Proxies, find out more here.
@@ -121,6 +119,18 @@ {{ render_field(form.application.form.webdriver_delay) }}
+
+ {{ render_field(form.requests.form.default_ua) }} + + Applied to all requests.

+ Note: Simply changing the User-Agent often does not defeat anti-robot technologies; it's important to consider all of the ways that the browser is detected. +
+
+
@@ -190,7 +200,7 @@ nav - + Chrome Chrome Webstore