Ability to set default UA for either fetching types

easy-override-ua
dgtlmoon 7 months ago
parent f0ed4f64e8
commit 92e50f58dd

@ -9,7 +9,10 @@ from loguru import logger
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, BrowserFetchTimedOut, BrowserConnectError from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, BrowserFetchTimedOut, BrowserConnectError
<<<<<<< HEAD
=======
>>>>>>> db8f2d3b (Ability to set default UA for either fetching types)
class fetcher(Fetcher): class fetcher(Fetcher):
fetcher_description = "Puppeteer/direct {}/Javascript".format( fetcher_description = "Puppeteer/direct {}/Javascript".format(
os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize() os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()

@ -526,6 +526,10 @@ class SingleExtraBrowser(Form):
browser_connection_url = StringField('Browser connection URL', [validators.Optional()], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50}) browser_connection_url = StringField('Browser connection URL', [validators.Optional()], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50})
# @todo do the validation here instead # @todo do the validation here instead
class DefaultUAInputForm(Form):
    """Sub-form with one optional User-Agent override field per fetch backend."""

    # Always declared: optional free-text override, rendered with a
    # "<default>" placeholder when left empty.
    html_requests = StringField(
        'Plaintext requests',
        validators=[validators.Optional()],
        render_kw={"placeholder": "<default>"},
    )

    # Declared only when a browser endpoint is configured through the
    # environment; the check runs once, when this class body is executed.
    if any(os.getenv(env_name) for env_name in ("PLAYWRIGHT_DRIVER_URL", "WEBDRIVER_URL")):
        html_webdriver = StringField(
            'Chrome requests',
            validators=[validators.Optional()],
            render_kw={"placeholder": "<default>"},
        )
# datastore.data['settings']['requests'].. # datastore.data['settings']['requests']..
class globalSettingsRequestForm(Form): class globalSettingsRequestForm(Form):
@ -537,6 +541,8 @@ class globalSettingsRequestForm(Form):
extra_proxies = FieldList(FormField(SingleExtraProxy), min_entries=5) extra_proxies = FieldList(FormField(SingleExtraProxy), min_entries=5)
extra_browsers = FieldList(FormField(SingleExtraBrowser), min_entries=5) extra_browsers = FieldList(FormField(SingleExtraBrowser), min_entries=5)
default_ua = FormField(DefaultUAInputForm, label="Default User-Agent overrides")
def validate_extra_proxies(self, extra_validators=None): def validate_extra_proxies(self, extra_validators=None):
for e in self.data['extra_proxies']: for e in self.data['extra_proxies']:
if e.get('proxy_name') or e.get('proxy_url'): if e.get('proxy_name') or e.get('proxy_url'):

@ -22,6 +22,10 @@ class model(dict):
'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None}, 'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None},
'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")), # Default 45 seconds 'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")), # Default 45 seconds
'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "10")), # Number of threads, lower is better for slow connections 'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "10")), # Number of threads, lower is better for slow connections
'default_ua': {
'html_requests': None,
'html_webdriver': None,
}
}, },
'application': { 'application': {
# Custom notification content # Custom notification content

@ -97,6 +97,10 @@ class difference_detection_processor():
request_headers.update(self.datastore.get_all_base_headers()) request_headers.update(self.datastore.get_all_base_headers())
request_headers.update(self.datastore.get_all_headers_in_textfile_for_watch(uuid=self.watch.get('uuid'))) request_headers.update(self.datastore.get_all_headers_in_textfile_for_watch(uuid=self.watch.get('uuid')))
ua = self.datastore.data['settings']['requests'].get('default_ua')
if ua and ua.get(prefer_fetch_backend):
request_headers.update({'User-Agent': ua.get(prefer_fetch_backend)})
# https://github.com/psf/requests/issues/4525 # https://github.com/psf/requests/issues/4525
# Requests doesn't yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot # Requests doesn't yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot
# do this by accident. # do this by accident.

@ -554,7 +554,6 @@ class ChangeDetectionStore:
return os.path.isfile(filepath) return os.path.isfile(filepath)
def get_all_base_headers(self): def get_all_base_headers(self):
from .model.App import parse_headers_from_text_file
headers = {} headers = {}
# Global app settings # Global app settings
headers.update(self.data['settings'].get('headers', {})) headers.update(self.data['settings'].get('headers', {}))

@ -108,8 +108,6 @@
<p>Use the <strong>Basic</strong> method (default) where your watched sites don't need Javascript to render.</p> <p>Use the <strong>Basic</strong> method (default) where your watched sites don't need Javascript to render.</p>
<p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p> <p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
</span> </span>
<br>
Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using Bright Data and Oxylabs Proxies, find out more here.</a>
</div> </div>
<fieldset class="pure-group" id="webdriver-override-options" data-visible-for="application-fetch_backend=html_webdriver"> <fieldset class="pure-group" id="webdriver-override-options" data-visible-for="application-fetch_backend=html_webdriver">
<div class="pure-form-message-inline"> <div class="pure-form-message-inline">
@ -121,6 +119,18 @@
{{ render_field(form.application.form.webdriver_delay) }} {{ render_field(form.application.form.webdriver_delay) }}
</div> </div>
</fieldset> </fieldset>
<div class="pure-control-group inline-radio">
{{ render_field(form.requests.form.default_ua) }}
<span class="pure-form-message-inline">
Applied to all requests.<br><br>
Note: Simply changing the User-Agent often does not defeat anti-robot technologies, it's important to consider <a href="">all of the ways that the browser is detected</a>.
</span>
</div>
<div class="pure-control-group">
<br>
Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using Bright Data and Oxylabs Proxies, find out more here.</a>
</div>
</div> </div>
<div class="tab-pane-inner" id="filters"> <div class="tab-pane-inner" id="filters">
@ -190,7 +200,7 @@ nav
<a id="chrome-extension-link" <a id="chrome-extension-link"
title="Try our new Chrome Extension!" title="Try our new Chrome Extension!"
href="https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop"> href="https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop">
<img src="{{ url_for('static_content', group='images', filename='Google-Chrome-icon.png') }}"> <img src="{{ url_for('static_content', group='images', filename='Google-Chrome-icon.png') }}" alt="Chrome">
Chrome Webstore Chrome Webstore
</a> </a>
</p> </p>

Loading…
Cancel
Save