Merge branch 'master' into 2039-restock-use-itemprop

piwheels-rpi-crypto
dgtlmoon 7 months ago
commit afc88d654b

@ -28,7 +28,7 @@ def manage_user_agent(headers, current_ua=''):
:return: :return:
""" """
# Ask it what the user agent is, if it's obviously ChromeHeadless, switch it to the default # Ask it what the user agent is, if it's obviously ChromeHeadless, switch it to the default
ua_in_custom_headers = next((v for k, v in headers.items() if k.lower() == "user-agent"), None) ua_in_custom_headers = headers.get('User-Agent')
if ua_in_custom_headers: if ua_in_custom_headers:
return ua_in_custom_headers return ua_in_custom_headers

@ -115,12 +115,11 @@ class fetcher(Fetcher):
# This user agent is similar to what was used when tweaking the evasions in inject_evasions_into_page(..) # This user agent is similar to what was used when tweaking the evasions in inject_evasions_into_page(..)
user_agent = None user_agent = None
if request_headers: if request_headers and request_headers.get('User-Agent'):
user_agent = next((value for key, value in request_headers.items() if key.lower().strip() == 'user-agent'), None) # request_headers should now be a CaseInsensitiveDict
if user_agent:
await self.page.setUserAgent(user_agent)
# Remove it so it's not sent again with headers after # Remove it so it's not sent again with headers after
[request_headers.pop(key) for key in list(request_headers) if key.lower().strip() == 'user-agent'.lower().strip()] user_agent = request_headers.pop('User-Agent').strip()
await self.page.setUserAgent(user_agent)
if not user_agent: if not user_agent:
# Attempt to strip 'HeadlessChrome' etc # Attempt to strip 'HeadlessChrome' etc

@ -342,7 +342,7 @@ def changedetection_app(config=None, datastore_o=None):
# @todo needs a .itemsWithTag() or something - then we can use that in Jinja2 and throw this away # @todo needs a .itemsWithTag() or something - then we can use that in Jinja2 and throw this away
for uuid, watch in datastore.data['watching'].items(): for uuid, watch in datastore.data['watching'].items():
# @todo tag notification_muted skip also (improve Watch model) # @todo tag notification_muted skip also (improve Watch model)
if watch.get('notification_muted'): if datastore.data['settings']['application'].get('rss_hide_muted_watches') and watch.get('notification_muted'):
continue continue
if limit_tag and not limit_tag in watch['tags']: if limit_tag and not limit_tag in watch['tags']:
continue continue
@ -475,7 +475,7 @@ def changedetection_app(config=None, datastore_o=None):
# Don't link to hosting when we're on the hosting environment # Don't link to hosting when we're on the hosting environment
active_tag=active_tag, active_tag=active_tag,
active_tag_uuid=active_tag_uuid, active_tag_uuid=active_tag_uuid,
app_rss_token=datastore.data['settings']['application']['rss_access_token'], app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
datastore=datastore, datastore=datastore,
errored_count=errored_count, errored_count=errored_count,
form=form, form=form,

@ -630,6 +630,8 @@ class globalSettingsApplicationForm(commonSettingsForm):
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"}) removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False) render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
shared_diff_access = BooleanField('Allow access to view diff page when password is enabled', default=False, validators=[validators.Optional()]) shared_diff_access = BooleanField('Allow access to view diff page when password is enabled', default=False, validators=[validators.Optional()])
rss_hide_muted_watches = BooleanField('Hide muted watches from RSS feed', default=True,
validators=[validators.Optional()])
filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification', filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification',
render_kw={"style": "width: 5em;"}, render_kw={"style": "width: 5em;"},
validators=[validators.NumberRange(min=0, validators=[validators.NumberRange(min=0,

@ -46,6 +46,8 @@ class model(dict):
'pager_size': 50, 'pager_size': 50,
'password': False, 'password': False,
'render_anchor_tag_content': False, 'render_anchor_tag_content': False,
'rss_access_token': None,
'rss_hide_muted_watches': True,
'schema_version' : 0, 'schema_version' : 0,
'shared_diff_access': False, 'shared_diff_access': False,
'webdriver_delay': None , # Extra delay in seconds before extracting text 'webdriver_delay': None , # Extra delay in seconds before extracting text

@ -1,10 +1,10 @@
from abc import abstractmethod from abc import abstractmethod
import os
import hashlib
import re
from copy import deepcopy
from changedetectionio.strtobool import strtobool from changedetectionio.strtobool import strtobool
from copy import deepcopy
from loguru import logger from loguru import logger
import hashlib
import os
import re
class difference_detection_processor(): class difference_detection_processor():
@ -21,7 +21,7 @@ class difference_detection_processor():
self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid)) self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
def call_browser(self): def call_browser(self):
from requests.structures import CaseInsensitiveDict
# Protect against file:// access # Protect against file:// access
if re.search(r'^file://', self.watch.get('url', '').strip(), re.IGNORECASE): if re.search(r'^file://', self.watch.get('url', '').strip(), re.IGNORECASE):
if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')): if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
@ -93,14 +93,16 @@ class difference_detection_processor():
self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid')) self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))
# Tweak the base config with the per-watch ones # Tweak the base config with the per-watch ones
request_headers = self.watch.get('headers', []) request_headers = CaseInsensitiveDict()
request_headers.update(self.datastore.get_all_base_headers())
request_headers.update(self.datastore.get_all_headers_in_textfile_for_watch(uuid=self.watch.get('uuid')))
ua = self.datastore.data['settings']['requests'].get('default_ua') ua = self.datastore.data['settings']['requests'].get('default_ua')
if ua and ua.get(prefer_fetch_backend): if ua and ua.get(prefer_fetch_backend):
request_headers.update({'User-Agent': ua.get(prefer_fetch_backend)}) request_headers.update({'User-Agent': ua.get(prefer_fetch_backend)})
request_headers.update(self.watch.get('headers', {}))
request_headers.update(self.datastore.get_all_base_headers())
request_headers.update(self.datastore.get_all_headers_in_textfile_for_watch(uuid=self.watch.get('uuid')))
# https://github.com/psf/requests/issues/4525 # https://github.com/psf/requests/issues/4525
# Requests doesn't yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot # Requests doesn't yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot
# do this by accident. # do this by accident.

@ -1026,6 +1026,11 @@ ul {
border-radius: 10px; border-radius: 10px;
margin-bottom: 1em; margin-bottom: 1em;
display: none; display: none;
button {
/* some space if they wrap the page */
margin-bottom: 3px;
margin-top: 3px;
}
} }
.checkbox-uuid { .checkbox-uuid {

@ -1130,6 +1130,10 @@ ul {
border-radius: 10px; border-radius: 10px;
margin-bottom: 1em; margin-bottom: 1em;
display: none; } display: none; }
#checkbox-operations button {
/* some space if they wrap the page */
margin-bottom: 3px;
margin-top: 3px; }
.checkbox-uuid > * { .checkbox-uuid > * {
vertical-align: middle; } vertical-align: middle; }

@ -124,12 +124,12 @@ class ChangeDetectionStore:
self.__data['app_guid'] = str(uuid_builder.uuid4()) self.__data['app_guid'] = str(uuid_builder.uuid4())
# Generate the URL access token for RSS feeds # Generate the URL access token for RSS feeds
if not 'rss_access_token' in self.__data['settings']['application']: if not self.__data['settings']['application'].get('rss_access_token'):
secret = secrets.token_hex(16) secret = secrets.token_hex(16)
self.__data['settings']['application']['rss_access_token'] = secret self.__data['settings']['application']['rss_access_token'] = secret
# Generate the API access token # Generate the API access token
if not 'api_access_token' in self.__data['settings']['application']: if not self.__data['settings']['application'].get('api_access_token'):
secret = secrets.token_hex(16) secret = secrets.token_hex(16)
self.__data['settings']['application']['api_access_token'] = secret self.__data['settings']['application']['api_access_token'] = secret

@ -62,6 +62,9 @@
<span class="pure-form-message-inline">Allow access to view watch diff page when password is enabled (Good for sharing the diff page) <span class="pure-form-message-inline">Allow access to view watch diff page when password is enabled (Good for sharing the diff page)
</span> </span>
</div> </div>
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }}
</div>
<div class="pure-control-group"> <div class="pure-control-group">
{{ render_field(form.application.form.pager_size) }} {{ render_field(form.application.form.pager_size) }}
<span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span> <span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span>

@ -253,6 +253,62 @@ def test_method_in_request(client, live_server):
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data assert b'Deleted' in res.data
# Re #2408 - user-agent override test, also should handle case-insensitive header deduplication
def test_ua_global_override(client, live_server):
    """Verify that a watch-level User-Agent header replaces the global default.

    The test first sets a default User-Agent for the ``html_requests`` backend
    in the global settings and confirms it appears in the watch preview. It
    then adds a differently-cased ``User-AGent`` header to the watch itself and
    confirms the preview shows only the watch-level value.

    NOTE(review): this presumes the ``test_headers`` endpoint echoes the
    request headers into the fetched content - confirm against the test server.
    """
    # live_server_setup(live_server)
    test_url = url_for('test_headers', _external=True)

    def preview_body():
        # Fetch the rendered preview of the first watch and return its raw bytes
        preview = client.get(
            url_for("preview_page", uuid="first"),
            follow_redirects=True
        )
        return preview.data

    # Configure the global default User-Agent for the html_requests backend
    settings_form = {
        "application-fetch_backend": "html_requests",
        "application-minutes_between_check": 180,
        "requests-default_ua-html_requests": "html-requests-user-agent"
    }
    settings_response = client.post(
        url_for("settings_page"),
        data=settings_form,
        follow_redirects=True
    )
    assert b'Settings updated' in settings_response.data

    # Import the test URL as a new watch and let it be checked
    import_response = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in import_response.data
    wait_for_all_checks(client)

    # With no per-watch header, the global default user-agent should be shown
    assert b"html-requests-user-agent" in preview_body()

    # Now override it with a custom header on the watch itself
    watch_form = {
        "url": test_url,
        "tags": "testtag",
        "fetch_backend": 'html_requests',
        # Important - the odd casing also exercises case-insensitive matching
        "headers": "User-AGent: agent-from-watch"
    }
    edit_response = client.post(
        url_for("edit_page", uuid="first"),
        data=watch_form,
        follow_redirects=True
    )
    assert b"Updated watch." in edit_response.data
    wait_for_all_checks(client)

    # Only the watch-level agent may appear; the global one must be gone
    body_after_override = preview_body()
    assert b"agent-from-watch" in body_after_override
    assert b"html-requests-user-agent" not in body_after_override

    # Clean up every watch so later tests start from an empty list
    delete_response = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in delete_response.data
def test_headers_textfile_in_request(client, live_server): def test_headers_textfile_in_request(client, live_server):
#live_server_setup(live_server) #live_server_setup(live_server)
# Add our URL to the import page # Add our URL to the import page
@ -333,7 +389,7 @@ def test_headers_textfile_in_request(client, live_server):
# Not needed anymore # Not needed anymore
os.unlink('test-datastore/headers.txt') os.unlink('test-datastore/headers.txt')
os.unlink('test-datastore/headers-testtag.txt') os.unlink('test-datastore/headers-testtag.txt')
os.unlink('test-datastore/' + extract_UUID_from_client(client) + '/headers.txt')
# The service should echo back the request verb # The service should echo back the request verb
res = client.get( res = client.get(
url_for("preview_page", uuid="first"), url_for("preview_page", uuid="first"),

Loading…
Cancel
Save