Fetcher - Ability to specify headers from a textfile per watch, global or per tag ( https://github.com/dgtlmoon/changedetection.io/wiki/Adding-headers-from-an-external-file )

pull/1592/head
dgtlmoon 2 years ago committed by GitHub
parent 955835df72
commit 9e954532d6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -700,6 +700,7 @@ def changedetection_app(config=None, datastore_o=None):
form=form, form=form,
has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False, has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
has_empty_checktime=using_default_check_time, has_empty_checktime=using_default_check_time,
has_extra_headers_file=watch.has_extra_headers_file or datastore.has_extra_headers_file,
is_html_webdriver=is_html_webdriver, is_html_webdriver=is_html_webdriver,
jq_support=jq_support, jq_support=jq_support,
playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False), playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False),
@ -1444,6 +1445,7 @@ def check_for_new_version():
# Check daily # Check daily
app.config.exit.wait(86400) app.config.exit.wait(86400)
def notification_runner(): def notification_runner():
global notification_debug_log global notification_debug_log
from datetime import datetime from datetime import datetime

@ -49,3 +49,15 @@ class model(dict):
def __init__(self, *arg, **kw): def __init__(self, *arg, **kw):
super(model, self).__init__(*arg, **kw) super(model, self).__init__(*arg, **kw)
self.update(self.base_config) self.update(self.base_config)
def parse_headers_from_text_file(filepath):
    """Read HTTP request headers from a plain-text file.

    Every useful line has the form ``Name: value``. Lines that start with
    ``#`` and lines that contain no colon at all are ignored.

    Only the FIRST colon separates the name from the value, so values that
    contain colons themselves (URLs, ``host:port``, timestamps, ...) are kept
    intact instead of raising ``ValueError`` during unpacking.

    :param filepath: Path of the text file to read.
    :return: dict mapping the stripped header name to the stripped value;
             when a name occurs more than once the last occurrence wins.
    """
    headers = {}
    with open(filepath, 'r') as f:
        for line in f:
            line = line.strip()
            if line.startswith('#') or ':' not in line:
                continue
            # partition() splits on the first ':' only; everything after it,
            # including further colons, belongs to the header value.
            name, _, value = line.partition(':')
            headers[name.strip()] = value.strip()
    return headers

@ -473,6 +473,40 @@ class model(dict):
# None is set # None is set
return False return False
@property
def has_extra_headers_file(self):
    """Tell whether any header text file on disk applies to this watch.

    Checked in this order:
      1. ``headers.txt`` inside the watch's own data directory
      2. ``headers-<tag>.txt`` in the datastore directory, once per entry of
         ``self.all_tags`` (the tag is lower-cased and stripped of every
         non-word character and underscore to build the filename)

    :return: True as soon as one such file exists, otherwise False.
    """
    per_watch_file = os.path.join(self.watch_data_dir, 'headers.txt')
    if os.path.isfile(per_watch_file):
        return True

    return any(
        os.path.isfile(
            os.path.join(
                self.__datastore_path,
                "headers-" + re.sub(r'[\W_]', '', tag).lower().strip() + ".txt",
            )
        )
        for tag in self.all_tags
    )
def get_all_headers(self):
    """Collect the request headers for this watch, including header files on disk.

    Starts from a copy of the watch's own ``headers`` setting, then merges in
    (later sources override earlier ones for the same header name):
      1. ``headers.txt`` from the watch's data directory
      2. ``headers-<tag>.txt`` from the datastore directory, once per entry
         of ``self.all_tags``

    A file that cannot be read or parsed is reported on stdout and skipped.

    :return: a new dict of header name -> value.
    """
    from .App import parse_headers_from_text_file

    def merge_file_into(target, candidate):
        # Best effort: a broken or unreadable header file must not stop the check
        try:
            if os.path.isfile(candidate):
                target.update(parse_headers_from_text_file(candidate))
        except Exception as e:
            print(f"ERROR reading headers.txt at {candidate}", str(e))

    merged = self.get('headers', {}).copy()

    # Per-watch headers live as 'headers.txt' in the watch data dir
    merge_file_into(merged, os.path.join(self.watch_data_dir, 'headers.txt'))

    # Per-tag headers live as 'headers-<tagname>.txt' in the main datadir
    for tag in self.all_tags:
        tag_filename = "headers-" + re.sub(r'[\W_]', '', tag).lower().strip() + ".txt"
        merge_file_into(merged, os.path.join(self.__datastore_path, tag_filename))

    return merged
def get_last_fetched_before_filters(self): def get_last_fetched_before_filters(self):
import brotli import brotli

@ -70,10 +70,9 @@ class perform_site_check(difference_detection_processor):
# Unset any existing notification error # Unset any existing notification error
update_obj = {'last_notification_error': False, 'last_error': False} update_obj = {'last_notification_error': False, 'last_error': False}
extra_headers = watch.get('headers', [])
# Tweak the base config with the per-watch ones # Tweak the base config with the per-watch ones
request_headers = deepcopy(self.datastore.data['settings']['headers']) extra_headers = watch.get_all_headers()
request_headers = self.datastore.get_all_headers()
request_headers.update(extra_headers) request_headers.update(extra_headers)
# https://github.com/psf/requests/issues/4525 # https://github.com/psf/requests/issues/4525

@ -3,7 +3,7 @@ from flask import (
) )
from . model import App, Watch from . model import App, Watch
from copy import deepcopy from copy import deepcopy, copy
from os import path, unlink from os import path, unlink
from threading import Lock from threading import Lock
import json import json
@ -474,8 +474,6 @@ class ChangeDetectionStore:
return proxy_list if len(proxy_list) else None return proxy_list if len(proxy_list) else None
def get_preferred_proxy_for_watch(self, uuid): def get_preferred_proxy_for_watch(self, uuid):
""" """
Returns the preferred proxy by ID key Returns the preferred proxy by ID key
@ -507,6 +505,25 @@ class ChangeDetectionStore:
return None return None
@property
def has_extra_headers_file(self):
    """True when a global ``headers.txt`` file exists in the datastore directory."""
    return os.path.isfile(os.path.join(self.datastore_path, 'headers.txt'))
def get_all_headers(self):
    """Collect the global request headers, including the global header file.

    Starts from a shallow copy of ``settings['headers']`` (empty when unset)
    and merges in ``headers.txt`` from the datastore directory when that file
    exists; entries from the file override the configured ones.

    A file that cannot be read or parsed is reported on stdout and skipped.

    :return: a new dict of header name -> value.
    """
    from .model.App import parse_headers_from_text_file

    merged = copy(self.data['settings'].get('headers', {}))
    global_file = os.path.join(self.datastore_path, 'headers.txt')

    # Best effort: a broken header file is logged and otherwise ignored
    try:
        if os.path.isfile(global_file):
            from_disk = parse_headers_from_text_file(global_file)
            merged.update(from_disk)
    except Exception as e:
        print(f"ERROR reading headers.txt at {global_file}", str(e))

    return merged
# Run all updates # Run all updates
# IMPORTANT - Each update could be run even when they have a new install and the schema is correct # IMPORTANT - Each update could be run even when they have a new install and the schema is correct
# So therefor - each `update_n` should be very careful about checking if it needs to actually run # So therefor - each `update_n` should be very careful about checking if it needs to actually run

@ -152,6 +152,15 @@
{{ render_field(form.headers, rows=5, placeholder="Example {{ render_field(form.headers, rows=5, placeholder="Example
Cookie: foobar Cookie: foobar
User-Agent: wonderbra 1.0") }} User-Agent: wonderbra 1.0") }}
<div class="pure-form-message-inline">
{% if has_extra_headers_file %}
<strong>Alert! Extra headers file found and will be added to this watch!</strong>
{% else %}
Headers can be also read from a file in your data-directory <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Adding-headers-from-an-external-file">Read more here</a>
{% endif %}
</div>
</div> </div>
<div class="pure-control-group" id="request-body"> <div class="pure-control-group" id="request-body">
{{ render_field(form.body, rows=5, placeholder="Example {{ render_field(form.body, rows=5, placeholder="Example

@ -14,13 +14,16 @@ global app
def cleanup(datastore_path): def cleanup(datastore_path):
# Unlink test output files # Unlink test output files
files = ['output.txt', files = [
'url-watches.json', 'count.txt',
'secret.txt', 'endpoint-content.txt'
'notification.txt', 'headers.txt',
'count.txt', 'headers-testtag.txt',
'endpoint-content.txt' 'notification.txt',
] 'secret.txt',
'url-watches.json',
'output.txt',
]
for file in files: for file in files:
try: try:
os.unlink("{}/{}".format(datastore_path, file)) os.unlink("{}/{}".format(datastore_path, file))

@ -1,7 +1,8 @@
import json import json
import os
import time import time
from flask import url_for from flask import url_for
from . util import set_original_response, set_modified_response, live_server_setup from . util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_UUID_from_client
def test_setup(live_server): def test_setup(live_server):
live_server_setup(live_server) live_server_setup(live_server)
@ -234,3 +235,72 @@ def test_method_in_request(client, live_server):
# Should be only one with method set to PATCH # Should be only one with method set to PATCH
assert watches_with_method == 1 assert watches_with_method == 1
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_headers_textfile_in_request(client, live_server):
    """Headers from the global, per-tag and per-watch text files must all be sent.

    Imports one watch tagged ``testtag`` with two headers set in the form,
    writes the three kinds of header files into ``test-datastore``, triggers a
    recheck, and then asserts that the edit page shows the "extra headers
    file" alert and that the preview contains the headers from every source.
    """
    #live_server_setup(live_server)
    # Add our URL to the import page
    test_url = url_for('test_headers', _external=True)
    # Import the test URL once
    res = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data
    time.sleep(1)
    # Set the tag and some form-level headers on the watch
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={
            "url": test_url,
            "tag": "testtag",
            "fetch_backend": "html_requests",
            "headers": "xxx:ooo\ncool:yeah\r\n"},
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
    wait_for_all_checks(client)
    # Per-tag header file, named after the 'testtag' tag set above
    with open('test-datastore/headers-testtag.txt', 'w') as f:
        f.write("tag-header: test")
    # Global header file, two headers separated by CRLF
    with open('test-datastore/headers.txt', 'w') as f:
        f.write("global-header: nice\r\nnext-global-header: nice")
    # Per-watch header file inside the watch's own directory
    with open('test-datastore/'+extract_UUID_from_client(client)+'/headers.txt', 'w') as f:
        f.write("watch-header: nice")
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    # Give the thread time to pick it up
    wait_for_all_checks(client)
    res = client.get(url_for("edit_page", uuid="first"))
    assert b"Extra headers file found and will be added to this watch" in res.data
    # The recheck has already run, so the header files are not needed anymore
    os.unlink('test-datastore/headers.txt')
    os.unlink('test-datastore/headers-testtag.txt')
    os.unlink('test-datastore/'+extract_UUID_from_client(client)+'/headers.txt')
    # The preview of the 'test_headers' URL is expected to list the headers
    # that were sent with the request (presumably the endpoint echoes them back)
    res = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True
    )
    assert b"Global-Header:nice" in res.data
    assert b"Next-Global-Header:nice" in res.data
    assert b"Xxx:ooo" in res.data
    assert b"Watch-Header:nice" in res.data
    assert b"Tag-Header:test" in res.data
    # Remove all watches so later tests start from a clean state
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
Loading…
Cancel
Save