Fetcher - Ability to specify headers from a textfile per watch, global or per tag ( https://github.com/dgtlmoon/changedetection.io/wiki/Adding-headers-from-an-external-file )

pull/1592/head
dgtlmoon 2 years ago committed by GitHub
parent 955835df72
commit 9e954532d6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -700,6 +700,7 @@ def changedetection_app(config=None, datastore_o=None):
form=form,
has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
has_empty_checktime=using_default_check_time,
has_extra_headers_file=watch.has_extra_headers_file or datastore.has_extra_headers_file,
is_html_webdriver=is_html_webdriver,
jq_support=jq_support,
playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False),
@ -1444,6 +1445,7 @@ def check_for_new_version():
# Check daily
app.config.exit.wait(86400)
def notification_runner():
global notification_debug_log
from datetime import datetime

@ -49,3 +49,15 @@ class model(dict):
def __init__(self, *arg, **kw):
super(model, self).__init__(*arg, **kw)
self.update(self.base_config)
def parse_headers_from_text_file(filepath):
headers = {}
with open(filepath, 'r') as f:
for l in f.readlines():
l = l.strip()
if not l.startswith('#') and ':' in l:
(k, v) = l.split(':')
headers[k.strip()] = v.strip()
return headers

@ -473,6 +473,40 @@ class model(dict):
# None is set
return False
@property
def has_extra_headers_file(self):
if os.path.isfile(os.path.join(self.watch_data_dir, 'headers.txt')):
return True
for f in self.all_tags:
fname = "headers-"+re.sub(r'[\W_]', '', f).lower().strip() + ".txt"
filepath = os.path.join(self.__datastore_path, fname)
if os.path.isfile(filepath):
return True
return False
def get_all_headers(self):
from .App import parse_headers_from_text_file
headers = self.get('headers', {}).copy()
# Available headers on the disk could 'headers.txt' in the watch data dir
filepath = os.path.join(self.watch_data_dir, 'headers.txt')
try:
if os.path.isfile(filepath):
headers.update(parse_headers_from_text_file(filepath))
except Exception as e:
print(f"ERROR reading headers.txt at {filepath}", str(e))
# Or each by tag, as tagname.txt in the main datadir
for f in self.all_tags:
fname = "headers-"+re.sub(r'[\W_]', '', f).lower().strip() + ".txt"
filepath = os.path.join(self.__datastore_path, fname)
try:
if os.path.isfile(filepath):
headers.update(parse_headers_from_text_file(filepath))
except Exception as e:
print(f"ERROR reading headers.txt at {filepath}", str(e))
return headers
def get_last_fetched_before_filters(self):
import brotli

@ -70,10 +70,9 @@ class perform_site_check(difference_detection_processor):
# Unset any existing notification error
update_obj = {'last_notification_error': False, 'last_error': False}
extra_headers = watch.get('headers', [])
# Tweak the base config with the per-watch ones
request_headers = deepcopy(self.datastore.data['settings']['headers'])
extra_headers = watch.get_all_headers()
request_headers = self.datastore.get_all_headers()
request_headers.update(extra_headers)
# https://github.com/psf/requests/issues/4525

@ -3,7 +3,7 @@ from flask import (
)
from . model import App, Watch
from copy import deepcopy
from copy import deepcopy, copy
from os import path, unlink
from threading import Lock
import json
@ -474,8 +474,6 @@ class ChangeDetectionStore:
return proxy_list if len(proxy_list) else None
def get_preferred_proxy_for_watch(self, uuid):
"""
Returns the preferred proxy by ID key
@ -507,6 +505,25 @@ class ChangeDetectionStore:
return None
@property
def has_extra_headers_file(self):
filepath = os.path.join(self.datastore_path, 'headers.txt')
return os.path.isfile(filepath)
def get_all_headers(self):
from .model.App import parse_headers_from_text_file
headers = copy(self.data['settings'].get('headers', {}))
filepath = os.path.join(self.datastore_path, 'headers.txt')
try:
if os.path.isfile(filepath):
headers.update(parse_headers_from_text_file(filepath))
except Exception as e:
print(f"ERROR reading headers.txt at {filepath}", str(e))
return headers
# Run all updates
# IMPORTANT - Each update could be run even when they have a new install and the schema is correct
# So therefor - each `update_n` should be very careful about checking if it needs to actually run

@ -152,6 +152,15 @@
{{ render_field(form.headers, rows=5, placeholder="Example
Cookie: foobar
User-Agent: wonderbra 1.0") }}
<div class="pure-form-message-inline">
{% if has_extra_headers_file %}
<strong>Alert! Extra headers file found and will be added to this watch!</strong>
{% else %}
Headers can be also read from a file in your data-directory <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Adding-headers-from-an-external-file">Read more here</a>
{% endif %}
</div>
</div>
<div class="pure-control-group" id="request-body">
{{ render_field(form.body, rows=5, placeholder="Example

@ -14,13 +14,16 @@ global app
def cleanup(datastore_path):
# Unlink test output files
files = ['output.txt',
'url-watches.json',
'secret.txt',
'notification.txt',
'count.txt',
'endpoint-content.txt'
]
files = [
'count.txt',
'endpoint-content.txt'
'headers.txt',
'headers-testtag.txt',
'notification.txt',
'secret.txt',
'url-watches.json',
'output.txt',
]
for file in files:
try:
os.unlink("{}/{}".format(datastore_path, file))

@ -1,7 +1,8 @@
import json
import os
import time
from flask import url_for
from . util import set_original_response, set_modified_response, live_server_setup
from . util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_UUID_from_client
def test_setup(live_server):
live_server_setup(live_server)
@ -234,3 +235,72 @@ def test_method_in_request(client, live_server):
# Should be only one with method set to PATCH
assert watches_with_method == 1
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_headers_textfile_in_request(client, live_server):
#live_server_setup(live_server)
# Add our URL to the import page
test_url = url_for('test_headers', _external=True)
# Add the test URL twice, we will check
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(1)
# Add some headers to a request
res = client.post(
url_for("edit_page", uuid="first"),
data={
"url": test_url,
"tag": "testtag",
"fetch_backend": "html_requests",
"headers": "xxx:ooo\ncool:yeah\r\n"},
follow_redirects=True
)
assert b"Updated watch." in res.data
wait_for_all_checks(client)
with open('test-datastore/headers-testtag.txt', 'w') as f:
f.write("tag-header: test")
with open('test-datastore/headers.txt', 'w') as f:
f.write("global-header: nice\r\nnext-global-header: nice")
with open('test-datastore/'+extract_UUID_from_client(client)+'/headers.txt', 'w') as f:
f.write("watch-header: nice")
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
wait_for_all_checks(client)
res = client.get(url_for("edit_page", uuid="first"))
assert b"Extra headers file found and will be added to this watch" in res.data
# Not needed anymore
os.unlink('test-datastore/headers.txt')
os.unlink('test-datastore/headers-testtag.txt')
os.unlink('test-datastore/'+extract_UUID_from_client(client)+'/headers.txt')
# The service should echo back the request verb
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
assert b"Global-Header:nice" in res.data
assert b"Next-Global-Header:nice" in res.data
assert b"Xxx:ooo" in res.data
assert b"Watch-Header:nice" in res.data
assert b"Tag-Header:test" in res.data
#unlink headers.txt on start/stop
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
Loading…
Cancel
Save