From 9e954532d602c61a07b9f6d4d2453b997355cf57 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Mon, 22 May 2023 17:19:52 +0200 Subject: [PATCH] Fetcher - Ability to specify headers from a textfile per watch, global or per tag ( https://github.com/dgtlmoon/changedetection.io/wiki/Adding-headers-from-an-external-file ) --- changedetectionio/__init__.py | 2 + changedetectionio/model/App.py | 12 ++++ changedetectionio/model/Watch.py | 34 +++++++++ .../processors/text_json_diff.py | 5 +- changedetectionio/store.py | 23 +++++- changedetectionio/templates/edit.html | 9 +++ changedetectionio/tests/conftest.py | 17 +++-- changedetectionio/tests/test_request.py | 72 ++++++++++++++++++- 8 files changed, 160 insertions(+), 14 deletions(-) diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index 8b3e8e39..67d2f0be 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -700,6 +700,7 @@ def changedetection_app(config=None, datastore_o=None): form=form, has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False, has_empty_checktime=using_default_check_time, + has_extra_headers_file=watch.has_extra_headers_file or datastore.has_extra_headers_file, is_html_webdriver=is_html_webdriver, jq_support=jq_support, playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False), @@ -1444,6 +1445,7 @@ def check_for_new_version(): # Check daily app.config.exit.wait(86400) + def notification_runner(): global notification_debug_log from datetime import datetime diff --git a/changedetectionio/model/App.py b/changedetectionio/model/App.py index 7c7cac9f..54580b3d 100644 --- a/changedetectionio/model/App.py +++ b/changedetectionio/model/App.py @@ -49,3 +49,15 @@ class model(dict): def __init__(self, *arg, **kw): super(model, self).__init__(*arg, **kw) self.update(self.base_config) + + +def parse_headers_from_text_file(filepath): + headers = {} + with open(filepath, 'r') as f: + for l in f.readlines(): + l = l.strip() + if not l.startswith('#') and ':' in l: + (k, v) = l.split(':') + headers[k.strip()] = v.strip() + + return headers \ No newline at end of file diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py index ca654d04..77c07497 100644 --- a/changedetectionio/model/Watch.py +++ b/changedetectionio/model/Watch.py @@ -473,6 +473,40 @@ class model(dict): # None is set return False + @property + def has_extra_headers_file(self): + if os.path.isfile(os.path.join(self.watch_data_dir, 'headers.txt')): + return True + + for f in self.all_tags: + fname = "headers-"+re.sub(r'[\W_]', '', f).lower().strip() + ".txt" + filepath = os.path.join(self.__datastore_path, fname) + if os.path.isfile(filepath): + return True + + return False + + def get_all_headers(self): + from .App import parse_headers_from_text_file + headers = self.get('headers', {}).copy() + # Available headers on the disk could 'headers.txt' in the watch data dir + filepath = os.path.join(self.watch_data_dir, 'headers.txt') + try: + if os.path.isfile(filepath): + headers.update(parse_headers_from_text_file(filepath)) + except Exception as e: + print(f"ERROR reading headers.txt at {filepath}", str(e)) + + # Or each by tag, as tagname.txt in the main datadir + for f in self.all_tags: + fname = "headers-"+re.sub(r'[\W_]', '', f).lower().strip() + ".txt" + filepath = os.path.join(self.__datastore_path, fname) + try: + if os.path.isfile(filepath): + headers.update(parse_headers_from_text_file(filepath)) + except Exception as e: + print(f"ERROR reading headers.txt at {filepath}", str(e)) + return headers def get_last_fetched_before_filters(self): import brotli diff --git a/changedetectionio/processors/text_json_diff.py b/changedetectionio/processors/text_json_diff.py index cf85522a..f767703b 100644 --- a/changedetectionio/processors/text_json_diff.py +++ b/changedetectionio/processors/text_json_diff.py @@ -70,10 +70,9 @@ class perform_site_check(difference_detection_processor): # Unset any existing notification error update_obj = {'last_notification_error': False, 'last_error': False} - extra_headers = watch.get('headers', []) - # Tweak the base config with the per-watch ones - request_headers = deepcopy(self.datastore.data['settings']['headers']) + extra_headers = watch.get_all_headers() + request_headers = self.datastore.get_all_headers() request_headers.update(extra_headers) # https://github.com/psf/requests/issues/4525 diff --git a/changedetectionio/store.py b/changedetectionio/store.py index f69eb907..5e071ce5 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -3,7 +3,7 @@ from flask import ( ) from . model import App, Watch -from copy import deepcopy +from copy import deepcopy, copy from os import path, unlink from threading import Lock import json @@ -474,8 +474,6 @@ class ChangeDetectionStore: return proxy_list if len(proxy_list) else None - - def get_preferred_proxy_for_watch(self, uuid): """ Returns the preferred proxy by ID key @@ -507,6 +505,25 @@ class ChangeDetectionStore: return None + @property + def has_extra_headers_file(self): + filepath = os.path.join(self.datastore_path, 'headers.txt') + return os.path.isfile(filepath) + + def get_all_headers(self): + from .model.App import parse_headers_from_text_file + headers = copy(self.data['settings'].get('headers', {})) + + filepath = os.path.join(self.datastore_path, 'headers.txt') + try: + if os.path.isfile(filepath): + headers.update(parse_headers_from_text_file(filepath)) + except Exception as e: + print(f"ERROR reading headers.txt at {filepath}", str(e)) + + return headers + + # Run all updates # IMPORTANT - Each update could be run even when they have a new install and the schema is correct # So therefor - each `update_n` should be very careful about checking if it needs to actually run diff --git a/changedetectionio/templates/edit.html b/changedetectionio/templates/edit.html index 805c79c7..40b1101f 100644 --- a/changedetectionio/templates/edit.html +++ b/changedetectionio/templates/edit.html @@ -152,6 +152,15 @@ {{ render_field(form.headers, rows=5, placeholder="Example Cookie: foobar User-Agent: wonderbra 1.0") }} + +
+ {% if has_extra_headers_file %} + Alert! Extra headers file found and will be added to this watch! + {% else %} + Headers can be also read from a file in your data-directory Read more here + {% endif %} +
+
{{ render_field(form.body, rows=5, placeholder="Example diff --git a/changedetectionio/tests/conftest.py b/changedetectionio/tests/conftest.py index 948c5792..754ec1fc 100644 --- a/changedetectionio/tests/conftest.py +++ b/changedetectionio/tests/conftest.py @@ -14,13 +14,16 @@ global app def cleanup(datastore_path): # Unlink test output files - files = ['output.txt', - 'url-watches.json', - 'secret.txt', - 'notification.txt', - 'count.txt', - 'endpoint-content.txt' - ] + files = [ + 'count.txt', + 'endpoint-content.txt' + 'headers.txt', + 'headers-testtag.txt', + 'notification.txt', + 'secret.txt', + 'url-watches.json', + 'output.txt', + ] for file in files: try: os.unlink("{}/{}".format(datastore_path, file)) diff --git a/changedetectionio/tests/test_request.py b/changedetectionio/tests/test_request.py index b3205d15..06e518e1 100644 --- a/changedetectionio/tests/test_request.py +++ b/changedetectionio/tests/test_request.py @@ -1,7 +1,8 @@ import json +import os import time from flask import url_for -from . util import set_original_response, set_modified_response, live_server_setup +from . util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_UUID_from_client def test_setup(live_server): live_server_setup(live_server) @@ -234,3 +235,72 @@ def test_method_in_request(client, live_server): # Should be only one with method set to PATCH assert watches_with_method == 1 + res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) + assert b'Deleted' in res.data + +def test_headers_textfile_in_request(client, live_server): + #live_server_setup(live_server) + # Add our URL to the import page + test_url = url_for('test_headers', _external=True) + + # Add the test URL twice, we will check + res = client.post( + url_for("import_page"), + data={"urls": test_url}, + follow_redirects=True + ) + assert b"1 Imported" in res.data + + time.sleep(1) + + + # Add some headers to a request + res = client.post( + url_for("edit_page", uuid="first"), + data={ + "url": test_url, + "tag": "testtag", + "fetch_backend": "html_requests", + "headers": "xxx:ooo\ncool:yeah\r\n"}, + follow_redirects=True + ) + assert b"Updated watch." in res.data + wait_for_all_checks(client) + + with open('test-datastore/headers-testtag.txt', 'w') as f: + f.write("tag-header: test") + + with open('test-datastore/headers.txt', 'w') as f: + f.write("global-header: nice\r\nnext-global-header: nice") + + with open('test-datastore/'+extract_UUID_from_client(client)+'/headers.txt', 'w') as f: + f.write("watch-header: nice") + + client.get(url_for("form_watch_checknow"), follow_redirects=True) + + # Give the thread time to pick it up + wait_for_all_checks(client) + + res = client.get(url_for("edit_page", uuid="first")) + assert b"Extra headers file found and will be added to this watch" in res.data + + # Not needed anymore + os.unlink('test-datastore/headers.txt') + os.unlink('test-datastore/headers-testtag.txt') + os.unlink('test-datastore/'+extract_UUID_from_client(client)+'/headers.txt') + # The service should echo back the request verb + res = client.get( + url_for("preview_page", uuid="first"), + follow_redirects=True + ) + + assert b"Global-Header:nice" in res.data + assert b"Next-Global-Header:nice" in res.data + assert b"Xxx:ooo" in res.data + assert b"Watch-Header:nice" in res.data + assert b"Tag-Header:test" in res.data + + + #unlink headers.txt on start/stop + res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) + assert b'Deleted' in res.data \ No newline at end of file