From bad0909cc2e40e0b4c280a05b76b4c94a693de4f Mon Sep 17 00:00:00 2001 From: bwees Date: Wed, 19 Oct 2022 18:42:04 -0400 Subject: [PATCH 01/10] added external header server --- changedetectionio/fetch_site_status.py | 15 ++++++++++++++- changedetectionio/forms.py | 1 + changedetectionio/model/Watch.py | 1 + changedetectionio/templates/edit.html | 6 ++++++ 4 files changed, 22 insertions(+), 1 deletion(-) diff --git a/changedetectionio/fetch_site_status.py b/changedetectionio/fetch_site_status.py index 6c3dbec8..1b1a4827 100644 --- a/changedetectionio/fetch_site_status.py +++ b/changedetectionio/fetch_site_status.py @@ -4,7 +4,8 @@ import os import re import time import urllib3 - +import requests +import simplejson from changedetectionio import content_fetcher, html_tools urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) @@ -67,6 +68,18 @@ class perform_site_check(): # Tweak the base config with the per-watch ones request_headers = self.datastore.data['settings']['headers'].copy() + + if self.datastore.get_val(uuid, 'external_header_server') is not None: + try: + resp = requests.get(self.datastore.get_val(uuid, 'external_header_server')) + if resp.status_code != 200: + raise Exception("External header server returned non-200 response. Please check the URL for the server") + + request_headers.update(resp.json()) + + except simplejson.errors.JSONDecodeError: + raise Exception("Failed to decode JSON response from external header server") + request_headers.update(extra_headers) # https://github.com/psf/requests/issues/4525 diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py index 4ad1b1a7..96ac0e68 100644 --- a/changedetectionio/forms.py +++ b/changedetectionio/forms.py @@ -336,6 +336,7 @@ class watchForm(commonSettingsForm): title = StringField('Title', default='') ignore_text = StringListField('Ignore text', [ValidateListRegex()]) + external_header_server = fields.URLField('External Header Server', validators=[validateURL()]) headers = StringDictKeyValue('Request headers') body = TextAreaField('Request body', [validators.Optional()]) method = SelectField('Request method', choices=valid_method, default=default_method) diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py index acfd9117..9e436b79 100644 --- a/changedetectionio/model/Watch.py +++ b/changedetectionio/model/Watch.py @@ -25,6 +25,7 @@ class model(dict): 'previous_md5': False, 'uuid': str(uuid_builder.uuid4()), 'headers': {}, # Extra headers to send + 'external_header_server': None, # URL to a server that will return headers 'body': None, 'method': 'GET', #'history': {}, # Dict of timestamp and output stripped filename diff --git a/changedetectionio/templates/edit.html b/changedetectionio/templates/edit.html index b13afe46..0012adbb 100644 --- a/changedetectionio/templates/edit.html +++ b/changedetectionio/templates/edit.html @@ -109,6 +109,12 @@
{{ render_field(form.method) }}
</div>
+ <div class="pure-control-group">
+     {{ render_field(form.external_header_server, placeholder="http://example.com/watch1") }}
+     The watch will perform a GET request to this URL before each check and will use the returned headers in addition to the ones listed below and in global settings. More help and examples here
+ </div>
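For context, the header-merge behaviour this series adds can be summarised with a minimal Python sketch (the helper name fetch_external_headers, the timeout and the example header values are illustrative, not part of the patch): the watch performs a GET to the configured URL, expects a flat JSON object mapping header names to values, and merges it over the global and per-watch headers.

import requests

def fetch_external_headers(url, base_headers):
    # Mirrors the logic added to fetch_site_status.py above; the timeout is an added safeguard.
    resp = requests.get(url, timeout=10)
    if resp.status_code != 200:
        raise Exception("External header server returned non-200 response. Please check the URL for the server")
    try:
        extra = resp.json()  # e.g. {"Cookie": "session=abc123", "Authorization": "Bearer ..."}
    except ValueError:
        raise Exception("Failed to decode JSON response from external header server")
    merged = dict(base_headers)
    merged.update(extra)  # external headers take precedence over the base/global ones
    return merged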
{{ render_field(form.headers, rows=5, placeholder="Example Cookie: foobar From 0d5820932fd12b3e533fd2d18a420225e48dbf78 Mon Sep 17 00:00:00 2001 From: bwees Date: Wed, 19 Oct 2022 18:45:43 -0400 Subject: [PATCH 02/10] rename branch --- changedetectionio/fetch_site_status.py | 1 - 1 file changed, 1 deletion(-) diff --git a/changedetectionio/fetch_site_status.py b/changedetectionio/fetch_site_status.py index c577c563..2fb16ed4 100644 --- a/changedetectionio/fetch_site_status.py +++ b/changedetectionio/fetch_site_status.py @@ -10,7 +10,6 @@ from changedetectionio import content_fetcher, html_tools urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - # Some common stuff here that can be moved to a base class # (set_proxy_from_list) class perform_site_check(): From 495e322c9ec41b619e30fa9699615ef25f2b9e17 Mon Sep 17 00:00:00 2001 From: bwees Date: Wed, 19 Oct 2022 18:55:05 -0400 Subject: [PATCH 03/10] fixed import errors --- changedetectionio/fetch_site_status.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/changedetectionio/fetch_site_status.py b/changedetectionio/fetch_site_status.py index 2fb16ed4..61b9a40a 100644 --- a/changedetectionio/fetch_site_status.py +++ b/changedetectionio/fetch_site_status.py @@ -2,10 +2,9 @@ import hashlib import logging import os import re -import time import urllib3 import requests -import simplejson +import json from changedetectionio import content_fetcher, html_tools urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) @@ -57,17 +56,18 @@ class perform_site_check(): # Tweak the base config with the per-watch ones request_headers = self.datastore.data['settings']['headers'].copy() - if self.datastore.get_val(uuid, 'external_header_server') is not None: + if self.datastore.data['watching'][uuid].get('external_header_server') is not None: try: - resp = requests.get(self.datastore.get_val(uuid, 'external_header_server')) + resp = requests.get(self.datastore.data['watching'][uuid].get('external_header_server')) if resp.status_code != 200: raise Exception("External header server returned non-200 response. 
Please check the URL for the server") + data = json.loads(resp.text.strip()) request_headers.update(resp.json()) - except simplejson.errors.JSONDecodeError: + except json.decoder.JSONDecodeError: raise Exception("Failed to decode JSON response from external header server") - + request_headers.update(extra_headers) # https://github.com/psf/requests/issues/4525 From 0a2644d0c37faeb189af9c949a5f8c7e0635a0d4 Mon Sep 17 00:00:00 2001 From: bwees Date: Wed, 19 Oct 2022 18:58:54 -0400 Subject: [PATCH 04/10] fix tests --- changedetectionio/tests/test_auth.py | 2 +- changedetectionio/tests/test_css_selector.py | 2 +- changedetectionio/tests/test_ignorestatuscode.py | 2 +- changedetectionio/tests/test_share_watch.py | 2 +- changedetectionio/tests/test_xpath_selector.py | 10 +++++----- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/changedetectionio/tests/test_auth.py b/changedetectionio/tests/test_auth.py index f8d1437e..38f89721 100644 --- a/changedetectionio/tests/test_auth.py +++ b/changedetectionio/tests/test_auth.py @@ -23,7 +23,7 @@ def test_basic_auth(client, live_server): # Check form validation res = client.post( url_for("edit_page", uuid="first"), - data={"css_filter": "", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, + data={"css_filter": "", "url": test_url, "tag": "", "headers": "", "external_header_server": "", 'fetch_backend': "html_requests"}, follow_redirects=True ) assert b"Updated watch." in res.data diff --git a/changedetectionio/tests/test_css_selector.py b/changedetectionio/tests/test_css_selector.py index ab234ddb..d7d0a1af 100644 --- a/changedetectionio/tests/test_css_selector.py +++ b/changedetectionio/tests/test_css_selector.py @@ -98,7 +98,7 @@ def test_check_markup_css_filter_restriction(client, live_server): # Add our URL to the import page res = client.post( url_for("edit_page", uuid="first"), - data={"css_filter": css_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, + data={"css_filter": css_filter, "url": test_url, "tag": "", "headers": "", "external_header_server": "",'fetch_backend': "html_requests"}, follow_redirects=True ) assert b"Updated watch." in res.data diff --git a/changedetectionio/tests/test_ignorestatuscode.py b/changedetectionio/tests/test_ignorestatuscode.py index aeafcdaa..15aa1d78 100644 --- a/changedetectionio/tests/test_ignorestatuscode.py +++ b/changedetectionio/tests/test_ignorestatuscode.py @@ -114,7 +114,7 @@ def test_403_page_check_works_with_ignore_status_code(client, live_server): # Add our URL to the import page res = client.post( url_for("edit_page", uuid="first"), - data={"ignore_status_codes": "y", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, + data={"ignore_status_codes": "y", "url": test_url, "tag": "", "headers": "", "external_header_server": "",'fetch_backend': "html_requests"}, follow_redirects=True ) assert b"Updated watch." 
in res.data diff --git a/changedetectionio/tests/test_share_watch.py b/changedetectionio/tests/test_share_watch.py index 620bda03..5f7eff36 100644 --- a/changedetectionio/tests/test_share_watch.py +++ b/changedetectionio/tests/test_share_watch.py @@ -29,7 +29,7 @@ def test_share_watch(client, live_server): # Add our URL to the import page res = client.post( url_for("edit_page", uuid="first"), - data={"css_filter": css_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, + data={"css_filter": css_filter, "url": test_url, "tag": "", "headers": "", "external_header_server": "",'fetch_backend': "html_requests"}, follow_redirects=True ) assert b"Updated watch." in res.data diff --git a/changedetectionio/tests/test_xpath_selector.py b/changedetectionio/tests/test_xpath_selector.py index 4e417a74..9be957d6 100644 --- a/changedetectionio/tests/test_xpath_selector.py +++ b/changedetectionio/tests/test_xpath_selector.py @@ -89,7 +89,7 @@ def test_check_xpath_filter_utf8(client, live_server): time.sleep(1) res = client.post( url_for("edit_page", uuid="first"), - data={"css_filter": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, + data={"css_filter": filter, "url": test_url, "tag": "", "headers": "", "external_header_server": "",'fetch_backend': "html_requests"}, follow_redirects=True ) assert b"Updated watch." in res.data @@ -143,7 +143,7 @@ def test_check_xpath_text_function_utf8(client, live_server): time.sleep(1) res = client.post( url_for("edit_page", uuid="first"), - data={"css_filter": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, + data={"css_filter": filter, "url": test_url, "tag": "", "headers": "", "external_header_server": "",'fetch_backend': "html_requests"}, follow_redirects=True ) assert b"Updated watch." in res.data @@ -192,7 +192,7 @@ def test_check_markup_xpath_filter_restriction(client, live_server): # Add our URL to the import page res = client.post( url_for("edit_page", uuid="first"), - data={"css_filter": xpath_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, + data={"css_filter": xpath_filter, "url": test_url, "tag": "", "headers": "", "external_header_server": "",'fetch_backend': "html_requests"}, follow_redirects=True ) assert b"Updated watch." 
in res.data @@ -233,7 +233,7 @@ def test_xpath_validation(client, live_server): res = client.post( url_for("edit_page", uuid="first"), - data={"css_filter": "/something horrible", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, + data={"css_filter": "/something horrible", "url": test_url, "tag": "", "headers": "", "external_header_server": "",'fetch_backend': "html_requests"}, follow_redirects=True ) assert b"is not a valid XPath expression" in res.data @@ -263,7 +263,7 @@ def test_check_with_prefix_css_filter(client, live_server): res = client.post( url_for("edit_page", uuid="first"), - data={"css_filter": "xpath://*[contains(@class, 'sametext')]", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, + data={"css_filter": "xpath://*[contains(@class, 'sametext')]", "url": test_url, "tag": "", "headers": "", "external_header_server": "",'fetch_backend': "html_requests"}, follow_redirects=True ) From 296c7c46cbc14d242326237ee77612d2cff48277 Mon Sep 17 00:00:00 2001 From: bwees Date: Wed, 19 Oct 2022 19:00:38 -0400 Subject: [PATCH 05/10] fixed empty field errors --- changedetectionio/fetch_site_status.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changedetectionio/fetch_site_status.py b/changedetectionio/fetch_site_status.py index 61b9a40a..75118ef5 100644 --- a/changedetectionio/fetch_site_status.py +++ b/changedetectionio/fetch_site_status.py @@ -56,7 +56,7 @@ class perform_site_check(): # Tweak the base config with the per-watch ones request_headers = self.datastore.data['settings']['headers'].copy() - if self.datastore.data['watching'][uuid].get('external_header_server') is not None: + if self.datastore.data['watching'][uuid].get('external_header_server') is not None or self.datastore.data['watching'][uuid].get('external_header_server') != "": try: resp = requests.get(self.datastore.data['watching'][uuid].get('external_header_server')) if resp.status_code != 200: From 83161e4fa36467ada9b75eb4c06ac48172706862 Mon Sep 17 00:00:00 2001 From: bwees Date: Wed, 19 Oct 2022 19:03:01 -0400 Subject: [PATCH 06/10] fixed string None case --- changedetectionio/fetch_site_status.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changedetectionio/fetch_site_status.py b/changedetectionio/fetch_site_status.py index 75118ef5..743c2bc9 100644 --- a/changedetectionio/fetch_site_status.py +++ b/changedetectionio/fetch_site_status.py @@ -56,7 +56,7 @@ class perform_site_check(): # Tweak the base config with the per-watch ones request_headers = self.datastore.data['settings']['headers'].copy() - if self.datastore.data['watching'][uuid].get('external_header_server') is not None or self.datastore.data['watching'][uuid].get('external_header_server') != "": + if self.datastore.data['watching'][uuid].get('external_header_server') is not None or self.datastore.data['watching'][uuid].get('external_header_server') != "" and self.datastore.data['watching'][uuid].get('external_header_server') != "None": try: resp = requests.get(self.datastore.data['watching'][uuid].get('external_header_server')) if resp.status_code != 200: From 76fd27dfabb3d2575e022844ef932db104943a5d Mon Sep 17 00:00:00 2001 From: bwees Date: Wed, 19 Oct 2022 19:10:01 -0400 Subject: [PATCH 07/10] fix logic error --- changedetectionio/fetch_site_status.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changedetectionio/fetch_site_status.py b/changedetectionio/fetch_site_status.py index 743c2bc9..32d8c8a8 100644 --- 
a/changedetectionio/fetch_site_status.py +++ b/changedetectionio/fetch_site_status.py @@ -56,7 +56,7 @@ class perform_site_check(): # Tweak the base config with the per-watch ones request_headers = self.datastore.data['settings']['headers'].copy() - if self.datastore.data['watching'][uuid].get('external_header_server') is not None or self.datastore.data['watching'][uuid].get('external_header_server') != "" and self.datastore.data['watching'][uuid].get('external_header_server') != "None": + if self.datastore.data['watching'][uuid].get('external_header_server') is not None and self.datastore.data['watching'][uuid].get('external_header_server') != "" and self.datastore.data['watching'][uuid].get('external_header_server') != "None": try: resp = requests.get(self.datastore.data['watching'][uuid].get('external_header_server')) if resp.status_code != 200: From 852a69862959a20c04ae1045ca3cf22e09492a97 Mon Sep 17 00:00:00 2001 From: bwees Date: Wed, 19 Oct 2022 19:14:01 -0400 Subject: [PATCH 08/10] add optional for field --- changedetectionio/forms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py index 627c8561..34c54bda 100644 --- a/changedetectionio/forms.py +++ b/changedetectionio/forms.py @@ -358,7 +358,7 @@ class watchForm(commonSettingsForm): title = StringField('Title', default='') ignore_text = StringListField('Ignore text', [ValidateListRegex()]) - external_header_server = fields.URLField('External Header Server', validators=[validateURL()]) + external_header_server = fields.URLField('External Header Server', validators=[validators.Optional(), validateURL()]) headers = StringDictKeyValue('Request headers') body = TextAreaField('Request body', [validators.Optional()]) method = SelectField('Request method', choices=valid_method, default=default_method) From 724cb172247ce279d9acd0063a347fa82a7ca5bf Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Mon, 24 Oct 2022 23:20:39 +0200 Subject: [PATCH 09/10] Re #1052 - Dynamic URLs, use variables in the URL (such as the current date, the date in a month, and other logic see https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL ) (#1057) --- changedetectionio/fetch_site_status.py | 8 ++++++- changedetectionio/templates/edit.html | 3 ++- changedetectionio/tests/test_jinja2.py | 33 ++++++++++++++++++++++++++ changedetectionio/tests/util.py | 5 ++++ requirements.txt | 4 ++++ 5 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 changedetectionio/tests/test_jinja2.py diff --git a/changedetectionio/fetch_site_status.py b/changedetectionio/fetch_site_status.py index 0f84da16..76c821be 100644 --- a/changedetectionio/fetch_site_status.py +++ b/changedetectionio/fetch_site_status.py @@ -35,6 +35,8 @@ class perform_site_check(): def run(self, uuid): + from jinja2 import Environment + changed_detected = False screenshot = False # as bytes stripped_text_from_html = "" @@ -65,7 +67,11 @@ class perform_site_check(): request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '') timeout = self.datastore.data['settings']['requests'].get('timeout') - url = watch.get('url') + + # Jinja2 available in URLs along with https://pypi.org/project/jinja2-time/ + jinja2_env = Environment(extensions=['jinja2_time.TimeExtension']) + url = str(jinja2_env.from_string(watch.get('url')).render()) + request_body = self.datastore.data['watching'][uuid].get('body') request_method = self.datastore.data['watching'][uuid].get('method') 
ignore_status_codes = self.datastore.data['watching'][uuid].get('ignore_status_codes', False) diff --git a/changedetectionio/templates/edit.html b/changedetectionio/templates/edit.html index 59d95317..66286314 100644 --- a/changedetectionio/templates/edit.html +++ b/changedetectionio/templates/edit.html @@ -40,7 +40,8 @@
{{ render_field(form.url, placeholder="https://...", required=true, class="m-d") }}
- Some sites use JavaScript to create the content, for this you should use the Chrome/WebDriver Fetcher
+ Some sites use JavaScript to create the content, for this you should use the Chrome/WebDriver Fetcher<br>
+ You can use variables in the URL, perfect for inserting the current date and other logic; help and examples here
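A minimal sketch of how this URL templating behaves (the example URL is illustrative; the Environment and TimeExtension usage is the same as the code added to fetch_site_status.py in this patch, and the {% now %} syntax is the one exercised in test_jinja2.py below):

from jinja2 import Environment

# Render a watch URL containing jinja2-time expressions, as the fetcher does before each check.
jinja2_env = Environment(extensions=['jinja2_time.TimeExtension'])
raw_url = "https://example.com/report?date={% now 'Europe/Berlin', '%Y-%m-%d' %}"
url = str(jinja2_env.from_string(raw_url).render())
print(url)  # e.g. https://example.com/report?date=2022-10-27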
{{ render_field(form.title, class="m-d") }} diff --git a/changedetectionio/tests/test_jinja2.py b/changedetectionio/tests/test_jinja2.py new file mode 100644 index 00000000..9c6baa9f --- /dev/null +++ b/changedetectionio/tests/test_jinja2.py @@ -0,0 +1,33 @@ +#!/usr/bin/python3 + +import time +from flask import url_for +from .util import live_server_setup + + +# If there was only a change in the whitespacing, then we shouldnt have a change detected +def test_jinja2_in_url_query(client, live_server): + live_server_setup(live_server) + + # Give the endpoint time to spin up + time.sleep(1) + + # Add our URL to the import page + test_url = url_for('test_return_query', _external=True) + + # because url_for() will URL-encode the var, but we dont here + full_url = "{}?{}".format(test_url, + "date={% now 'Europe/Berlin', '%Y' %}.{% now 'Europe/Berlin', '%m' %}.{% now 'Europe/Berlin', '%d' %}", ) + res = client.post( + url_for("form_quick_watch_add"), + data={"url": full_url, "tag": "test"}, + follow_redirects=True + ) + assert b"Watch added" in res.data + time.sleep(3) + # It should report nothing found (no new 'unviewed' class) + res = client.get( + url_for("preview_page", uuid="first"), + follow_redirects=True + ) + assert b'date=2' in res.data diff --git a/changedetectionio/tests/util.py b/changedetectionio/tests/util.py index e93d9a40..b943a606 100644 --- a/changedetectionio/tests/util.py +++ b/changedetectionio/tests/util.py @@ -159,5 +159,10 @@ def live_server_setup(live_server): ret = " ".join([auth.username, auth.password, auth.type]) return ret + # Just return some GET var + @live_server.app.route('/test-return-query', methods=['GET']) + def test_return_query(): + return request.query_string + live_server.start() diff --git a/requirements.txt b/requirements.txt index 500f45f9..4bf29823 100644 --- a/requirements.txt +++ b/requirements.txt @@ -46,5 +46,9 @@ selenium ~= 4.1.0 # need to revisit flask login versions werkzeug ~= 2.0.0 +# Templating, so far just in the URLs but in the future can be for the notifications also +jinja2 +jinja2-time + # playwright is installed at Dockerfile build time because it's not available on all platforms From 72834a42fd1be52a9502b49ae6c5cba8880e350e Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Thu, 27 Oct 2022 12:35:26 +0200 Subject: [PATCH 10/10] Backups and Snapshots - Data directory now fully portable, (all paths are relative) , refactored backup zip export creation --- changedetectionio/__init__.py | 15 +++--- changedetectionio/download.zip | Bin 0 -> 2083 bytes changedetectionio/model/Watch.py | 71 ++++++++++++++++--------- changedetectionio/tests/test_backup.py | 32 +++++++++-- 4 files changed, 81 insertions(+), 37 deletions(-) create mode 100644 changedetectionio/download.zip diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index 8bbb747d..383c5bf7 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -987,9 +987,6 @@ def changedetection_app(config=None, datastore_o=None): # create a ZipFile object backupname = "changedetection-backup-{}.zip".format(int(time.time())) - - # We only care about UUIDS from the current index file - uuids = list(datastore.data['watching'].keys()) backup_filepath = os.path.join(datastore_o.datastore_path, backupname) with zipfile.ZipFile(backup_filepath, "w", @@ -1005,12 +1002,12 @@ def changedetection_app(config=None, datastore_o=None): # Add the flask app secret zipObj.write(os.path.join(datastore_o.datastore_path, "secret.txt"), arcname="secret.txt") - # Add any 
snapshot data we find, use the full path to access the file, but make the file 'relative' in the Zip. - for txt_file_path in Path(datastore_o.datastore_path).rglob('*.txt'): - parent_p = txt_file_path.parent - if parent_p.name in uuids: - zipObj.write(txt_file_path, - arcname=str(txt_file_path).replace(datastore_o.datastore_path, ''), + # Add any data in the watch data directory. + for uuid, w in datastore.data['watching'].items(): + for f in Path(w.watch_data_dir).glob('*'): + zipObj.write(f, + # Use the full path to access the file, but make the file 'relative' in the Zip. + arcname=os.path.join(f.parts[-2], f.parts[-1]), compress_type=zipfile.ZIP_DEFLATED, compresslevel=8) diff --git a/changedetectionio/download.zip b/changedetectionio/download.zip new file mode 100644 index 0000000000000000000000000000000000000000..b4658ada34a5d6b6c022ae832c7ba91ec5aaf905 GIT binary patch literal 2083 zcmWIWW@Zs#U|`^2*c=rdy5X@{*$rj}hEg5|1_7XGX;F@Dd16U&MryHMR&jpb(Jl#XMT?%S zo|t^kqb_1|*)L_SzkFX#A%A>Hs9AwmsYJj{p?KAhF5F3UT(klK0iNi%EJXwkN>63~kT#MUF z?uYDZxHI!!(Z2Mo-i@*?S03H(KbI`dnX@1~KkDh36|et3+Fkry;%vA3?i*1VYd3N~ zYnjaYPMop0)>h+0bqb$o?~IL`7gTDOTW{&^J5>HG>azC16&7L2i_aCWNZ%^Gq%zg? zf=5Q-tZjaV-SuBh;`5@OuB^M(5aJ^6hR3?RXo7E|@h!#o?YqvMy?f?|&0>*Rjg8gs zx^9YdPCiqms~Nd$iJZ+j&39`nBVI`ATdQ>ZVGNwV;vp~p+zQ^0=g#huIKtE?zT}I< zx27GFXZ?ED#Kf4f<;SW5wfTOZ8M6~Pvrn^VMQHqTec7G-Ltq}eqRvRxE}3k9dj7gNQMawt{=bjR)t^#n*FFFDx3hYj6Mf{} z6z>0-ue;%2|NZ~nUlNbR=`1@S-?P_Rar#p6V^6Zw9C&{3m5P3#duYRS9t-zHp7|ZV z5?L#*IxN5UE#FOO&f<`1C$=rKZt4AP|4&_}@$NnOn{s6@Bm`0!tz!+jU;USp{WVQu zovF(q2YshVmJ`d{kNM4%68U{hPj>6Qy^3z-?O{RTZyO)~5suKBxM}(4wWeGA65K@= z{LS<~llb|sw(Y;f1AmTRpZ++1y~tvfDjB}x558&E9skxJ|I%HzgX7riFTXbXO?tAq zPGnQ&ykqtsfAh8aGyZ=WrRMaaK`?w*@b&5Uop(%Yd-OG@B_pCEG;n$5-6h!`n{%R{ zhn`!wW}A(ESb&f5%yZ!%0zYpOoSl2Bd(#>ty-i=Ux+mmsoAEhU&1H|ns$|QYJ(D}@ z*I!;QZo_MNyLb7sf|Spq@z3|HnPeJ&-7{|8>Xg|}SGPQRdelsNvk^!yYqOfLHR7%^|UFGXT3E~oIj)e@F^3Tfs7o-5;TAYsR6MVl7YJAnI##zC5h?9L>e9V0BklR zlL#~J8U$zo7;I?-QRwwafHy)nwv-3a%fPUuk&Oj89U|+)maGu^5`p@NiZo&q8M@Wz zDFk7)AS;npBc&H))3L=q!t}{N(?P)rOAF{a(W4om^9hnp)JPBTW@Q7ZV*|pojKBf_ G%mV Creating data dir {}".format(target_path)) - os.mkdir(target_path) + if not os.path.isdir(self.watch_data_dir): + print ("> Creating data dir {}".format(self.watch_data_dir)) + os.mkdir(self.watch_data_dir) @property def label(self): @@ -109,18 +110,39 @@ class model(dict): @property def history(self): + """History index is just a text file as a list + {watch-uuid}/history.txt + + contains a list like + + {epoch-time},{filename}\n + + We read in this list as the history information + + """ tmp_history = {} - import logging - import time # Read the history file as a dict - fname = os.path.join(self.__datastore_path, self.get('uuid'), "history.txt") + fname = os.path.join(self.watch_data_dir, "history.txt") if os.path.isfile(fname): logging.debug("Reading history index " + str(time.time())) with open(fname, "r") as f: for i in f.readlines(): if ',' in i: k, v = i.strip().split(',', 2) + + # The index history could contain a relative path, so we need to make the fullpath + # so that python can read it + if not '/' in v and not '\'' in v: + v = os.path.join(self.watch_data_dir, v) + else: + # It's possible that they moved the datadir on older versions + # So the snapshot exists but is in a different path + snapshot_fname = v.split('/')[-1] + proposed_new_path = os.path.join(self.watch_data_dir, snapshot_fname) + if not os.path.exists(v) and os.path.exists(proposed_new_path): + v = proposed_new_path + tmp_history[k] = v if len(tmp_history): @@ -132,7 
+154,7 @@ class model(dict): @property def has_history(self): - fname = os.path.join(self.__datastore_path, self.get('uuid'), "history.txt") + fname = os.path.join(self.watch_data_dir, "history.txt") return os.path.isfile(fname) # Returns the newest key, but if theres only 1 record, then it's counted as not being new, so return 0. @@ -151,25 +173,19 @@ class model(dict): # Save some text file to the appropriate path and bump the history # result_obj from fetch_site_status.run() def save_history_text(self, contents, timestamp): - import uuid - import logging - - output_path = os.path.join(self.__datastore_path, self['uuid']) self.ensure_data_dir_exists() - snapshot_fname = os.path.join(output_path, str(uuid.uuid4())) - - logging.debug("Saving history text {}".format(snapshot_fname)) + snapshot_fname = "{}.txt".format(str(uuid.uuid4())) # in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading # most sites are utf-8 and some are even broken utf-8 - with open(snapshot_fname, 'wb') as f: + with open(os.path.join(self.watch_data_dir, snapshot_fname), 'wb') as f: f.write(contents) f.close() # Append to index # @todo check last char was \n - index_fname = os.path.join(output_path, "history.txt") + index_fname = os.path.join(self.watch_data_dir, "history.txt") with open(index_fname, 'a') as f: f.write("{},{}\n".format(timestamp, snapshot_fname)) f.close() @@ -210,14 +226,14 @@ class model(dict): return not local_lines.issubset(existing_history) def get_screenshot(self): - fname = os.path.join(self.__datastore_path, self['uuid'], "last-screenshot.png") + fname = os.path.join(self.watch_data_dir, "last-screenshot.png") if os.path.isfile(fname): return fname return False def __get_file_ctime(self, filename): - fname = os.path.join(self.__datastore_path, self['uuid'], filename) + fname = os.path.join(self.watch_data_dir, filename) if os.path.isfile(fname): return int(os.path.getmtime(fname)) return False @@ -242,9 +258,14 @@ class model(dict): def snapshot_error_screenshot_ctime(self): return self.__get_file_ctime('last-error-screenshot.png') + @property + def watch_data_dir(self): + # The base dir of the watch data + return os.path.join(self.__datastore_path, self['uuid']) + def get_error_text(self): """Return the text saved from a previous request that resulted in a non-200 error""" - fname = os.path.join(self.__datastore_path, self['uuid'], "last-error.txt") + fname = os.path.join(self.watch_data_dir, "last-error.txt") if os.path.isfile(fname): with open(fname, 'r') as f: return f.read() @@ -252,7 +273,7 @@ class model(dict): def get_error_snapshot(self): """Return path to the screenshot that resulted in a non-200 error""" - fname = os.path.join(self.__datastore_path, self['uuid'], "last-error-screenshot.png") + fname = os.path.join(self.watch_data_dir, "last-error-screenshot.png") if os.path.isfile(fname): return fname return False diff --git a/changedetectionio/tests/test_backup.py b/changedetectionio/tests/test_backup.py index 787d7fc0..89fd66a5 100644 --- a/changedetectionio/tests/test_backup.py +++ b/changedetectionio/tests/test_backup.py @@ -1,18 +1,31 @@ #!/usr/bin/python3 -import time +from .util import set_original_response, set_modified_response, live_server_setup from flask import url_for from urllib.request import urlopen -from . 
util import set_original_response, set_modified_response, live_server_setup +from zipfile import ZipFile +import re +import time def test_backup(client, live_server): - live_server_setup(live_server) + set_original_response() + # Give the endpoint time to spin up time.sleep(1) + # Add our URL to the import page + res = client.post( + url_for("import_page"), + data={"urls": url_for('test_endpoint', _external=True)}, + follow_redirects=True + ) + + assert b"1 Imported" in res.data + time.sleep(3) + res = client.get( url_for("get_backup"), follow_redirects=True @@ -20,6 +33,19 @@ def test_backup(client, live_server): # Should get the right zip content type assert res.content_type == "application/zip" + # Should be PK/ZIP stream assert res.data.count(b'PK') >= 2 + # ZipFile from buffer seems non-obvious, just save it instead + with open("download.zip", 'wb') as f: + f.write(res.data) + + zip = ZipFile('download.zip') + l = zip.namelist() + uuid4hex = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}.*txt', re.I) + newlist = list(filter(uuid4hex.match, l)) # Read Note below + + # Should be two txt files in the archive (history and the snapshot) + assert len(newlist) == 2 +
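For reference, a minimal sketch of reading back the relative history index that the refactored Watch model writes (the function name and paths are illustrative; the parsing mirrors the Watch.history property in this patch): each line of {watch-uuid}/history.txt is "{timestamp},{snapshot filename}", and bare filenames are resolved against the watch's own data directory so the whole data directory stays portable.

import os

def read_history(datastore_path, uuid):
    # Parse {datastore}/{uuid}/history.txt into {timestamp: absolute snapshot path}.
    watch_data_dir = os.path.join(datastore_path, uuid)
    index_fname = os.path.join(watch_data_dir, "history.txt")
    history = {}
    if not os.path.isfile(index_fname):
        return history
    with open(index_fname, "r") as f:
        for line in f:
            if ',' not in line:
                continue
            timestamp, snapshot = line.strip().split(',', 1)
            # A bare filename is relative to the watch's data dir (portable backups);
            # older entries may still carry a full path and are used as-is.
            if '/' not in snapshot and '\\' not in snapshot:
                snapshot = os.path.join(watch_data_dir, snapshot)
            history[timestamp] = snapshot
    return history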