From 72834a42fd1be52a9502b49ae6c5cba8880e350e Mon Sep 17 00:00:00 2001
From: dgtlmoon <dgtlmoon@gmail.com>
Date: Thu, 27 Oct 2022 12:35:26 +0200
Subject: [PATCH] Backups and Snapshots - Data directory now fully portable
 (all paths are relative), refactored backup zip export creation

---
 changedetectionio/__init__.py          |  15 +++---
 changedetectionio/download.zip         | Bin 0 -> 2083 bytes
 changedetectionio/model/Watch.py       |  71 ++++++++++++++++---------
 changedetectionio/tests/test_backup.py |  32 +++++++++--
 4 files changed, 81 insertions(+), 37 deletions(-)
 create mode 100644 changedetectionio/download.zip
diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py
index 8bbb747d..383c5bf7 100644
--- a/changedetectionio/__init__.py
+++ b/changedetectionio/__init__.py
@@ -987,9 +987,6 @@ def changedetection_app(config=None, datastore_o=None):
 
         # create a ZipFile object
         backupname = "changedetection-backup-{}.zip".format(int(time.time()))
-
-        # We only care about UUIDS from the current index file
-        uuids = list(datastore.data['watching'].keys())
         backup_filepath = os.path.join(datastore_o.datastore_path, backupname)
 
         with zipfile.ZipFile(backup_filepath, "w",
@@ -1005,12 +1002,12 @@ def changedetection_app(config=None, datastore_o=None):
             # Add the flask app secret
             zipObj.write(os.path.join(datastore_o.datastore_path, "secret.txt"), arcname="secret.txt")
 
-            # Add any snapshot data we find, use the full path to access the file, but make the file 'relative' in the Zip.
-            for txt_file_path in Path(datastore_o.datastore_path).rglob('*.txt'):
-                parent_p = txt_file_path.parent
-                if parent_p.name in uuids:
-                    zipObj.write(txt_file_path,
-                                 arcname=str(txt_file_path).replace(datastore_o.datastore_path, ''),
+            # Add any data in the watch data directory.
+            for uuid, w in datastore.data['watching'].items():
+                for f in Path(w.watch_data_dir).glob('*'):
+                    zipObj.write(f,
+                                 # Use the full path to access the file, but make the file 'relative' in the Zip.
+                                 arcname=os.path.join(f.parts[-2], f.parts[-1]),
                                  compress_type=zipfile.ZIP_DEFLATED,
                                  compresslevel=8)
 
diff --git a/changedetectionio/download.zip b/changedetectionio/download.zip
new file mode 100644
index 0000000000000000000000000000000000000000..b4658ada34a5d6b6c022ae832c7ba91ec5aaf905
GIT binary patch
literal 2083
zcmWIWW@Zs#U|`^2*c=rdy5X@{*$rj}hEg5|1_7XGX;F@Dd16U&MryHMR&jpb(J<fQ
z+hziLe}@~`w3}%LE_soCJG1)b(w(NVXM`fUIvgEX9ZpO2|F0GoNscyIS#has)$ASj
z`FH1k|ERSneogQ3A7)+kj!Vz9zqqbuHOuai?h9GBeN*+amK<8ae78C1c>l#XMT?%S
zo|t^kqb_1|*<Uv=l+|UedJ-p9tLv*`deZbb|HtFOXP7JQdF@@iqyO;qhuZ5mMy6l2
z5>)L_SzkFX#A%A>Hs9AwmsYJj{p?KAhF5F3UT(klK0iNi%EJXwkN><?^EEf0XXG8w
zy?5fR{)_W<ZCh9#y-a64XM9g-zq8|~H5y&H8&9oHTC~0W9<LOyMtiZ}J5@Qh-V&+E
zDp7~)dxL`4i@jg^Qug%`wUp}&=U4d#ui1FYb=uOJt0t*|oO%`8P8rQfyZ_zDrAB^+
z@NF^21*MFSmv^o(dt@=cpnadI^NIQOzf8Vrl;4U-k(0N{{PXd-i_czl>63~kT#MUF
z?uYDZxHI!!(Z2Mo-i@*?S03H(KbI`dnX@1~KkDh36|et3+Fkry;%vA3?i*1VYd3N~
zYnjaYPMop0)>h+0bqb$o?~IL`7gTDOTW{&^J5>HG>azC16&7L2i_aCWNZ%^Gq%zg?
zf=5Q-tZjaV-SuBh;`5@OuB^M(5aJ^6hR3?RXo7E|@h!#o?YqvMy?f?|&0>*Rjg8gs
zx^9YdPCiqms~Nd$iJZ+j&39`nBVI`ATdQ>ZVGNwV;vp~p+zQ^0=g#huIKtE?zT}I<
zx27GFXZ?ED#Kf4f<;SW5wfTOZ8M6~Pvrn^VMQHqTec7G-Ltq}eq<K=}pSF8;)vvvK
z|2b{yDQkZ3d~3DI+_$?M>RvRxE}3k9dj7gNQMawt{=bjR)t^#n*FFFDx3hYj6Mf{}
z6z>0-ue;%2|NZ~nUlNbR=`1@S-?P_Rar#p6V^6Zw9C&{3m5P3#duYRS9t-zHp7|ZV
z5?L#*IxN5UE#FOO&f<`1C$=rKZt4AP|4&_}@$NnOn{s6@Bm`0!tz!+jU;USp{WVQu
zovF(q2YshVmJ`d{kNM4%68U{hPj>6Qy^3z-?O{RTZyO)~5suKBxM}(4wWeGA65K@=
z{LS<~llb|sw(Y;f1AmTRpZ++1y~tvfDjB}x558&E9skxJ|I%HzgX7riFTXbXO?tAq
zPGnQ&ykqtsfAh8aGyZ=WrRMaaK`?w*@b&5Uop(%Yd-OG@B_pCEG;n$5-6h!`n{%R{
zhn`!wW}A(ESb&f5%yZ!%0zYpOoSl2Bd(#>ty-i=Ux+mmsoAEhU&1H|ns$|QYJ(D}@
z*I!;QZo_MNyLb7sf|Spq@z3|HnPeJ&-7{|8>Xg|}SGPQRdelsNvk^<xFX;eyDX<Y(
z3cND2Ic*Hg{SH9P1;oXv$wjFpdL<PltOpqx85B5}H+aeHy=TF3!@fy+SsTB8<MbZ0
zO~;HTm%q~amKpx*i1!1nZB3p^-Bnv3Gb4=N1T;Fk<N7vNppm{%oSc-JW^8C_teaw<
zn4)Wvlx(7FnUZ3zYiMDRWR_@PnU-Xprf+C!V3}lLZmFAOm};(TVqj^ao0wz<WTly!
zCnj5(Bqp00KwM|d)!@LxdcbaZ#oW2mZyvJJ;TJklXCP?1&p4O&(DLco3w}&Jk$R7J
z-i5mLK0WHE$^{-y*1ROvRH`!RNOqLL-Py+H<x$;Rzy0%m6QEl`*Z_!$a&JavaY=qr
zCCEv}CdS5_4UCO8np`$CG2P&^$zb~xUn3vW>!yYqOfLHR7%^|U<a1VdvaYYtWMAGd
zURqu|7!WQ170Lgz7admz+N=u1z)~4pB<BDv0GWPTLsN6ZrR(QU`um+Zc|u=rqmikB
z!Bu@vUA>FGXT3E~oIj)e@F^3Tfs7o-5;TAYsR6MVl7YJAnI##zC5h?9L>e9V0BklR
zlL#~J8U$zo7;I?-QRwwafHy)nwv-3a%fPUuk&Oj89U|+)maGu^5`p@NiZo&q8M@Wz
zDFk7)AS;npBc&H))3L=q!t}{N(?P)rOAF{a(W4om^9hnp)JPBTW@Q7ZV*|pojKBf_
G%mV<OKO@Kh

literal 0
HcmV?d00001

diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py
index 5834b532..ce4b7814 100644
--- a/changedetectionio/model/Watch.py
+++ b/changedetectionio/model/Watch.py
@@ -1,6 +1,8 @@
-import os
-import uuid as uuid_builder
 from distutils.util import strtobool
+import logging
+import os
+import time
+import uuid
 
 minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60))
 mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
@@ -22,7 +24,7 @@ class model(dict):
             #'newest_history_key': 0,
             'title': None,
             'previous_md5': False,
-            'uuid': str(uuid_builder.uuid4()),
+            'uuid': str(uuid.uuid4()),
             'headers': {},  # Extra headers to send
             'body': None,
             'method': 'GET',
@@ -60,7 +62,7 @@ class model(dict):
         self.update(self.__base_config)
         self.__datastore_path = kw['datastore_path']
 
-        self['uuid'] = str(uuid_builder.uuid4())
+        self['uuid'] = str(uuid.uuid4())
 
         del kw['datastore_path']
 
@@ -82,10 +84,9 @@ class model(dict):
         return False
 
     def ensure_data_dir_exists(self):
-        target_path = os.path.join(self.__datastore_path, self['uuid'])
-        if not os.path.isdir(target_path):
-            print ("> Creating data dir {}".format(target_path))
-            os.mkdir(target_path)
+        if not os.path.isdir(self.watch_data_dir):
+            print ("> Creating data dir {}".format(self.watch_data_dir))
+            os.mkdir(self.watch_data_dir)
 
     @property
     def label(self):
@@ -109,18 +110,39 @@ class model(dict):
 
     @property
     def history(self):
+        """History index is just a text file as a list
+            {watch-uuid}/history.txt
+
+            contains a list like
+
+            {epoch-time},{filename}\n
+
+            We read in this list as the history information
+
+        """
         tmp_history = {}
-        import logging
-        import time
 
         # Read the history file as a dict
-        fname = os.path.join(self.__datastore_path, self.get('uuid'), "history.txt")
+        fname = os.path.join(self.watch_data_dir, "history.txt")
         if os.path.isfile(fname):
             logging.debug("Reading history index " + str(time.time()))
             with open(fname, "r") as f:
                 for i in f.readlines():
                     if ',' in i:
                         k, v = i.strip().split(',', 2)
+
+                        # The history index may hold just a filename (a relative path), so build the full path
+                        # inside the watch data dir so that Python can open it
+                        if not '/' in v and not '\'' in v:
+                            v = os.path.join(self.watch_data_dir, v)
+                        else:
+                            # It's possible that they moved the datadir on older versions
+                            # So the snapshot exists but is in a different path
+                            snapshot_fname = v.split('/')[-1]
+                            proposed_new_path = os.path.join(self.watch_data_dir, snapshot_fname)
+                            if not os.path.exists(v) and os.path.exists(proposed_new_path):
+                                v = proposed_new_path
+
                         tmp_history[k] = v
 
         if len(tmp_history):
@@ -132,7 +154,7 @@ class model(dict):
 
     @property
     def has_history(self):
-        fname = os.path.join(self.__datastore_path, self.get('uuid'), "history.txt")
+        fname = os.path.join(self.watch_data_dir, "history.txt")
         return os.path.isfile(fname)
 
     # Returns the newest key, but if theres only 1 record, then it's counted as not being new, so return 0.
@@ -151,25 +173,19 @@ class model(dict):
     # Save some text file to the appropriate path and bump the history
     # result_obj from fetch_site_status.run()
     def save_history_text(self, contents, timestamp):
-        import uuid
-        import logging
-
-        output_path = os.path.join(self.__datastore_path, self['uuid'])
 
         self.ensure_data_dir_exists()
-        snapshot_fname = os.path.join(output_path, str(uuid.uuid4()))
-
-        logging.debug("Saving history text {}".format(snapshot_fname))
+        snapshot_fname = "{}.txt".format(str(uuid.uuid4()))
 
         # in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading
         # most sites are utf-8 and some are even broken utf-8
-        with open(snapshot_fname, 'wb') as f:
+        with open(os.path.join(self.watch_data_dir, snapshot_fname), 'wb') as f:
             f.write(contents)
             f.close()
 
         # Append to index
         # @todo check last char was \n
-        index_fname = os.path.join(output_path, "history.txt")
+        index_fname = os.path.join(self.watch_data_dir, "history.txt")
         with open(index_fname, 'a') as f:
             f.write("{},{}\n".format(timestamp, snapshot_fname))
             f.close()
@@ -210,14 +226,14 @@ class model(dict):
         return not local_lines.issubset(existing_history)
 
     def get_screenshot(self):
-        fname = os.path.join(self.__datastore_path, self['uuid'], "last-screenshot.png")
+        fname = os.path.join(self.watch_data_dir, "last-screenshot.png")
         if os.path.isfile(fname):
             return fname
 
         return False
 
     def __get_file_ctime(self, filename):
-        fname = os.path.join(self.__datastore_path, self['uuid'], filename)
+        fname = os.path.join(self.watch_data_dir, filename)
         if os.path.isfile(fname):
             return int(os.path.getmtime(fname))
         return False
@@ -242,9 +258,14 @@ class model(dict):
     def snapshot_error_screenshot_ctime(self):
         return self.__get_file_ctime('last-error-screenshot.png')
 
+    @property
+    def watch_data_dir(self):
+        # Base directory holding this watch's data: the datastore path joined with the watch UUID
+        return os.path.join(self.__datastore_path, self['uuid'])
+    
     def get_error_text(self):
         """Return the text saved from a previous request that resulted in a non-200 error"""
-        fname = os.path.join(self.__datastore_path, self['uuid'], "last-error.txt")
+        fname = os.path.join(self.watch_data_dir, "last-error.txt")
         if os.path.isfile(fname):
             with open(fname, 'r') as f:
                 return f.read()
@@ -252,7 +273,7 @@ class model(dict):
 
     def get_error_snapshot(self):
         """Return path to the screenshot that resulted in a non-200 error"""
-        fname = os.path.join(self.__datastore_path, self['uuid'], "last-error-screenshot.png")
+        fname = os.path.join(self.watch_data_dir, "last-error-screenshot.png")
         if os.path.isfile(fname):
             return fname
         return False
diff --git a/changedetectionio/tests/test_backup.py b/changedetectionio/tests/test_backup.py
index 787d7fc0..89fd66a5 100644
--- a/changedetectionio/tests/test_backup.py
+++ b/changedetectionio/tests/test_backup.py
@@ -1,18 +1,31 @@
 #!/usr/bin/python3
 
-import time
+from .util import set_original_response, set_modified_response, live_server_setup
 from flask import url_for
 from urllib.request import urlopen
-from . util import set_original_response, set_modified_response, live_server_setup
+from zipfile import ZipFile
+import re
+import time
 
 
 def test_backup(client, live_server):
-
     live_server_setup(live_server)
 
+    set_original_response()
+
     # Give the endpoint time to spin up
     time.sleep(1)
 
+    # Add our URL to the import page
+    res = client.post(
+        url_for("import_page"),
+        data={"urls": url_for('test_endpoint', _external=True)},
+        follow_redirects=True
+    )
+
+    assert b"1 Imported" in res.data
+    time.sleep(3)
+
     res = client.get(
         url_for("get_backup"),
         follow_redirects=True
@@ -20,6 +33,19 @@ def test_backup(client, live_server):
 
     # Should get the right zip content type
     assert res.content_type == "application/zip"
+
     # Should be PK/ZIP stream
     assert res.data.count(b'PK') >= 2
 
+    # ZipFile from buffer seems non-obvious, just save it instead
+    with open("download.zip", 'wb') as f:
+        f.write(res.data)
+
+    zip = ZipFile('download.zip')
+    l = zip.namelist()
+    uuid4hex = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}.*txt', re.I)
+    newlist = list(filter(uuid4hex.match, l))  # keep only entries whose name starts with a UUID4 and contains 'txt'
+
+    # Should be two txt files in the archive (history and the snapshot)
+    assert len(newlist) == 2
+