Don't rewrite/resave the snapshot when it's the same data; just bump the history index, which saves disk space. (#1414)

2 years ago · 6f4fd011e3
parent 900dc5ee78
commit 6f4fd011e3
2 changed files with 16 additions and 14 deletions
--- a/changedetectionio/model/Watch.py
+++ b/changedetectionio/model/Watch.py
@ -241,7 +241,7 @@ class model(dict):

    # Save some text file to the appropriate path and bump the history
    # result_obj from fetch_site_status.run()
-    def save_history_text(self, contents, timestamp):
+    def save_history_text(self, contents, timestamp, snapshot_id):

        self.ensure_data_dir_exists()

@ -250,13 +250,16 @@ class model(dict):
        if self.__newest_history_key and int(timestamp) == int(self.__newest_history_key):
            time.sleep(timestamp - self.__newest_history_key)

-        snapshot_fname = "{}.txt".format(str(uuid.uuid4()))
+        snapshot_fname = f"{snapshot_id}.txt"

-        # in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading
-        # most sites are utf-8 and some are even broken utf-8
-        with open(os.path.join(self.watch_data_dir, snapshot_fname), 'wb') as f:
-            f.write(contents)
-            f.close()
+        # Only write if it does not exist, this is so that we dont bother re-saving the same data by checksum under different filenames.
+        dest = os.path.join(self.watch_data_dir, snapshot_fname)
+        if not os.path.exists(dest):
+            # in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading
+            # most sites are utf-8 and some are even broken utf-8
+            with open(dest, 'wb') as f:
+                f.write(contents)
+                f.close()

        # Append to index
        # @todo check last char was \n
--- a/changedetectionio/update_worker.py
+++ b/changedetectionio/update_worker.py
@ -319,15 +319,14 @@ class update_worker(threading.Thread):
                    # Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
                    if process_changedetection_results:
                        try:
-                            watch = self.datastore.data['watching'][uuid]
-                            fname = "" # Saved history text filename
+                            watch = self.datastore.data['watching'].get(uuid)
+                            self.datastore.update_watch(uuid=uuid, update_obj=update_obj)

-                            # For the FIRST time we check a site, or a change detected, save the snapshot.
+                            # Also save the snapshot on the first time checked
                            if changed_detected or not watch['last_checked']:
-                                # A change was detected
-                                watch.save_history_text(contents=contents, timestamp=str(round(time.time())))
-
-                            self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
+                                watch.save_history_text(contents=contents,
+                                                        timestamp=str(round(time.time())),
+                                                        snapshot_id=update_obj.get('previous_md5', 'none'))

                            # A change was detected
                            if changed_detected: