Don't rewrite/resave the snapshot when it's the same data; just bump the history index, which saves disk space. (#1414)

926-empty-change-alerts
dgtlmoon 2 years ago committed by GitHub
parent 900dc5ee78
commit 6f4fd011e3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -241,7 +241,7 @@ class model(dict):
# Save some text file to the appropriate path and bump the history # Save some text file to the appropriate path and bump the history
# result_obj from fetch_site_status.run() # result_obj from fetch_site_status.run()
def save_history_text(self, contents, timestamp): def save_history_text(self, contents, timestamp, snapshot_id):
self.ensure_data_dir_exists() self.ensure_data_dir_exists()
@@ -250,11 +250,14 @@ class model(dict):
if self.__newest_history_key and int(timestamp) == int(self.__newest_history_key): if self.__newest_history_key and int(timestamp) == int(self.__newest_history_key):
time.sleep(timestamp - self.__newest_history_key) time.sleep(timestamp - self.__newest_history_key)
snapshot_fname = "{}.txt".format(str(uuid.uuid4())) snapshot_fname = f"{snapshot_id}.txt"
# Only write if it does not exist; this is so that we don't bother re-saving the same data (identified by checksum) under different filenames.
dest = os.path.join(self.watch_data_dir, snapshot_fname)
if not os.path.exists(dest):
# in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading # in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading
# most sites are utf-8 and some are even broken utf-8 # most sites are utf-8 and some are even broken utf-8
with open(os.path.join(self.watch_data_dir, snapshot_fname), 'wb') as f: with open(dest, 'wb') as f:
f.write(contents) f.write(contents)
f.close() f.close()

@@ -319,15 +319,14 @@ class update_worker(threading.Thread):
# Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc # Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
if process_changedetection_results: if process_changedetection_results:
try: try:
watch = self.datastore.data['watching'][uuid] watch = self.datastore.data['watching'].get(uuid)
fname = "" # Saved history text filename self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
# For the FIRST time we check a site, or a change detected, save the snapshot. # Also save the snapshot on the first time checked
if changed_detected or not watch['last_checked']: if changed_detected or not watch['last_checked']:
# A change was detected watch.save_history_text(contents=contents,
watch.save_history_text(contents=contents, timestamp=str(round(time.time()))) timestamp=str(round(time.time())),
snapshot_id=update_obj.get('previous_md5', 'none'))
self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
# A change was detected # A change was detected
if changed_detected: if changed_detected:

Loading…
Cancel
Save