From 6084b0f23dd66fd4da62421787a52b223be8abb2 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Tue, 19 Nov 2024 17:28:21 +0100 Subject: [PATCH] VisualSelector - Use 'deflate' for storing elements.json, 90% file size reduction (#2794) --- changedetectionio/flask_app.py | 23 ++++++++++++++----- changedetectionio/model/Watch.py | 9 ++++---- .../static/js/visual-selector.js | 1 + changedetectionio/store.py | 17 +++++++++++++- .../tests/visualselector/test_fetch_data.py | 14 +++++++---- 5 files changed, 49 insertions(+), 15 deletions(-) diff --git a/changedetectionio/flask_app.py b/changedetectionio/flask_app.py index b6166006..04f9400b 100644 --- a/changedetectionio/flask_app.py +++ b/changedetectionio/flask_app.py @@ -1297,12 +1297,23 @@ def changedetection_app(config=None, datastore_o=None): # These files should be in our subdirectory try: - # set nocache, set content-type - response = make_response(send_from_directory(os.path.join(datastore_o.datastore_path, filename), "elements.json")) - response.headers['Content-type'] = 'application/json' - response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate' - response.headers['Pragma'] = 'no-cache' - response.headers['Expires'] = 0 + # set nocache, set content-type, + # `filename` is actually directory UUID of the watch + watch_directory = str(os.path.join(datastore_o.datastore_path, filename)) + response = None + if os.path.isfile(os.path.join(watch_directory, "elements.deflate")): + response = make_response(send_from_directory(watch_directory, "elements.deflate")) + response.headers['Content-Type'] = 'application/json' + response.headers['Content-Encoding'] = 'deflate' + else: + logger.error(f'Request elements.deflate at "{watch_directory}" but was notfound.') + abort(404) + + if response: + response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate' + response.headers['Pragma'] = 'no-cache' + response.headers['Expires'] = "0" + return response except FileNotFoundError: diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py index 0898c98a..b9c3d39f 100644 --- a/changedetectionio/model/Watch.py +++ b/changedetectionio/model/Watch.py @@ -538,16 +538,17 @@ class model(watch_base): def save_xpath_data(self, data, as_error=False): import json + import zlib if as_error: - target_path = os.path.join(self.watch_data_dir, "elements-error.json") + target_path = os.path.join(str(self.watch_data_dir), "elements-error.deflate") else: - target_path = os.path.join(self.watch_data_dir, "elements.json") + target_path = os.path.join(str(self.watch_data_dir), "elements.deflate") self.ensure_data_dir_exists() - with open(target_path, 'w') as f: - f.write(json.dumps(data)) + with open(target_path, 'wb') as f: + f.write(zlib.compress(json.dumps(data).encode())) f.close() # Save as PNG, PNG is larger but better for doing visual diff in the future diff --git a/changedetectionio/static/js/visual-selector.js b/changedetectionio/static/js/visual-selector.js index 7cc54e86..f6f8e79c 100644 --- a/changedetectionio/static/js/visual-selector.js +++ b/changedetectionio/static/js/visual-selector.js @@ -132,6 +132,7 @@ $(document).ready(() => { }).done((data) => { $fetchingUpdateNoticeElem.html("Rendering.."); selectorData = data; + sortScrapedElementsBySize(); console.log(`Reported browser width from backend: ${data['browser_width']}`); diff --git a/changedetectionio/store.py b/changedetectionio/store.py index 697da5bc..efc29275 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -374,7 +374,7 @@ class ChangeDetectionStore: def visualselector_data_is_ready(self, watch_uuid): output_path = "{}/{}".format(self.datastore_path, watch_uuid) screenshot_filename = "{}/last-screenshot.png".format(output_path) - elements_index_filename = "{}/elements.json".format(output_path) + elements_index_filename = "{}/elements.deflate".format(output_path) if path.isfile(screenshot_filename) and path.isfile(elements_index_filename) : return True @@ -909,3 +909,18 @@ class ChangeDetectionStore: if self.data['watching'][uuid].get('in_stock_only'): del (self.data['watching'][uuid]['in_stock_only']) + # Compress old elements.json to elements.deflate, saving disk, this compression is pretty fast. + def update_19(self): + import zlib + + for uuid, watch in self.data['watching'].items(): + json_path = os.path.join(self.datastore_path, uuid, "elements.json") + deflate_path = os.path.join(self.datastore_path, uuid, "elements.deflate") + + if os.path.exists(json_path): + with open(json_path, "rb") as f_j: + with open(deflate_path, "wb") as f_d: + logger.debug(f"Compressing {str(json_path)} to {str(deflate_path)}..") + f_d.write(zlib.compress(f_j.read())) + os.unlink(json_path) + diff --git a/changedetectionio/tests/visualselector/test_fetch_data.py b/changedetectionio/tests/visualselector/test_fetch_data.py index de3b9030..e9d54466 100644 --- a/changedetectionio/tests/visualselector/test_fetch_data.py +++ b/changedetectionio/tests/visualselector/test_fetch_data.py @@ -54,15 +54,21 @@ def test_visual_selector_content_ready(client, live_server, measure_memory_usage assert os.path.isfile(os.path.join('test-datastore', uuid, 'last-screenshot.png')), "last-screenshot.png should exist" - assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.json')), "xpath elements.json data should exist" + assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.deflate')), "xpath elements.deflate data should exist" # Open it and see if it roughly looks correct - with open(os.path.join('test-datastore', uuid, 'elements.json'), 'r') as f: - json.load(f) + with open(os.path.join('test-datastore', uuid, 'elements.deflate'), 'rb') as f: + import zlib + compressed_data = f.read() + decompressed_data = zlib.decompress(compressed_data) + # See if any error was thrown + json_data = json.loads(decompressed_data.decode('utf-8')) # Attempt to fetch it via the web hook that the browser would use res = client.get(url_for('static_content', group='visual_selector_data', filename=uuid)) - json.loads(res.data) + decompressed_data = zlib.decompress(res.data) + json_data = json.loads(decompressed_data.decode('utf-8')) + assert res.mimetype == 'application/json' assert res.status_code == 200