VisualSelector - Use 'deflate' for storing elements.json, 90% file size reduction (#2794)

pull/2797/head
dgtlmoon 2 months ago committed by GitHub
parent 4e18aea5ff
commit 6084b0f23d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -1297,12 +1297,23 @@ def changedetection_app(config=None, datastore_o=None):
# These files should be in our subdirectory # These files should be in our subdirectory
try: try:
# set nocache, set content-type # set nocache, set content-type,
response = make_response(send_from_directory(os.path.join(datastore_o.datastore_path, filename), "elements.json")) # `filename` is actually directory UUID of the watch
response.headers['Content-type'] = 'application/json' watch_directory = str(os.path.join(datastore_o.datastore_path, filename))
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate' response = None
response.headers['Pragma'] = 'no-cache' if os.path.isfile(os.path.join(watch_directory, "elements.deflate")):
response.headers['Expires'] = 0 response = make_response(send_from_directory(watch_directory, "elements.deflate"))
response.headers['Content-Type'] = 'application/json'
response.headers['Content-Encoding'] = 'deflate'
else:
logger.error(f'Request elements.deflate at "{watch_directory}" but was notfound.')
abort(404)
if response:
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
response.headers['Pragma'] = 'no-cache'
response.headers['Expires'] = "0"
return response return response
except FileNotFoundError: except FileNotFoundError:

@ -538,16 +538,17 @@ class model(watch_base):
def save_xpath_data(self, data, as_error=False): def save_xpath_data(self, data, as_error=False):
import json import json
import zlib
if as_error: if as_error:
target_path = os.path.join(self.watch_data_dir, "elements-error.json") target_path = os.path.join(str(self.watch_data_dir), "elements-error.deflate")
else: else:
target_path = os.path.join(self.watch_data_dir, "elements.json") target_path = os.path.join(str(self.watch_data_dir), "elements.deflate")
self.ensure_data_dir_exists() self.ensure_data_dir_exists()
with open(target_path, 'w') as f: with open(target_path, 'wb') as f:
f.write(json.dumps(data)) f.write(zlib.compress(json.dumps(data).encode()))
f.close() f.close()
# Save as PNG, PNG is larger but better for doing visual diff in the future # Save as PNG, PNG is larger but better for doing visual diff in the future

@ -132,6 +132,7 @@ $(document).ready(() => {
}).done((data) => { }).done((data) => {
$fetchingUpdateNoticeElem.html("Rendering.."); $fetchingUpdateNoticeElem.html("Rendering..");
selectorData = data; selectorData = data;
sortScrapedElementsBySize(); sortScrapedElementsBySize();
console.log(`Reported browser width from backend: ${data['browser_width']}`); console.log(`Reported browser width from backend: ${data['browser_width']}`);

@ -374,7 +374,7 @@ class ChangeDetectionStore:
def visualselector_data_is_ready(self, watch_uuid): def visualselector_data_is_ready(self, watch_uuid):
output_path = "{}/{}".format(self.datastore_path, watch_uuid) output_path = "{}/{}".format(self.datastore_path, watch_uuid)
screenshot_filename = "{}/last-screenshot.png".format(output_path) screenshot_filename = "{}/last-screenshot.png".format(output_path)
elements_index_filename = "{}/elements.json".format(output_path) elements_index_filename = "{}/elements.deflate".format(output_path)
if path.isfile(screenshot_filename) and path.isfile(elements_index_filename) : if path.isfile(screenshot_filename) and path.isfile(elements_index_filename) :
return True return True
@ -909,3 +909,18 @@ class ChangeDetectionStore:
if self.data['watching'][uuid].get('in_stock_only'): if self.data['watching'][uuid].get('in_stock_only'):
del (self.data['watching'][uuid]['in_stock_only']) del (self.data['watching'][uuid]['in_stock_only'])
# Compress old elements.json to elements.deflate to save disk space; this compression is pretty fast.
def update_19(self):
    """Migrate each watch's ``elements.json`` to a zlib-compressed ``elements.deflate``.

    For every watch UUID in ``self.data['watching']`` this looks for
    ``<datastore_path>/<uuid>/elements.json``. When the file exists, its raw
    bytes are compressed with ``zlib.compress`` and written to
    ``elements.deflate`` in the same directory, after which the original
    JSON file is removed. Watches without an ``elements.json`` are skipped,
    so running the update again is harmless.
    """
    import zlib

    # Only the UUIDs are needed, so iterate the keys instead of .items().
    for uuid in self.data['watching']:
        json_path = os.path.join(self.datastore_path, uuid, "elements.json")
        if not os.path.exists(json_path):
            continue
        deflate_path = os.path.join(self.datastore_path, uuid, "elements.deflate")

        with open(json_path, "rb") as f_j, open(deflate_path, "wb") as f_d:
            logger.debug(f"Compressing {str(json_path)} to {str(deflate_path)}..")
            f_d.write(zlib.compress(f_j.read()))

        # Delete the source only once both handles are closed: the compressed
        # output is fully written by then, and removing a file that is still
        # open raises an error on Windows.
        os.unlink(json_path)

@ -54,15 +54,21 @@ def test_visual_selector_content_ready(client, live_server, measure_memory_usage
assert os.path.isfile(os.path.join('test-datastore', uuid, 'last-screenshot.png')), "last-screenshot.png should exist" assert os.path.isfile(os.path.join('test-datastore', uuid, 'last-screenshot.png')), "last-screenshot.png should exist"
assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.json')), "xpath elements.json data should exist" assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.deflate')), "xpath elements.deflate data should exist"
# Open it and see if it roughly looks correct # Open it and see if it roughly looks correct
with open(os.path.join('test-datastore', uuid, 'elements.json'), 'r') as f: with open(os.path.join('test-datastore', uuid, 'elements.deflate'), 'rb') as f:
json.load(f) import zlib
compressed_data = f.read()
decompressed_data = zlib.decompress(compressed_data)
# See if any error was thrown
json_data = json.loads(decompressed_data.decode('utf-8'))
# Attempt to fetch it via the web hook that the browser would use # Attempt to fetch it via the web hook that the browser would use
res = client.get(url_for('static_content', group='visual_selector_data', filename=uuid)) res = client.get(url_for('static_content', group='visual_selector_data', filename=uuid))
json.loads(res.data) decompressed_data = zlib.decompress(res.data)
json_data = json.loads(decompressed_data.decode('utf-8'))
assert res.mimetype == 'application/json' assert res.mimetype == 'application/json'
assert res.status_code == 200 assert res.status_code == 200

Loading…
Cancel
Save