This commit is contained in:
Thomas Forgione 2024-07-23 10:29:53 +02:00
parent c7e1e423f5
commit 505f7f8267
2 changed files with 82 additions and 80 deletions

104
app.py
View File

@ -8,8 +8,7 @@ import os
from os.path import join from os.path import join
import sqlite3 import sqlite3
import uuid import uuid
import tarfile from . import db, config, scanner, calibration, tar
from . import db, config, scanner, calibration
app = Flask(__name__) app = Flask(__name__)
@ -277,94 +276,39 @@ def download_object(id: int):
object = db.Object.get_from_id(id, conn).full(conn) object = db.Object.get_from_id(id, conn).full(conn)
# Group acquisitions sharing calibration # Group acquisitions sharing calibration
def keyfunc(x: db.Calibration) -> int: def keyfunc(x: db.Calibration) -> int:
return x.calibration_id return x.calibration_id
acquisitions_sorted = sorted(object.acquisitions, key=keyfunc) acquisitions_sorted = sorted(object.acquisitions, key=keyfunc)
acquisitions_grouped = [(db.Calibration.get_from_id(k, conn), list(g)) for k, g in itertools.groupby(acquisitions_sorted, key=keyfunc)] acquisitions_grouped = [
(db.Calibration.get_from_id(k, conn), list(g))
for k, g in itertools.groupby(acquisitions_sorted, key=keyfunc)
]
def generate(): # Create archive file to send
for calibration_index, (calib, acquisitions) in enumerate(acquisitions_grouped): archive = tar.TarSender()
# Send each image
calibration_dir = join(config.CALIBRATION_DIR, str(calib.id))
for image in os.listdir(calibration_dir):
# Generate tar header for file for calibration_index, (calib, acquisitions) in enumerate(acquisitions_grouped):
image_path = join(calibration_dir, image) calibration_dir = join(config.CALIBRATION_DIR, str(calib.id))
bytes = io.BytesIO()
stat = os.stat(image_path)
# Create dummy tar to extract tar header for file # Add calibration images
with tarfile.open(fileobj=bytes, mode='w') as buffer: for image in os.listdir(calibration_dir):
tar_info = tarfile.TarInfo(image_path) archive.add_file(
tar_info.name = f'object/{calibration_index}/calibration/{image}' f'object/{calibration_index}/calibration/{image}',
tar_info.size = stat.st_size join(calibration_dir, image)
buffer.addfile(tar_info) )
# Yield header # Add each acquisition
value = bytes.getvalue() for acquisition_index, acquisition in enumerate(acquisitions):
yield value[:512] acquisition_dir = join(config.OBJECT_DIR, str(object.id), str(acquisition.id))
# Yield file content, by chunks of 4MiB for image in os.listdir(acquisition_dir):
chunk_size = 4_194_304 archive.add_file(
bytes_len = 0 f'object/{calibration_index}/{acquisition_index}/{image}',
join(acquisition_dir, image)
)
with open(image_path, 'rb') as file: return archive.response()
while True:
bytes = file.read(chunk_size)
if len(bytes) == 0:
break
bytes_len += len(bytes)
yield bytes
yield b'\x00' * (512 - bytes_len % 512)
for acquisition_index, acquisition in enumerate(acquisitions):
acquisition_dir = join(config.OBJECT_DIR, str(object.id), str(acquisition.id))
# Send each image
for image in os.listdir(acquisition_dir):
# Generate tar header for file
image_path = join(acquisition_dir, image)
bytes = io.BytesIO()
stat = os.stat(image_path)
# Create dummy tar to extract tar header for file
with tarfile.open(fileobj=bytes, mode='w') as buffer:
tar_info = tarfile.TarInfo(image_path)
tar_info.name = f'object/{calibration_index}/{acquisition_index}/{image}'
tar_info.size = stat.st_size
buffer.addfile(tar_info)
# Yield header
value = bytes.getvalue()
yield value[:512]
# Yield file content, by chunks of 4MiB
chunk_size = 4_194_304
bytes_len = 0
with open(image_path, 'rb') as file:
while True:
bytes = file.read(chunk_size)
if len(bytes) == 0:
break
bytes_len += len(bytes)
yield bytes
yield b'\x00' * (512 - bytes_len % 512)
return app.response_class(
generate(),
mimetype='application/x-tar',
headers={'Content-Disposition': 'attachment; filename="archive.tar"'}
)
@app.route('/static/<path:path>') @app.route('/static/<path:path>')

58
tar.py Normal file
View File

@ -0,0 +1,58 @@
from flask import Response
import io
import os
import tarfile
# File contents are read and streamed in chunks of 4 MiB.
CHUNK_SIZE = 4 * 1024 * 1024
def header_chunk(filename: str, filepath: str) -> bytes:
    """Build the tar header for one archive member.

    Args:
        filename: Name the member gets inside the archive.
        filepath: Path of the file on disk; only its size is read here.

    Returns:
        The encoded header, always a multiple of 512 bytes. It is a single
        512-byte block for short ASCII names; a name that does not fit the
        classic header field is preceded by the extended-header blocks that
        carry it.

    Raises:
        OSError: If ``filepath`` cannot be stat'ed.
    """
    tar_info = tarfile.TarInfo(filename)
    tar_info.size = os.stat(filepath).st_size

    # Encode the header directly instead of writing a throwaway archive and
    # keeping its first 512 bytes: that slice dropped the real header whenever
    # an extended header had to be emitted first (long or non-ASCII names).
    return tar_info.tobuf()
class TarSender:
    """Collect files, then stream them to the client as one tar archive.

    Members are registered with ``add_file`` and written one after the other
    by the generator behind ``response``. File contents are read lazily, in
    pieces of ``CHUNK_SIZE`` bytes, while the response is being sent.
    """

    def __init__(self):
        # Maps the member name inside the archive to the path on disk.
        self.files: dict[str, str] = {}

    def add_file(self, filename: str, filepath: str):
        """Register ``filepath`` to be sent under the name ``filename``.

        Registering the same ``filename`` again replaces the earlier path.
        """
        self.files[filename] = filepath

    @staticmethod
    def _padding(size: int) -> bytes:
        """Return the zero bytes that round ``size`` up to a 512 multiple."""
        return b'\x00' * (-size % 512)

    def response(self):
        """Return a Flask ``Response`` streaming the archive as a download."""

        def generate():
            for name, path in self.files.items():
                yield header_chunk(name, path)

                sent = 0
                with open(path, 'rb') as f:
                    # read() returns b'' at end of file, which stops iter().
                    for chunk in iter(lambda: f.read(CHUNK_SIZE), b''):
                        sent += len(chunk)
                        yield chunk

                # Tar stores data in 512-byte blocks, so the last block of
                # the member is completed with zeroes. Nothing is added when
                # the size is already aligned: a stray all-zero block would
                # be read as the end of the archive.
                padding = self._padding(sent)
                if padding:
                    yield padding

            # End-of-archive marker: two consecutive zero-filled blocks.
            yield b'\x00' * 1024

        return Response(
            generate(),
            mimetype='application/x-tar',
            headers={'Content-Disposition': 'attachment; filename="archive.tar"'},
        )