This commit is contained in:
Thomas Forgione 2024-07-23 10:29:53 +02:00
parent c7e1e423f5
commit 505f7f8267
2 changed files with 82 additions and 80 deletions

104
app.py
View File

@ -8,8 +8,7 @@ import os
from os.path import join
import sqlite3
import uuid
import tarfile
from . import db, config, scanner, calibration
from . import db, config, scanner, calibration, tar
app = Flask(__name__)
@ -277,94 +276,39 @@ def download_object(id: int):
object = db.Object.get_from_id(id, conn).full(conn)
# Group acquisitions sharing calibration
def keyfunc(x: db.Calibration) -> int:
return x.calibration_id
acquisitions_sorted = sorted(object.acquisitions, key=keyfunc)
acquisitions_grouped = [(db.Calibration.get_from_id(k, conn), list(g)) for k, g in itertools.groupby(acquisitions_sorted, key=keyfunc)]
acquisitions_grouped = [
(db.Calibration.get_from_id(k, conn), list(g))
for k, g in itertools.groupby(acquisitions_sorted, key=keyfunc)
]
def generate():
for calibration_index, (calib, acquisitions) in enumerate(acquisitions_grouped):
# Send each image
calibration_dir = join(config.CALIBRATION_DIR, str(calib.id))
for image in os.listdir(calibration_dir):
# Create archive file to send
archive = tar.TarSender()
# Generate tar header for file
image_path = join(calibration_dir, image)
bytes = io.BytesIO()
stat = os.stat(image_path)
for calibration_index, (calib, acquisitions) in enumerate(acquisitions_grouped):
calibration_dir = join(config.CALIBRATION_DIR, str(calib.id))
# Create dummy tar to extract tar header for file
with tarfile.open(fileobj=bytes, mode='w') as buffer:
tar_info = tarfile.TarInfo(image_path)
tar_info.name = f'object/{calibration_index}/calibration/{image}'
tar_info.size = stat.st_size
buffer.addfile(tar_info)
# Add calibration images
for image in os.listdir(calibration_dir):
archive.add_file(
f'object/{calibration_index}/calibration/{image}',
join(calibration_dir, image)
)
# Yield header
value = bytes.getvalue()
yield value[:512]
# Add each acquisition
for acquisition_index, acquisition in enumerate(acquisitions):
acquisition_dir = join(config.OBJECT_DIR, str(object.id), str(acquisition.id))
# Yield file content, by chunks of 4MiB
chunk_size = 4_194_304
bytes_len = 0
for image in os.listdir(acquisition_dir):
archive.add_file(
f'object/{calibration_index}/{acquisition_index}/{image}',
join(acquisition_dir, image)
)
with open(image_path, 'rb') as file:
while True:
bytes = file.read(chunk_size)
if len(bytes) == 0:
break
bytes_len += len(bytes)
yield bytes
yield b'\x00' * (512 - bytes_len % 512)
for acquisition_index, acquisition in enumerate(acquisitions):
acquisition_dir = join(config.OBJECT_DIR, str(object.id), str(acquisition.id))
# Send each image
for image in os.listdir(acquisition_dir):
# Generate tar header for file
image_path = join(acquisition_dir, image)
bytes = io.BytesIO()
stat = os.stat(image_path)
# Create dummy tar to extract tar header for file
with tarfile.open(fileobj=bytes, mode='w') as buffer:
tar_info = tarfile.TarInfo(image_path)
tar_info.name = f'object/{calibration_index}/{acquisition_index}/{image}'
tar_info.size = stat.st_size
buffer.addfile(tar_info)
# Yield header
value = bytes.getvalue()
yield value[:512]
# Yield file content, by chunks of 4MiB
chunk_size = 4_194_304
bytes_len = 0
with open(image_path, 'rb') as file:
while True:
bytes = file.read(chunk_size)
if len(bytes) == 0:
break
bytes_len += len(bytes)
yield bytes
yield b'\x00' * (512 - bytes_len % 512)
return app.response_class(
generate(),
mimetype='application/x-tar',
headers={'Content-Disposition': 'attachment; filename="archive.tar"'}
)
return archive.response()
@app.route('/static/<path:path>')

58
tar.py Normal file
View File

@ -0,0 +1,58 @@
from flask import Response
import io
import os
import tarfile
# Files are read from disk and streamed in chunks of 4 MiB.
CHUNK_SIZE = 4 * 1024 * 1024
def header_chunk(filename: str, filepath: str) -> bytes:
    """Build the tar header block(s) describing one archive member.

    The member is a regular file named ``filename`` whose size is read with
    ``os.stat(filepath)``; every other field keeps the ``tarfile.TarInfo``
    defaults. Only the header is produced: the file content is never read.

    Args:
        filename: Name the member gets inside the archive.
        filepath: Path of the file on disk, used only to read its size.

    Returns:
        The serialized header, whose length is a multiple of 512 bytes. It is
        a single 512-byte block for names that fit the basic header; when the
        name does not fit (too long, or not ASCII with the pax format), the
        extended header blocks come first, followed by the member header.

    Raises:
        OSError: If ``filepath`` cannot be stat'ed.
    """
    tar_info = tarfile.TarInfo(filename)
    tar_info.size = os.stat(filepath).st_size

    # Serialize the header directly instead of writing a throwaway archive
    # and slicing its first 512 bytes: the slice dropped the real member
    # header whenever an extended header was emitted in front of it, and
    # `TarFile.addfile` rejects a sized regular member passed without a file
    # object on recent Python versions.
    return tar_info.tobuf()
class TarSender:
    """Stream a set of files to the client as an uncompressed tar archive.

    Files are registered with ``add_file``; ``response`` then builds a
    streaming response that reads them from disk chunk by chunk, so the
    archive is never assembled in memory as a whole.
    """

    # Tar archives are made of fixed-size blocks of 512 bytes.
    _BLOCK_SIZE = 512

    def __init__(self):
        """Create a sender with no registered file."""
        # Maps the name of each member inside the archive to its path on disk.
        self.files: dict[str, str] = {}

    def add_file(self, filename: str, filepath: str):
        """Register a file to include in the archive.

        Args:
            filename: Name of the member inside the archive. Registering the
                same name twice replaces the previous path.
            filepath: Path of the file on disk whose content will be sent.
        """
        self.files[filename] = filepath

    @staticmethod
    def _padding(size: int) -> bytes:
        """Return the zero bytes rounding ``size`` up to a whole tar block."""
        # `-size % 512` is 0 when size is already aligned, unlike
        # `512 - size % 512` which would add a spurious all-zero block that
        # readers may interpret as the end of the archive.
        return b'\x00' * (-size % TarSender._BLOCK_SIZE)

    def response(self):
        """Build the streaming HTTP response carrying the tar archive.

        Returns:
            A ``Response`` with mimetype ``application/x-tar`` whose body is
            generated lazily: for each registered file, its header, its
            content in chunks of ``CHUNK_SIZE`` bytes and the block padding,
            then the end-of-archive marker.
        """
        def generate():
            for name, file in self.files.items():
                yield header_chunk(name, file)

                bytes_sent = 0
                with open(file, 'rb') as f:
                    while chunk := f.read(CHUNK_SIZE):
                        bytes_sent += len(chunk)
                        yield chunk

                # Member data must end on a block boundary.
                padding = self._padding(bytes_sent)
                if padding:
                    yield padding

            # A tar archive is terminated by two blocks filled with zeroes.
            yield b'\x00' * (2 * TarSender._BLOCK_SIZE)

        return Response(
            generate(),
            mimetype='application/x-tar',
            headers={'Content-Disposition': 'attachment; filename="archive.tar"'}
        )