diff --git a/app.py b/app.py index 19aeee0..bb504b3 100755 --- a/app.py +++ b/app.py @@ -8,8 +8,7 @@ import os from os.path import join import sqlite3 import uuid -import tarfile -from . import db, config, scanner, calibration +from . import db, config, scanner, calibration, tar app = Flask(__name__) @@ -277,94 +276,39 @@ def download_object(id: int): object = db.Object.get_from_id(id, conn).full(conn) # Group acquisitions sharing calibration - def keyfunc(x: db.Calibration) -> int: return x.calibration_id acquisitions_sorted = sorted(object.acquisitions, key=keyfunc) - acquisitions_grouped = [(db.Calibration.get_from_id(k, conn), list(g)) for k, g in itertools.groupby(acquisitions_sorted, key=keyfunc)] + acquisitions_grouped = [ + (db.Calibration.get_from_id(k, conn), list(g)) + for k, g in itertools.groupby(acquisitions_sorted, key=keyfunc) + ] - def generate(): - for calibration_index, (calib, acquisitions) in enumerate(acquisitions_grouped): - # Send each image - calibration_dir = join(config.CALIBRATION_DIR, str(calib.id)) - for image in os.listdir(calibration_dir): + # Create archive file to send + archive = tar.TarSender() - # Generate tar header for file - image_path = join(calibration_dir, image) - bytes = io.BytesIO() - stat = os.stat(image_path) + for calibration_index, (calib, acquisitions) in enumerate(acquisitions_grouped): + calibration_dir = join(config.CALIBRATION_DIR, str(calib.id)) - # Create dummy tar to extract tar header for file - with tarfile.open(fileobj=bytes, mode='w') as buffer: - tar_info = tarfile.TarInfo(image_path) - tar_info.name = f'object/{calibration_index}/calibration/{image}' - tar_info.size = stat.st_size - buffer.addfile(tar_info) + # Add calibration images + for image in os.listdir(calibration_dir): + archive.add_file( + f'object/{calibration_index}/calibration/{image}', + join(calibration_dir, image) + ) - # Yield header - value = bytes.getvalue() - yield value[:512] + # Add each acquisition + for acquisition_index, 
acquisition in enumerate(acquisitions): + acquisition_dir = join(config.OBJECT_DIR, str(object.id), str(acquisition.id)) - # Yield file content, by chunks of 4MiB - chunk_size = 4_194_304 - bytes_len = 0 + for image in os.listdir(acquisition_dir): + archive.add_file( + f'object/{calibration_index}/{acquisition_index}/{image}', + join(acquisition_dir, image) + ) - with open(image_path, 'rb') as file: - while True: - bytes = file.read(chunk_size) - - if len(bytes) == 0: - break - - bytes_len += len(bytes) - yield bytes - - yield b'\x00' * (512 - bytes_len % 512) - - for acquisition_index, acquisition in enumerate(acquisitions): - acquisition_dir = join(config.OBJECT_DIR, str(object.id), str(acquisition.id)) - - # Send each image - for image in os.listdir(acquisition_dir): - - # Generate tar header for file - image_path = join(acquisition_dir, image) - bytes = io.BytesIO() - stat = os.stat(image_path) - - # Create dummy tar to extract tar header for file - with tarfile.open(fileobj=bytes, mode='w') as buffer: - tar_info = tarfile.TarInfo(image_path) - tar_info.name = f'object/{calibration_index}/{acquisition_index}/{image}' - tar_info.size = stat.st_size - buffer.addfile(tar_info) - - # Yield header - value = bytes.getvalue() - yield value[:512] - - # Yield file content, by chunks of 4MiB - chunk_size = 4_194_304 - bytes_len = 0 - - with open(image_path, 'rb') as file: - while True: - bytes = file.read(chunk_size) - - if len(bytes) == 0: - break - - bytes_len += len(bytes) - yield bytes - - yield b'\x00' * (512 - bytes_len % 512) - - return app.response_class( - generate(), - mimetype='application/x-tar', - headers={'Content-Disposition': 'attachment; filename="archive.tar"'} - ) + return archive.response() @app.route('/static/') diff --git a/tar.py b/tar.py new file mode 100644 index 0000000..ef09d50 --- /dev/null +++ b/tar.py @@ -0,0 +1,58 @@ +from flask import Response +import io +import os +import tarfile + +# 4MiB chuns +CHUNK_SIZE = 4_194_304 + + +def 
def header_chunk(filename: str, filepath: str) -> bytes:
    """Build the tar header block(s) describing one archive member.

    Args:
        filename: Member name to record inside the archive.
        filepath: Path of the file on disk; only its size is read here.

    Returns:
        The raw header bytes, whose length is a multiple of 512. This is a
        single 512-byte block for short ASCII names; names that do not fit
        the classic header get an extended header in front of it.

    Raises:
        OSError: If ``filepath`` cannot be stat'ed.
    """
    tar_info = tarfile.TarInfo(filename)
    tar_info.size = os.stat(filepath).st_size

    # tobuf() serialises the header on its own: no dummy archive is needed,
    # and the extended header emitted for long or non-ASCII names is kept
    # whole instead of being sliced down to its first 512 bytes.
    return tar_info.tobuf()


class TarSender:
    """Collect files and stream them as a single uncompressed tar archive.

    Files are only opened and read while the archive is being iterated, so
    the archive is never assembled in memory.
    """

    def __init__(self):
        # Maps archive member name -> path of the file on disk
        self.files: dict[str, str] = {}

    def add_file(self, filename: str, filepath: str):
        """Register ``filepath`` to be sent under the member name ``filename``.

        Registering the same ``filename`` again replaces the previous path.
        """
        self.files[filename] = filepath

    def iter_chunks(self, chunk_size=None):
        """Yield the archive as successive ``bytes`` chunks.

        Args:
            chunk_size: Maximum number of bytes read from a file at once.
                Defaults to the module-level ``CHUNK_SIZE`` when ``None``.

        Yields:
            Header blocks, file data, record padding and finally the
            end-of-archive marker, in archive order.

        Raises:
            OSError: If a registered file cannot be stat'ed or read.
        """
        if chunk_size is None:
            chunk_size = CHUNK_SIZE

        for name, path in self.files.items():
            yield header_chunk(name, path)

            sent = 0
            with open(path, 'rb') as f:
                # read() returns b'' at end of file, which stops the iterator
                for chunk in iter(lambda: f.read(chunk_size), b''):
                    sent += len(chunk)
                    yield chunk

            # Tar stores data in 512-byte records, so the last record is
            # padded with zeroes. Nothing must be added when the data is
            # already aligned (or empty): a spurious zero record would be
            # read as the end of the archive and hide the following members.
            padding = (-sent) % 512
            if padding:
                yield b'\x00' * padding

        # End-of-archive marker: two zero-filled 512-byte records
        yield b'\x00' * 1024

    def response(self):
        """Return a Flask ``Response`` streaming the archive as a download."""
        return Response(
            self.iter_chunks(),
            mimetype='application/x-tar',
            headers={'Content-Disposition': 'attachment; filename="archive.tar"'}
        )