import builtins
import functools
import os
import zlib
from datetime import datetime

try:
    # Flask is only needed for the HTTP response() helpers; the archive
    # generators themselves have no web dependency.
    from flask import Response
except ImportError:  # pragma: no cover - Flask is optional here
    Response = None

# Chunk size for CRC-32 computation (64 KiB)
CRC32_CHUNK_SIZE = 65_536
# Chunk size for streaming file contents (4 MiB)
CHUNK_SIZE = 4_194_304
# ASCII value for space
SPACE = ord(' ')
# ASCII value for zero
ZERO = ord('0')


def tar_header_chunk(filename: str, filepath: str) -> bytes:
    """Build the 512-byte v7-style tar header for one regular file.

    filename: name stored in the archive (ASCII, at most 100 bytes).
    filepath: path on disk; its stat() supplies mode/owner/size/mtime.
    Raises ValueError if the encoded filename does not fit the 100-byte
    name field (the original code would silently corrupt later fields).
    """

    def _oct(i: int) -> str:
        # Octal representation without the leading '0o'.
        return builtins.oct(i)[2:]

    name = filename.encode('ascii')
    if len(name) > 100:
        raise ValueError(f"filename too long for tar header: {filename!r}")

    stat = os.stat(filepath)
    buffer = bytearray(512)
    # Field 1: filename on 100 bytes, NUL padded
    buffer[0:len(name)] = name
    # Fields 2-4: mode / uid / gid, 8 bytes each, octal, last byte stays NUL
    buffer[100:107] = _oct(stat.st_mode).rjust(7, '0').encode('ascii')
    buffer[108:115] = _oct(stat.st_uid).rjust(7, '0').encode('ascii')
    buffer[116:123] = _oct(stat.st_gid).rjust(7, '0').encode('ascii')
    # Field 5: file size in bytes, 12 bytes, octal, last byte stays NUL
    buffer[124:135] = _oct(stat.st_size).rjust(11, '0').encode('ascii')
    # Field 6: last modified, 12 bytes, octal, last byte stays NUL
    buffer[136:147] = _oct(int(stat.st_mtime)).rjust(11, '0').encode('ascii')
    # Field 7: checksum (buffer[148:156]) is filled last, see below
    # Field 8: type flag, '0' = regular file (the only kind we emit)
    buffer[156] = ZERO
    # Field 9: linkname stays NUL - regular files only

    # Checksum: sum of all header bytes with the checksum field itself
    # counted as eight ASCII spaces (8 * 32 == 256).
    checksum = sum(buffer) + 256
    buffer[148:154] = _oct(checksum).rjust(6, '0').encode('ascii')
    # The checksum field must end with NUL then space; buffer[154] is
    # already NUL, so only the trailing space needs writing.
    buffer[155] = SPACE
    return bytes(buffer)


class ArchiveSender:
    """Collects (archive name -> disk path) pairs and streams an archive."""

    def __init__(self):
        self.files: dict[str, str] = {}

    def add_file(self, filename: str, filepath: str):
        """Register *filepath* to be stored in the archive as *filename*."""
        self.files[filename] = filepath

    def response(self):
        """Return a Flask Response streaming the archive (subclass hook)."""
        raise NotImplementedError("Abstract method")


class TarSender(ArchiveSender):
    """Streams the registered files as an uncompressed tar archive."""

    def generator(self):
        def generate():
            for name, path in self.files.items():
                yield tar_header_chunk(name, path)
                sent = 0
                with open(path, 'rb') as f:
                    while chunk := f.read(CHUNK_SIZE):
                        sent += len(chunk)
                        yield chunk
                # tar uses 512-byte records: pad the data up to the next
                # record boundary. NOTE: `-sent % 512` is 0 when the file
                # is already aligned; the previous `512 - sent % 512`
                # wrongly emitted a full zero record in that case, which
                # readers treat as an end-of-archive sentinel.
                padding = -sent % 512
                if padding:
                    yield b'\x00' * padding
            # End-of-archive marker: two zero-filled 512-byte records,
            # required by the tar format (was missing before).
            yield b'\x00' * 1024

        return generate()

    def response(self):
        """Return a Flask Response that streams the tar archive."""
        if Response is None:
            raise RuntimeError("Flask is required to build an HTTP response")
        return Response(
            self.generator(),
            mimetype='application/x-tar',
            headers={'Content-Disposition': 'attachment; filename="archive.tar"'}
        )


def crc32(filename) -> int:
    """Compute the CRC-32 of *filename*'s contents, reading in chunks."""
    with open(filename, 'rb') as fh:
        value = 0
        while chunk := fh.read(CRC32_CHUNK_SIZE):
            value = zlib.crc32(chunk, value)
        return value


def _dos_datetime(timestamp: float) -> bytes:
    """Encode a Unix timestamp as the 4-byte MS-DOS time+date used by ZIP."""
    mtime = datetime.fromtimestamp(timestamp)
    dos_time = (mtime.second // 2) | (mtime.minute << 5) | (mtime.hour << 11)
    dos_date = mtime.day | (mtime.month << 5) | ((mtime.year - 1980) << 9)
    return (dos_time.to_bytes(2, byteorder='little')
            + dos_date.to_bytes(2, byteorder='little'))


def zip_local_file_header(filename: str, filepath: str, crc: int) -> bytes:
    """Build the ZIP local file header for one stored (uncompressed) entry.

    crc: CRC-32 of the file contents (see crc32()).
    """
    name = filename.encode('ascii')
    buffer = bytearray(30 + len(name))
    stat = os.stat(filepath)
    # Field 1: local file header signature (buffer[0:4])
    buffer[0:4] = b'\x50\x4b\x03\x04'
    # Field 2: version needed to extract = 1.0 (buffer[4:6]).
    # Must be exactly 2 bytes: assigning b'\x0a' to a 2-byte slice
    # RESIZED the bytearray and shifted every later field.
    buffer[4:6] = (10).to_bytes(2, byteorder='little')
    # Field 3: general purpose bit flag (buffer[6:8]), leave at 0
    # Field 4: compression mode (buffer[8:10]), leave at 0 (stored)
    # Field 5: file last modification time+date, MS-DOS format (buffer[10:14])
    buffer[10:14] = _dos_datetime(stat.st_mtime)
    # Field 6: crc-32 of uncompressed data (buffer[14:18])
    buffer[14:18] = crc.to_bytes(4, byteorder='little')
    # Fields 7-8: compressed and uncompressed size (equal: data is stored)
    buffer[18:22] = stat.st_size.to_bytes(4, byteorder='little')
    buffer[22:26] = stat.st_size.to_bytes(4, byteorder='little')
    # Field 9: filename length (buffer[26:28])
    buffer[26:28] = len(name).to_bytes(2, byteorder='little')
    # Field 10: extra field length (buffer[28:30]), leave at 0
    # Field 11: filename (buffer[30:30+len(name)])
    buffer[30:30 + len(name)] = name
    return bytes(buffer)


def zip_central_directory_file_header(filename: str, filepath: str,
                                      crc: int, offset: int) -> bytes:
    """Build the ZIP central directory record for one stored entry.

    offset: byte offset of the entry's local file header in the stream.
    """
    name = filename.encode('ascii')
    buffer = bytearray(46 + len(name))
    stat = os.stat(filepath)
    # Field 1: central directory file header signature (buffer[0:4])
    buffer[0:4] = b'\x50\x4b\x01\x02'
    # Field 2: version made by = 1.0 (buffer[4:6]); 2 bytes, see note in
    # zip_local_file_header about slice-resize corruption.
    buffer[4:6] = (10).to_bytes(2, byteorder='little')
    # Field 3: version needed to extract = 1.0 (buffer[6:8])
    buffer[6:8] = (10).to_bytes(2, byteorder='little')
    # general purpose bit flag (buffer[8:10]), leave at 0
    # compression mode (buffer[10:12]), leave at 0 (stored)
    # file last modification time+date, MS-DOS format (buffer[12:16])
    buffer[12:16] = _dos_datetime(stat.st_mtime)
    # crc-32 of uncompressed data (buffer[16:20])
    buffer[16:20] = crc.to_bytes(4, byteorder='little')
    # compressed and uncompressed size (equal: data is stored)
    buffer[20:24] = stat.st_size.to_bytes(4, byteorder='little')
    buffer[24:28] = stat.st_size.to_bytes(4, byteorder='little')
    # filename length (buffer[28:30])
    buffer[28:30] = len(name).to_bytes(2, byteorder='little')
    # extra field length, comment length, disk number, internal and
    # external attributes (buffer[30:42]) all stay 0
    # relative offset of the local file header (buffer[42:46])
    buffer[42:46] = offset.to_bytes(4, byteorder='little')
    # filename (buffer[46:46+len(name)])
    buffer[46:46 + len(name)] = name
    return bytes(buffer)


def zip_end_of_central_directory(items_number: int,
                                 central_directory_size: int,
                                 central_directory_offset: int) -> bytes:
    """Build the 22-byte ZIP end-of-central-directory record (no comment)."""
    buffer = bytearray(22)
    # Field 1: end of central directory signature (buffer[0:4])
    buffer[0:4] = b'\x50\x4b\x05\x06'
    # Field 2: number of this disk (buffer[4:6]), leave at 0
    # Field 3: disk where central directory starts (buffer[6:8]), leave at 0
    # Fields 4-5: records on this disk / total records (single disk: equal)
    buffer[8:10] = items_number.to_bytes(2, byteorder='little')
    buffer[10:12] = items_number.to_bytes(2, byteorder='little')
    # Field 6: size of central directory in bytes (buffer[12:16])
    buffer[12:16] = central_directory_size.to_bytes(4, byteorder='little')
    # Field 7: offset of start of central directory (buffer[16:20])
    buffer[16:20] = central_directory_offset.to_bytes(4, byteorder='little')
    # Field 8: comment length (buffer[20:22]), leave at 0
    return bytes(buffer)


class ZipSender(ArchiveSender):
    """Streams the registered files as a stored (uncompressed) ZIP archive."""

    def generator(self):
        def generate():
            local_offsets: dict[str, int] = {}
            crcs: dict[str, int] = {}
            current_byte = 0
            for name, path in self.files.items():
                # The CRC must be known before the local header is emitted,
                # so each file is read twice (once here, once to stream).
                crcs[name] = crc32(path)
                local_offsets[name] = current_byte
                header = zip_local_file_header(name, path, crcs[name])
                current_byte += len(header)
                yield header
                with open(path, 'rb') as f:
                    while chunk := f.read(CHUNK_SIZE):
                        current_byte += len(chunk)
                        yield chunk
            central_directory_size = 0
            central_directory_offset = current_byte
            for name, path in self.files.items():
                entry = zip_central_directory_file_header(
                    name, path, crcs[name], local_offsets[name])
                central_directory_size += len(entry)
                current_byte += len(entry)
                yield entry
            yield zip_end_of_central_directory(
                len(self.files), central_directory_size,
                central_directory_offset)

        return generate()

    def response(self):
        """Return a Flask Response that streams the ZIP archive."""
        if Response is None:
            raise RuntimeError("Flask is required to build an HTTP response")
        return Response(
            self.generator(),
            mimetype='application/zip',
            headers={'Content-Disposition': 'attachment; filename="archive.zip"'}
        )