# nenuscanner/tar.py

import builtins
from flask import Response
import functools
import os

# 4MiB chunks
CHUNK_SIZE = 4_194_304

# ASCII value for space
SPACE = ord(' ')

# ASCII value for zero
ZERO = ord('0')

# File header specification
# Number Name     Offset    Size   Description
#      1 name         0      100   File name
#      2 mode       100        8   Permissions
#      3 uid        108        8   Owner (unused in the extended format)
#      4 gid        116        8   Group (unused in the extended format)
#      5 size       124       12   File size in bytes
#      6 mtime      136       12   Last modification time, as Unix time
#      7 chksum     148        8   Header checksum, computed with this field treated as filled with spaces (32)
#      8 type flag  156        1   File type
#      9 linkname   157      100   Name of the file this symbolic link points to (if the type indicates a symbolic link)


def header_chunk(filename: str, filepath: str) -> bytes:
    """Build the 512-byte tar header record describing a regular file.

    Only the classic fields are filled in: name, mode, uid, gid, size, mtime,
    checksum and type flag. Every other byte (linkname and the
    POSIX 1003.1-1990 area) is left as NUL.

    Args:
        filename: Name stored in the archive. Must be ASCII and fit in the
            100-byte name field.
        filepath: Path of the file on disk; only its metadata is read, through
            ``os.stat``.

    Returns:
        Exactly 512 bytes.

    Raises:
        OSError: If ``filepath`` cannot be stat'ed.
        ValueError: If ``filename`` is not ASCII (``UnicodeEncodeError`` is a
            ``ValueError``), is longer than 100 bytes, or if a numeric field
            is negative.
    """
    stat = os.stat(filepath)
    buffer = bytearray(512)

    def put_number(offset: int, width: int, value: int, label: str) -> None:
        """Store ``value`` in the ``width``-byte numeric field at ``offset``."""
        if value < 0:
            raise ValueError(f'{label} cannot be stored in a tar header: {value}')

        digits = format(value, 'o')
        if len(digits) < width:
            # Regular case: zero-padded octal digits followed by a NUL byte
            field = digits.rjust(width - 1, '0').encode('ascii') + b'\x00'
        else:
            # The value does not fit in octal: use the GNU base-256 encoding
            # (first byte 0x80, then the value as a big-endian integer).
            # Writing the longer octal string would grow the buffer and
            # shift every following field.
            field = bytes([0x80]) + value.to_bytes(width - 1, 'big')

        buffer[offset:offset + width] = field

    # Field 1: filename on 100 bytes. A longer name would spill into the mode
    # field and end up silently truncated, so it is rejected instead.
    name = filename.encode('ascii')
    if len(name) > 100:
        raise ValueError(f'filename does not fit in 100 bytes: {filename!r}')
    buffer[0:len(name)] = name

    # Fields 2 to 4: mode, owner and group, on 8 bytes each
    put_number(100, 8, stat.st_mode, 'mode')
    put_number(108, 8, stat.st_uid, 'uid')
    put_number(116, 8, stat.st_gid, 'gid')

    # Fields 5 and 6: size in bytes and last modification time, on 12 bytes each
    put_number(124, 12, stat.st_size, 'size')
    put_number(136, 12, int(stat.st_mtime), 'mtime')

    # Field 8: type flag, '0' because we only have regular files
    buffer[156] = ord('0')

    # Field 9 (linkname) and the POSIX 1003.1-1990 area stay filled with NULs

    # Field 7: checksum, the sum of all header bytes with the checksum field
    # itself counted as 8 spaces (it still holds NULs at this point). It is
    # stored as 6 octal digits followed by a NUL and a space.
    checksum = sum(buffer) + 8 * ord(' ')
    buffer[148:156] = format(checksum, '06o').encode('ascii') + b'\x00 '

    return bytes(buffer)


class TarSender:
    """Streams a set of files to the client as an uncompressed tar archive.

    Files are registered with ``add_file`` and sent by ``response``, which
    reads them in chunks of ``CHUNK_SIZE`` bytes so that no file has to be
    fully loaded in memory.
    """

    # tar archives are made of records of 512 bytes
    _RECORD_SIZE = 512

    def __init__(self):
        # Maps the name stored in the archive to the path of the file on disk
        self.files: dict[str, str] = {}

    def add_file(self, filename: str, filepath: str):
        """Register ``filepath`` to be sent under the name ``filename``.

        Adding the same ``filename`` twice replaces the previous path.
        """
        self.files[filename] = filepath

    @staticmethod
    def _padding(size: int) -> bytes:
        """Return the NUL bytes that round ``size`` up to a whole number of records."""
        # ``-size % 512`` is 0 when size is already a multiple of 512, where
        # ``512 - size % 512`` would insert a full record of zeroes, which
        # readers take for the end of the archive.
        return b'\x00' * (-size % TarSender._RECORD_SIZE)

    def response(self):
        """Return a ``Response`` streaming every registered file as ``archive.tar``.

        The files are opened and read lazily, while the response body is
        being iterated.
        """
        def generate():
            for name, path in self.files.items():
                yield header_chunk(name, path)

                bytes_sent = 0

                with open(path, 'rb') as f:
                    # f.read returns b'' at end of file, which stops the iteration
                    for chunk in iter(lambda: f.read(CHUNK_SIZE), b''):
                        bytes_sent += len(chunk)
                        yield chunk

                # Because tar uses records of 512 bytes, we need to pad the
                # file with zeroes to fill the last record
                padding = self._padding(bytes_sent)
                if padding:
                    yield padding

            # End-of-archive marker: two records filled with zeroes
            yield b'\x00' * (2 * self._RECORD_SIZE)

        return Response(
            generate(),
            mimetype='application/x-tar',
            headers={'Content-Disposition': 'attachment; filename="archive.tar"'}
        )