nenuscanner/tar.py

107 lines
3.8 KiB
Python

import builtins
from flask import Response
import functools
import os
# File contents are read and streamed in pieces of 4 MiB.
CHUNK_SIZE = 4 * 1024 * 1024
# Byte value of the ASCII space character (b' ').
SPACE = 0x20
# Byte value of the ASCII digit zero (b'0').
ZERO = 0x30
# Specification of the file header
# No.  Name       Offset  Size  Description
# 1    name       0       100   File name
# 2    mode       100     8     Permissions
# 3    uid        108     8     Owner (unused in the extended format)
# 4    gid        116     8     Group (unused in the extended format)
# 5    size       124     12    File size in bytes
# 6    mtime      136     12    Last modification time, as Unix time
# 7    chksum     148     8     Header checksum, computed with this field treated as filled with spaces (32)
# 8    type flag  156     1     File type
# 9    linkname   157     100   Name of the file this symbolic link points to (if the type indicates a symbolic link)
def header_chunk(filename: str, filepath: str) -> bytes:
    """Build the 512-byte tar header record for a regular file.

    The record holds the NUL-padded file name followed by zero-padded octal
    ASCII numbers for mode, uid, gid, size and mtime, then the header
    checksum and the type flag ``'0'`` (regular file). Everything after the
    type flag (link name and the rest of the record) stays NUL; no ustar
    magic is written.

    Args:
        filename: Name stored in the archive. Must be ASCII and at most
            100 bytes long.
        filepath: Path of the file on disk; only used to ``os.stat`` it.

    Returns:
        The header record, always exactly 512 bytes long.

    Raises:
        ValueError: If ``filename`` is not ASCII (``UnicodeEncodeError``)
            or longer than 100 bytes, or if the file mode or size does not
            fit its fixed-width octal field (size limit: 8 GiB - 1).
        OSError: If ``filepath`` cannot be stat'ed.
    """
    def octal(value: int, digits: int, field: str, *, optional: bool = False) -> bytes:
        # Encode ``value`` as exactly ``digits`` zero-padded octal characters.
        # The result must have exactly the width of the slice it is assigned
        # to: a longer or shorter value would silently resize the bytearray
        # and shift every following field.
        if not 0 <= value < 8 ** digits:
            if not optional:
                raise ValueError(
                    f'{field} value {value} does not fit in {digits} octal digits'
                )
            # Optional metadata that cannot be represented is stored as 0
            # rather than making the whole archive fail.
            value = 0
        return f'{value:0{digits}o}'.encode('ascii')

    info = os.stat(filepath)
    name = filename.encode('ascii')
    if len(name) > 100:
        raise ValueError(
            f'file name is {len(name)} bytes long, the tar name field holds at most 100'
        )

    header = bytearray(512)
    # Field 1: file name, 100 bytes, NUL padded
    header[0:len(name)] = name
    # Fields 2-6 are octal numbers whose last byte must stay \x00, so only
    # the first 7 (resp. 11) bytes of each 8 (resp. 12) byte field are set.
    # Field 2: mode
    header[100:107] = octal(info.st_mode, 7, 'mode')
    # Field 3: owner (0 if the uid does not fit)
    header[108:115] = octal(info.st_uid, 7, 'uid', optional=True)
    # Field 4: group (0 if the gid does not fit)
    header[116:123] = octal(info.st_gid, 7, 'gid', optional=True)
    # Field 5: file size in bytes
    header[124:135] = octal(info.st_size, 11, 'size')
    # Field 6: last modification time (0 if negative or too large)
    header[136:147] = octal(int(info.st_mtime), 11, 'mtime', optional=True)
    # Field 8: type flag, '0' because only regular files are archived
    header[156:157] = b'0'
    # Field 9 (link name) and the remaining bytes stay \x00.

    # Field 7: checksum = sum of all header bytes with the 8 checksum bytes
    # counted as spaces. The field is still all \x00 here, so the spaces are
    # added separately. The sum is at most 512 * 255, which always fits in
    # 6 octal digits.
    checksum = sum(header) + 8 * ord(' ')
    # Conventional layout of the field: 6 octal digits, \x00, then a space.
    header[148:156] = f'{checksum:06o}\x00 '.encode('ascii')
    return bytes(header)
class TarSender:
    """Stream a set of files to the client as one tar archive.

    Files are registered with ``add_file``; ``response`` then builds a Flask
    response whose body is produced lazily, reading each file in
    ``CHUNK_SIZE`` pieces instead of loading it whole in memory.
    """

    # A tar archive is a sequence of fixed-size 512-byte records.
    _RECORD_SIZE = 512

    def __init__(self):
        # Maps the name stored in the archive to the path of the file on disk.
        self.files: dict[str, str] = {}

    def add_file(self, filename: str, filepath: str):
        """Register ``filepath`` to be archived under the name ``filename``.

        Registering the same ``filename`` again replaces the earlier path.
        """
        self.files[filename] = filepath

    @classmethod
    def _padding(cls, size: int) -> bytes:
        """Return the NUL bytes that round ``size`` up to a whole record.

        Nothing is returned when ``size`` is already a multiple of the record
        size (including 0): an extra all-zero record there would be read as
        the end of the archive and hide the files that follow.
        """
        return bytes(-size % cls._RECORD_SIZE)

    def response(self):
        """Return a Flask ``Response`` that streams the archive.

        The response has the ``application/x-tar`` mimetype and is offered
        as a download named ``archive.tar``. Files are opened and read only
        while the response body is being iterated.
        """
        def generate():
            for name, path in self.files.items():
                yield header_chunk(name, path)
                sent = 0
                with open(path, 'rb') as f:
                    while chunk := f.read(CHUNK_SIZE):
                        sent += len(chunk)
                        yield chunk
                # File data must occupy a whole number of records.
                padding = self._padding(sent)
                if padding:
                    yield padding
            # End-of-archive marker: two records filled with zeroes.
            yield bytes(2 * self._RECORD_SIZE)

        return Response(
            generate(),
            mimetype='application/x-tar',
            headers={'Content-Disposition': 'attachment; filename="archive.tar"'}
        )