Attempt at writing tar headers directly

This commit is contained in:
Thomas Forgione 2024-07-23 12:11:33 +02:00
parent 24b49234bb
commit 762900e1f1
1 changed files with 52 additions and 13 deletions

65
tar.py
View File

@ -1,26 +1,65 @@
from flask import Response
import io
import functools
import os
import tarfile
# 4 MiB chunks
CHUNK_SIZE = 4_194_304
# File header specification
# No.  Name       Offset  Size  Description
# 1    name       0       100   File name
# 2    mode       100     8     Permissions
# 3    uid        108     8     Owner (unused in the extended format)
# 4    gid        116     8     Group (unused in the extended format)
# 5    size       124     12    File size in bytes
# 6    mtime      136     12    Last modification time, as a Unix timestamp
# 7    chksum     148     8     Header checksum, computed with this field treated as filled with spaces (32)
# 8    type flag  156     1     File type
# 9    linkname   157     100   Name of the file this symbolic link points to (if the type indicates a symbolic link)
def _octal_field(value: int, width: int, label: str) -> bytes:
    """Encode *value* as a zero-padded octal tar field of *width* bytes.

    The field holds ``width - 1`` ASCII octal digits followed by one NUL.

    Raises:
        ValueError: if *value* is negative or needs more than ``width - 1``
            octal digits.
    """
    if value < 0:
        raise ValueError(f'{label} must not be negative, got {value}')
    digits = format(value, 'o')
    if len(digits) > width - 1:
        raise ValueError(
            f'{label} {value} does not fit in a {width}-byte tar header field'
        )
    return digits.rjust(width - 1, '0').encode('ascii') + b'\x00'


def header_chunk(filename: str, filepath: str) -> bytes:
    """Build the 512-byte tar header block for a regular file.

    The header is assembled by hand (without ``tarfile``) so that the file
    content never has to be copied in memory. Only the size and the
    modification time are read from the file on disk.

    Layout written (offset, size):
        name (0, 100), mode (100, 8), uid (108, 8), gid (116, 8),
        size (124, 12), mtime (136, 12), chksum (148, 8), type flag (156, 1),
        linkname (157, 100), then 255 zero bytes of padding.

    Args:
        filename: Name stored in the archive for this entry.
        filepath: Path of the file on disk, used for ``os.stat``.

    Returns:
        The 512-byte header block.

    Raises:
        ValueError: if the encoded name is longer than 100 bytes, or if a
            numeric value does not fit in its field.
        OSError: if ``filepath`` cannot be stat-ed.
    """
    stat = os.stat(filepath)

    # Field 1: file name, NUL-padded to 100 bytes. The length check is done
    # on the encoded bytes: an over-long name would shift every later field.
    name = filename.encode('utf-8')
    if len(name) > 100:
        raise ValueError(
            f'file name is {len(name)} bytes long, tar headers allow at most 100'
        )

    header = bytearray()
    header += name.ljust(100, b'\x00')

    # Field 2: mode.
    # TODO we put 777 for test
    header += _octal_field(0o777, 8, 'mode')

    # Fields 3 and 4: owner and group, 1000 (default user). Numeric tar
    # fields are octal, so the value must be converted, not written as-is.
    header += _octal_field(1000, 8, 'uid')
    header += _octal_field(1000, 8, 'gid')

    # Field 5: file size in bytes, in octal.
    header += _octal_field(stat.st_size, 12, 'size')

    # Field 6: last modification time as a Unix timestamp, in octal.
    header += _octal_field(max(0, int(stat.st_mtime)), 12, 'mtime')

    # Field 7: checksum, filled with 8 spaces while the sum is computed.
    header += b' ' * 8

    # Field 8: type flag, '0' because we only have regular files.
    header += b'0'

    # Field 9: linkname, NULs because we only have regular files.
    header += b'\x00' * 100

    # POSIX 1003.1-1990: 255 empty bytes.
    header += b'\x00' * 255

    # The checksum is the sum of all header bytes, stored as 6 octal digits
    # followed by a NUL and a space.
    checksum = sum(header)
    header[148:156] = format(checksum, '06o').encode('ascii') + b'\x00 '

    return bytes(header)
class TarSender: