Avoid recomputing CRCs

This commit is contained in:
Thomas Forgione 2024-07-30 10:27:10 +02:00
parent 9d420793ec
commit bcf7ea2042
1 changed file with 11 additions and 7 deletions

View File

@ -117,13 +117,13 @@ def crc32(filename) -> int:
return hash return hash
def zip_local_file_header(filename: str, filepath: str) -> bytes: def zip_local_file_header(filename: str, filepath: str, crc: int) -> bytes:
buffer_size = 30 + len(filename) buffer_size = 30 + len(filename)
buffer = bytearray(buffer_size) buffer = bytearray(buffer_size)
stat = os.stat(filepath) stat = os.stat(filepath)
# Field 1: local file header signature (buffer[0:4]) # Field 1: local file header signature (buffer[0:4])
buffer[0:4] = b'PK\x03\x04' buffer[0:4] = b'\x50\x4b\x03\x04'
# Field 2: version needed to extract (minimum) (buffer[4:6]) # Field 2: version needed to extract (minimum) (buffer[4:6])
buffer[4:6] = b'\x0a' buffer[4:6] = b'\x0a'
@ -138,7 +138,7 @@ def zip_local_file_header(filename: str, filepath: str) -> bytes:
buffer[12:14] = (mtime.day | (mtime.month << 5) | ((mtime.year - 1980) << 9)).to_bytes(2, byteorder='little') buffer[12:14] = (mtime.day | (mtime.month << 5) | ((mtime.year - 1980) << 9)).to_bytes(2, byteorder='little')
# Field 6: crc-32 of uncompressed data (buffer[14:18]) # Field 6: crc-32 of uncompressed data (buffer[14:18])
buffer[14:18] = crc32(filepath).to_bytes(4, byteorder='little') buffer[14:18] = crc.to_bytes(4, byteorder='little')
# Field 7: compressed size (buffer[18:22]) # Field 7: compressed size (buffer[18:22])
buffer[18:22] = stat.st_size.to_bytes(4, byteorder='little') buffer[18:22] = stat.st_size.to_bytes(4, byteorder='little')
@ -157,7 +157,7 @@ def zip_local_file_header(filename: str, filepath: str) -> bytes:
return buffer return buffer
def zip_central_directory_file_header(filename: str, filepath: str, offset: int) -> bytes: def zip_central_directory_file_header(filename: str, filepath: str, crc: int, offset: int) -> bytes:
buffer_size = 46 + len(filename) buffer_size = 46 + len(filename)
buffer = bytearray(buffer_size) buffer = bytearray(buffer_size)
stat = os.stat(filepath) stat = os.stat(filepath)
@ -181,7 +181,7 @@ def zip_central_directory_file_header(filename: str, filepath: str, offset: int)
buffer[14:16] = (mtime.day | (mtime.month << 5) | ((mtime.year - 1980) << 9)).to_bytes(2, byteorder='little') buffer[14:16] = (mtime.day | (mtime.month << 5) | ((mtime.year - 1980) << 9)).to_bytes(2, byteorder='little')
# Field 6: crc-32 of uncompressed data (buffer[16:20]) # Field 6: crc-32 of uncompressed data (buffer[16:20])
buffer[16:20] = crc32(filepath).to_bytes(4, byteorder='little') buffer[16:20] = crc.to_bytes(4, byteorder='little')
# Field 7: compressed size (buffer[20:24]) # Field 7: compressed size (buffer[20:24])
buffer[20:24] = stat.st_size.to_bytes(4, byteorder='little') buffer[20:24] = stat.st_size.to_bytes(4, byteorder='little')
@ -203,6 +203,7 @@ def zip_central_directory_file_header(filename: str, filepath: str, offset: int)
# Field 14: external file attributes (buffer[38:42]) # Field 14: external file attributes (buffer[38:42])
# Field 15: relative offset of the local file header (buffer[42:46]) # Field 15: relative offset of the local file header (buffer[42:46])
buffer[42:46] = offset.to_bytes(4, byteorder='little')
# Field 16: filename (buffer[46:46+len(filename)]) # Field 16: filename (buffer[46:46+len(filename)])
buffer[46:46+len(filename)] = filename.encode('ascii') buffer[46:46+len(filename)] = filename.encode('ascii')
@ -241,11 +242,14 @@ class ZipSender(ArchiveSender):
def generator(self): def generator(self):
def generate(): def generate():
local_offsets = dict() local_offsets = dict()
crcs = dict()
current_byte = 0 current_byte = 0
for name, file in self.files.items(): for name, file in self.files.items():
crcs[name] = crc32(file)
local_offsets[name] = current_byte local_offsets[name] = current_byte
chunk = zip_local_file_header(name, file) chunk = zip_local_file_header(name, file, crcs[name])
current_byte += len(chunk) current_byte += len(chunk)
yield chunk yield chunk
@ -264,7 +268,7 @@ class ZipSender(ArchiveSender):
centra_directory_offset = current_byte centra_directory_offset = current_byte
for name, file, in self.files.items(): for name, file, in self.files.items():
chunk = zip_central_directory_file_header(name, file, local_offsets[name]) chunk = zip_central_directory_file_header(name, file, crcs[name], local_offsets[name])
central_directory_size += len(chunk) central_directory_size += len(chunk)
current_byte += len(chunk) current_byte += len(chunk)
yield chunk yield chunk