Avoid recomputing CRCs
This commit is contained in:
parent
9d420793ec
commit
bcf7ea2042
18
archive.py
18
archive.py
|
|
@ -117,13 +117,13 @@ def crc32(filename) -> int:
|
||||||
return hash
|
return hash
|
||||||
|
|
||||||
|
|
||||||
def zip_local_file_header(filename: str, filepath: str) -> bytes:
|
def zip_local_file_header(filename: str, filepath: str, crc: int) -> bytes:
|
||||||
buffer_size = 30 + len(filename)
|
buffer_size = 30 + len(filename)
|
||||||
buffer = bytearray(buffer_size)
|
buffer = bytearray(buffer_size)
|
||||||
stat = os.stat(filepath)
|
stat = os.stat(filepath)
|
||||||
|
|
||||||
# Field 1: local file header signature (buffer[0:4])
|
# Field 1: local file header signature (buffer[0:4])
|
||||||
buffer[0:4] = b'PK\x03\x04'
|
buffer[0:4] = b'\x50\x4b\x03\x04'
|
||||||
|
|
||||||
# Field 2: version needed to extract (minimum) (buffer[4:6])
|
# Field 2: version needed to extract (minimum) (buffer[4:6])
|
||||||
buffer[4:6] = b'\x0a'
|
buffer[4:6] = b'\x0a'
|
||||||
|
|
@ -138,7 +138,7 @@ def zip_local_file_header(filename: str, filepath: str) -> bytes:
|
||||||
buffer[12:14] = (mtime.day | (mtime.month << 5) | ((mtime.year - 1980) << 9)).to_bytes(2, byteorder='little')
|
buffer[12:14] = (mtime.day | (mtime.month << 5) | ((mtime.year - 1980) << 9)).to_bytes(2, byteorder='little')
|
||||||
|
|
||||||
# Field 6: crc-32 of uncompressed data (buffer[14:18])
|
# Field 6: crc-32 of uncompressed data (buffer[14:18])
|
||||||
buffer[14:18] = crc32(filepath).to_bytes(4, byteorder='little')
|
buffer[14:18] = crc.to_bytes(4, byteorder='little')
|
||||||
|
|
||||||
# Field 7: compressed size (buffer[18:22])
|
# Field 7: compressed size (buffer[18:22])
|
||||||
buffer[18:22] = stat.st_size.to_bytes(4, byteorder='little')
|
buffer[18:22] = stat.st_size.to_bytes(4, byteorder='little')
|
||||||
|
|
@ -157,7 +157,7 @@ def zip_local_file_header(filename: str, filepath: str) -> bytes:
|
||||||
return buffer
|
return buffer
|
||||||
|
|
||||||
|
|
||||||
def zip_central_directory_file_header(filename: str, filepath: str, offset: int) -> bytes:
|
def zip_central_directory_file_header(filename: str, filepath: str, crc: int, offset: int) -> bytes:
|
||||||
buffer_size = 46 + len(filename)
|
buffer_size = 46 + len(filename)
|
||||||
buffer = bytearray(buffer_size)
|
buffer = bytearray(buffer_size)
|
||||||
stat = os.stat(filepath)
|
stat = os.stat(filepath)
|
||||||
|
|
@ -181,7 +181,7 @@ def zip_central_directory_file_header(filename: str, filepath: str, offset: int)
|
||||||
buffer[14:16] = (mtime.day | (mtime.month << 5) | ((mtime.year - 1980) << 9)).to_bytes(2, byteorder='little')
|
buffer[14:16] = (mtime.day | (mtime.month << 5) | ((mtime.year - 1980) << 9)).to_bytes(2, byteorder='little')
|
||||||
|
|
||||||
# Field 6: crc-32 of uncompressed data (buffer[16:20])
|
# Field 6: crc-32 of uncompressed data (buffer[16:20])
|
||||||
buffer[16:20] = crc32(filepath).to_bytes(4, byteorder='little')
|
buffer[16:20] = crc.to_bytes(4, byteorder='little')
|
||||||
|
|
||||||
# Field 7: compressed size (buffer[20:24])
|
# Field 7: compressed size (buffer[20:24])
|
||||||
buffer[20:24] = stat.st_size.to_bytes(4, byteorder='little')
|
buffer[20:24] = stat.st_size.to_bytes(4, byteorder='little')
|
||||||
|
|
@ -203,6 +203,7 @@ def zip_central_directory_file_header(filename: str, filepath: str, offset: int)
|
||||||
# Field 14: external file attributes (buffer[38:42])
|
# Field 14: external file attributes (buffer[38:42])
|
||||||
|
|
||||||
# Field 15: relative offset of the local file header (buffer[42:46])
|
# Field 15: relative offset of the local file header (buffer[42:46])
|
||||||
|
buffer[42:46] = offset.to_bytes(4, byteorder='little')
|
||||||
|
|
||||||
# Field 16: filename (buffer[46:46+len(filename)])
|
# Field 16: filename (buffer[46:46+len(filename)])
|
||||||
buffer[46:46+len(filename)] = filename.encode('ascii')
|
buffer[46:46+len(filename)] = filename.encode('ascii')
|
||||||
|
|
@ -241,11 +242,14 @@ class ZipSender(ArchiveSender):
|
||||||
def generator(self):
|
def generator(self):
|
||||||
def generate():
|
def generate():
|
||||||
local_offsets = dict()
|
local_offsets = dict()
|
||||||
|
crcs = dict()
|
||||||
current_byte = 0
|
current_byte = 0
|
||||||
|
|
||||||
for name, file in self.files.items():
|
for name, file in self.files.items():
|
||||||
|
crcs[name] = crc32(file)
|
||||||
|
|
||||||
local_offsets[name] = current_byte
|
local_offsets[name] = current_byte
|
||||||
chunk = zip_local_file_header(name, file)
|
chunk = zip_local_file_header(name, file, crcs[name])
|
||||||
current_byte += len(chunk)
|
current_byte += len(chunk)
|
||||||
|
|
||||||
yield chunk
|
yield chunk
|
||||||
|
|
@ -264,7 +268,7 @@ class ZipSender(ArchiveSender):
|
||||||
centra_directory_offset = current_byte
|
centra_directory_offset = current_byte
|
||||||
|
|
||||||
for name, file, in self.files.items():
|
for name, file, in self.files.items():
|
||||||
chunk = zip_central_directory_file_header(name, file, local_offsets[name])
|
chunk = zip_central_directory_file_header(name, file, crcs[name], local_offsets[name])
|
||||||
central_directory_size += len(chunk)
|
central_directory_size += len(chunk)
|
||||||
current_byte += len(chunk)
|
current_byte += len(chunk)
|
||||||
yield chunk
|
yield chunk
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue