blob: 9f59476922eb2d8726f25c765431ee7eab875f5d [file] [log] [blame]
#!/usr/bin/env python
# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""usage: makecab.py [options] source [destination]
Makes cab archives of single files, using zip compression.
Acts like Microsoft makecab.exe would act if passed `/D CompressionType=MSZIP`.
If [destination] is omitted, uses source with last character replaced with _.
options:
-h, --help: print this message
/D arg: silently ignored (for compat with makecab.exe)
/L outdir: put output file in outdir
/Vn: silently ignored (for compat with makecab.exe)
"""
# A cross-platform reimplementation of the bits of makecab.exe that we use.
# cab also supports LZX compression, which has a bitstream that allows for
# a higher compression rate than zip compression (aka deflate). But the cab
# shipped to users is built on the signing server using regular Microsoft
# makecab.exe, so having something in-tree that is good enough is good enough.
from __future__ import print_function
from collections import namedtuple
import datetime
import os
import struct
import sys
import zlib
class FlagParseError(Exception):
  """Raised by ParseFlags when the command line cannot be parsed."""
def ParseFlags(flags):
  """Parses |flags| and returns the parsed flags; returns None for --help.

  Args:
    flags: list of command-line arguments, without the program name.

  Returns:
    A Flags namedtuple with the fields input, output and output_dir, or None
    if -h / --help was passed.

  Raises:
    FlagParseError: on an unknown flag, a /D or /L flag without an argument,
        more than two paths, or a missing input path.
  """
  # Can't use optparse / argparse because of /-style flags :-/
  input_path = None
  output_path = None
  output_dir = '.'

  # Parse.
  i = 0
  while i < len(flags):
    flag = flags[i]
    if flag == '-h' or flag == '--help':
      return None
    if flag == '/V' or (flag.startswith('/V') and flag[2:].isdigit()):
      # Ignore /V1 and friends. Only /V optionally followed by digits is the
      # verbosity flag; anything else starting with /V (for example an
      # absolute path like /Volumes/x/foo.dll) must not be silently dropped.
      i += 1
    elif flag in ['/D', '/L']:
      if i == len(flags) - 1:
        raise FlagParseError('argument needed after ' + flag)
      if flag == '/L':
        output_dir = flags[i + 1]
      # Ignore all /D flags silently.
      i += 2
    elif (flag.startswith('-') or
          (flag.startswith('/') and not os.path.exists(flag))):
      # A leading / is only accepted as a path if that path already exists.
      raise FlagParseError('unknown flag ' + flag)
    else:
      if not input_path:
        input_path = flag
      elif not output_path:
        output_path = flag
      else:
        raise FlagParseError(
            'too many paths: %s %s %s' % (input_path, output_path, flag))
      i += 1

  # Validate and set default values.
  if not input_path:
    raise FlagParseError('no input file')
  if not output_path:
    # Default output name: input basename with its last character replaced
    # by an underscore (foo.dll -> foo.dl_).
    output_path = os.path.basename(input_path)[:-1] + '_'
  Flags = namedtuple('Flags', ['input', 'output', 'output_dir'])
  return Flags(input=input_path, output=output_path, output_dir=output_dir)
def WriteCab(output_file, input_file, cab_stored_filename, input_size,
             input_mtimestamp):
  """Reads data from input_file and stores its MSZIP-compressed data
  in output_file.

  Works on both Python 2 and Python 3.

  Args:
    output_file: seekable binary file object the cab is written to. Writing
        must start at offset 0, since the header is patched afterwards.
    input_file: binary file object the uncompressed data is read from.
    cab_stored_filename: the filename stored in the cab file, as text or
        bytes. Text is encoded as UTF-8.
    input_size: the size of the input file in bytes.
    input_mtimestamp: the mtime timestamp of the input file (must be at least
        midnight 1980-1-1).
  """
  # Everything written to the cab must be bytes. Encode the name up front so
  # that the offsets below are computed from the encoded length.
  # NOTE(review): attribs is always written as 0, i.e. the 0x80 "name
  # contains UTF" bit is not set even for non-ASCII names -- confirm whether
  # that matters for consumers.
  if isinstance(cab_stored_filename, bytes):
    stored_name = cab_stored_filename
  else:
    stored_name = cab_stored_filename.encode('utf-8')

  # Need to write (all in little-endian):
  # 36 bytes CFHEADER cab header
  # 8 bytes CFFOLDER (a set of files compressed with the same parameters)
  # 16 bytes + filename (+ 1 byte trailing \0 for filename) CFFILE
  # Many 8 bytes CFDATA blocks, representing 32kB chunks of uncompressed data,
  # each followed by the compressed data.
  cffile_offset = 36 + 8
  cfdata_offset = cffile_offset + 16 + len(stored_name) + 1

  chunk_size = 1 << 15
  # Floor division: struct.pack() and range() need an int on Python 3.
  num_chunks = (input_size + chunk_size - 1) // chunk_size

  # https://msdn.microsoft.com/en-us/library/bb417343.aspx#cabinet_format
  # Write CFHEADER.
  CFHEADER = ('<'
    '4s'  # signature, 'MSCF'
    'I'   # reserved1, set to 0
    'I'   # cbCabinet, size of file in bytes. Not yet known, filled in later.
    'I'   # reserved2, set to 0
    'I'   # coffFiles, offset of first (and here, only) CFFILE.
    'I'   # reserved3, set to 0
    'B'   # versionMinor, currently 3. Yes, minor version is first.
    'B'   # versionMajor, currently 1.
    'H'   # cFolders, number of CFFOLDER entries.
    'H'   # cFiles, number of CFFILE entries.
    'H'   # flags, for multi-file cabinets. 0 here.
    'H'   # setID, for multi-file cabinets. 0 here.
    'H'   # iCabinet, index in multi-file cabinets. 0 here.
  )
  output_file.write(struct.pack(CFHEADER,
                                b'MSCF', 0, 0, 0,
                                cffile_offset, 0, 3, 1, 1, 1, 0,
                                0, 0))

  # Write single CFFOLDER.
  CFFOLDER = ('<'
    'I'  # coffCabStart, offset of first CFDATA block in this folder.
    'H'  # cCFData, number of CFDATA blocks in this folder.
    'H'  # typeCompress, compression type. 1 means MS-ZIP.
  )
  output_file.write(struct.pack(CFFOLDER, cfdata_offset, num_chunks, 1))

  # Write single CFFILE.
  CFFILE = ('<'
    'I'  # cbFile, uncompressed size of this file in bytes.
    'I'  # uoffFolderStart, uncompressed offset of this file in folder.
    'H'  # iFolder, index into CFFOLDER area.
    'H'  # date, in the format ((year-1980) << 9) + (month << 5) + (day),
         # where month={1..12} and day={1..31}.
    'H'  # time, in the format (hour << 11)+(minute << 5)+(seconds/2),
         # where hour={0..23}.
    'H'  # attribs, 1: read-only
         #          2: hidden
         #          4: system file
         #          0x20: file modified since last backup
         #          0x40: run after extraction
         #          0x80: name contains UTF
  )  # Followed by szFile, the file's name.
  assert output_file.tell() == cffile_offset
  mtime = datetime.datetime.fromtimestamp(input_mtimestamp)
  date = (mtime.year - 1980) << 9 | mtime.month << 5 | mtime.day
  # TODO(thakis): hour seems to be off by 1 from makecab.exe (DST?)
  time = mtime.hour << 11 | mtime.minute << 5 | mtime.second // 2
  output_file.write(struct.pack(CFFILE, input_size, 0, 0, date, time, 0))
  output_file.write(stored_name + b'\0')

  # Write num_chunks many CFDATA headers, followed by the compressed data.
  assert output_file.tell() == cfdata_offset
  CFDATA = ('<'
    'I'  # checksum. Optional and expensive to compute in Python, so write 0.
    'H'  # cbData, number of compressed bytes in this block.
    'H'  # cbUncomp, size after decompressing. 1 << 15 for all but the last.
  )
  # Read input data in chunks of 32kB, compress and write out compressed data.
  for _ in range(num_chunks):
    chunk = input_file.read(chunk_size)
    # Have to use compressobj instead of compress() so we can pass a negative
    # window size to remove header and trailing checksum.
    # Compression level 6 runs about 8x as fast as makecab.exe's LZX compression
    # while producing a 45% larger file. (Interestingly, it also runs
    # about 5x as fast as makecab.exe's MSZIP compression while being about
    # 4.8% larger -- so it might be possible to write an LZX compressor that's
    # much faster without being so much larger.) Compression level 9 isn't
    # very different. Level 1 is another ~30% faster and 10% larger.
    # Since 6 is ok and the default, let's go with that.
    # Remember: User-shipped bits get recompressed on the signing server.
    zlib_obj = zlib.compressobj(
        zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -zlib.MAX_WBITS)
    compressed = zlib_obj.compress(chunk) + zlib_obj.flush()
    compressed_size = 2 + len(compressed)  # Also count 0x43 0x4b magic header.
    # cab spec: "Each data block represents 32k uncompressed, except that the
    # last block in a folder may be smaller. A two-byte MSZIP signature precedes
    # the compressed encoding in each block, consisting of the bytes 0x43, 0x4B.
    # The maximum compressed size of each MSZIP block is 32k + 12 bytes."
    assert compressed_size <= chunk_size + 12
    output_file.write(struct.pack(CFDATA, 0, compressed_size, len(chunk)))
    output_file.write(b'\x43\x4b')  # MSZIP magic block header.
    output_file.write(compressed)

  outfile_size = output_file.tell()
  # Now go back and fill in missing size in CFHEADER.
  output_file.seek(8)  # cbCabinet, size of file in bytes.
  output_file.write(struct.pack('<I', outfile_size))
def main():
  """Command-line entry point: parses sys.argv and writes the cab file.

  Prints the module docstring and exits with status 0 for --help. Exits with
  status 1 if the flags cannot be parsed or the input file does not exist.
  """
  try:
    flags = ParseFlags(sys.argv[1:])
  except FlagParseError as arg_error:
    # Exception.message does not exist on Python 3; str() works on 2 and 3.
    print('makecab.py: error:', str(arg_error), file=sys.stderr)
    print('pass --help for usage', file=sys.stderr)
    sys.exit(1)
  if not flags:  # --help got passed
    print(__doc__)
    sys.exit(0)
  if not os.path.exists(flags.input):
    print('makecab.py: error: input file %s does not exist' % flags.input,
          file=sys.stderr)
    sys.exit(1)
  with open(os.path.join(flags.output_dir, flags.output), 'wb') as output_file:
    # Only the basename of the input is stored inside the cab.
    cab_stored_filename = os.path.basename(flags.input)
    input_mtimestamp = os.path.getmtime(flags.input)
    input_size = os.path.getsize(flags.input)
    with open(flags.input, 'rb') as input_file:
      WriteCab(output_file, input_file, cab_stored_filename, input_size,
               input_mtimestamp)
# Run main() only when executed as a script, not when imported as a module.
if __name__ == '__main__':
  main()