# blob: 827121ab8cf291cdcd2e06d67a3db68688e85fdd [file] [log] [blame]
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from __future__ import division
from __future__ import print_function
import array
import difflib
import distutils.dir_util
import filecmp
import io
import operator
import os
import re
import shutil
import struct
import subprocess
import sys
import tempfile
import uuid
from functools import reduce
def ZapTimestamp(filename):
  """Rewrites |filename| in place, masking midl.exe's timestamp and version.

  Files ending in '.tlb' have their file-level custom() data block checked
  with asserts and then overwritten with a fixed "Created by" string and a
  fixed compiler version (8.1.622). Every other file is treated as a text
  output and patched with byte-level regex / string substitutions.

  Args:
    filename: Path of the midl.exe output file to normalize.

  Raises:
    AssertionError: If a .tlb file does not have the expected layout.
  """
  with open(filename, 'rb') as f:
    contents = f.read()

  # midl.exe writes timestamp 2147483647 (2^31 - 1) as creation date into its
  # outputs, but using the local timezone. To make the output timezone-
  # independent, replace that date with a fixed string of the same length.
  # Also blank out the minor version number.
  if filename.endswith('.tlb'):
    # Summary of the parts of the .tlb binary format this code relies on:
    # TLB files start with a 54 byte header. Offset 0x20 stores how many types
    # are defined in the file, and the header is followed by that many uint32s.
    # After that, 15 section headers appear. Each section header is 16 bytes,
    # starting with offset and length uint32s.
    # Section 12 in the file contains custom() data. custom() data has a type
    # (int, string, etc). Each custom data chunk starts with a uint16_t
    # describing its type. Type 8 is string data, consisting of a uint32_t
    # len, followed by that many data bytes, followed by 'W' bytes to pad to a
    # 4 byte boundary. Type 0x13 is uint32 data, followed by 4 data bytes,
    # followed by two 'W' to pad to a 4 byte boundary.
    # The custom block always starts with one string containing "Created by
    # MIDL version 8...", followed by one uint32 containing 0x7fffffff,
    # followed by another uint32 containing the MIDL compiler version (e.g.
    # 0x0801026e for v8.1.622 -- 0x26e == 622). These 3 fields take 0x54 bytes.
    # There might be more custom data after that, but these 3 blocks are always
    # there for file-level metadata.
    # All data is little-endian in the file.
    assert contents[0:8] == b'MSFT\x02\x00\x01\x00'
    ntypes, = struct.unpack_from('<I', contents, 0x20)
    custom_off, custom_len = struct.unpack_from(
        '<II', contents, 0x54 + 4*ntypes + 11*16)
    assert custom_len >= 0x54
    # First: Type string (0x8), followed by 0x3e characters.
    assert contents[custom_off:custom_off + 6] == b'\x08\x00\x3e\x00\x00\x00'
    assert re.match(
        br'Created by MIDL version 8\.\d\d\.\d{4} at ... Jan 1. ..:..:.. 2038\n',
        contents[custom_off + 6:custom_off + 6 + 0x3e])
    # Offset of the first uint32 chunk, right after the string chunk.
    ints_off = custom_off + 6 + 0x3e
    # Second: Type uint32 (0x13) storing 0x7fffffff (followed by WW / 0x57 pad)
    assert contents[ints_off:ints_off + 8] == \
        b'\x13\x00\xff\xff\xff\x7f\x57\x57'
    # Third: Type uint32 (0x13) storing MIDL compiler version.
    assert contents[ints_off + 8:ints_off + 8 + 2] == b'\x13\x00'
    # Replace "Created by" string with fixed string, and fixed MIDL version with
    # 8.1.622 always. The replacement string is 0x3e bytes long, so the three
    # rewritten chunks occupy exactly the original 0x54 bytes.
    contents = (
        contents[0:custom_off + 6] +
        b'Created by MIDL version 8.xx.xxxx at a redacted point in time\n' +
        # uint32 (0x13) val 0x7fffffff, WW, uint32 (0x13), val 0x0801026e, WW
        b'\x13\x00\xff\xff\xff\x7f\x57\x57\x13\x00\x6e\x02\x01\x08\x57\x57' +
        contents[custom_off + 0x54:])
  else:
    contents = re.sub(
        br'File created by MIDL compiler version 8\.\d\d\.\d{4} \*/\r\n'
        br'/\* at ... Jan 1. ..:..:.. 2038',
        br'File created by MIDL compiler version 8.xx.xxxx */\r\n'
        br'/* at a redacted point in time', contents)
    contents = re.sub(
        br'    Oicf, W1, Zp8, env=(.....) \(32b run\), '
        br'target_arch=(AMD64|X86) 8\.\d\d\.\d{4}',
        br'    Oicf, W1, Zp8, env=\1 (32b run), target_arch=\2 8.xx.xxxx',
        contents)
    # TODO(thakis): If we need more hacks than these, try to verify checked-in
    # outputs when we're using the hermetic toolchain.
    # midl.exe older than 8.1.622 omit '//' after #endif, fix that:
    contents = contents.replace(b'#endif !_MIDL_USE_GUIDDEF_',
                                b'#endif // !_MIDL_USE_GUIDDEF_')
    # midl.exe puts the midl version into code in one place. To have
    # predictable output, lie about the midl version if it's not 8.1.622.
    # This is unfortunate, but remember that there's beauty too in imperfection.
    contents = contents.replace(b'0x801026c, /* MIDL Version 8.1.620 */',
                                b'0x801026e, /* MIDL Version 8.1.622 */')

  with open(filename, 'wb') as f:
    f.write(contents)
def overwrite_cls_guid_h(h_file, dynamic_guid):
  """Replaces the GUID of every `class DECLSPEC_UUID("...")` in |h_file|.

  The file is read and written back in place as bytes.

  Args:
    h_file: Path of the generated header to patch.
    dynamic_guid: The new GUID; str(dynamic_guid) is inserted between the
        quotes.
  """
  with open(h_file, 'rb') as f:
    contents = f.read()
  contents = re.sub(br'class DECLSPEC_UUID\("[^"]*"\)',
                    br'class DECLSPEC_UUID("%s")' % str(dynamic_guid).encode(),
                    contents)
  with open(h_file, 'wb') as f:
    f.write(contents)
def overwrite_cls_guid_iid(iid_file, dynamic_guid):
  """Replaces the value of every `MIDL_DEFINE_GUID(CLSID, name, ...)`.

  The GUID is written as three hex words followed by eight hex bytes, all
  comma-separated; the macro's name argument is preserved. The file is read
  and written back in place as bytes.

  Args:
    iid_file: Path of the generated _i.c file to patch.
    dynamic_guid: The new GUID; must expose `.fields` and `.bytes` the way
        uuid.UUID does.
  """
  with open(iid_file, 'rb') as f:
    contents = f.read()
  hexuuid = '0x%08x,0x%04x,0x%04x,' % dynamic_guid.fields[0:3]
  # dynamic_guid.bytes is a bytestring in Py3, but a normal string in Py2.
  # Iterating a bytearray yields ints on both, so no version check is needed.
  hexuuid += ','.join('0x%02x' % b for b in bytearray(dynamic_guid.bytes[8:]))
  contents = re.sub(br'MIDL_DEFINE_GUID\(CLSID, ([^,]*),[^)]*\)',
                    br'MIDL_DEFINE_GUID(CLSID, \1,%s)' % hexuuid.encode(),
                    contents)
  with open(iid_file, 'wb') as f:
    f.write(contents)
def overwrite_cls_guid_tlb(tlb_file, dynamic_guid):
  """Patches the first type's GUID in |tlb_file| and rebuilds the GUID hash.

  The file is modified in place. The first type in the type section must be
  a coclass (first byte 0x25); its GUID entry is overwritten with
  |dynamic_guid|, then the GUID hash table and the per-entry chain links are
  recomputed from scratch.

  Args:
    tlb_file: Path of the .tlb file to patch.
    dynamic_guid: The new GUID; must expose `.fields` and `.bytes` the way
        uuid.UUID does.

  Raises:
    AssertionError: If the file does not have the expected layout.
  """
  # See ZapTimestamp() for a short overview of the .tlb format.  The 1st
  # section contains type descriptions, and the first type should be our
  # coclass.  It points to the type's GUID in section 6, the GUID section.
  with open(tlb_file, 'rb') as f:
    # A bytearray is mutable (needed for struct.pack_into below) and indexes
    # to ints on both Python 2 and Python 3.
    contents = bytearray(f.read())
  assert contents[0:8] == b'MSFT\x02\x00\x01\x00'
  ntypes, = struct.unpack_from('<I', contents, 0x20)
  type_off, _ = struct.unpack_from('<II', contents, 0x54 + 4*ntypes)
  coclass = contents[type_off]
  assert coclass == 0x25, "expected coclass"
  guidind = struct.unpack_from('<I', contents, type_off + 0x2c)[0]
  guid_off, guid_len = struct.unpack_from(
      '<II', contents, 0x54 + 4*ntypes + 5*16)
  # NOTE(review): the GUID written below is 16 bytes, so 14 looks like it
  # should be 16 -- confirm before tightening.
  assert guidind + 14 <= guid_len
  struct.pack_into('<IHH8s', contents, guid_off + guidind,
                   *(dynamic_guid.fields[0:3] + (dynamic_guid.bytes[8:],)))

  # The GUID is correct now, but there's also a GUID hashtable in section 5.
  # Need to recreate that too.  Since the hash table uses chaining, it's
  # easiest to recompute it from scratch rather than trying to patch it up.
  nbuckets = 0x80 // 4
  hashtab = [0xffffffff] * nbuckets
  # Each GUID entry is 24 bytes: 16 GUID bytes, a uint32, and the uint32
  # chain link at offset 0x14.
  for entry_off in range(guid_off, guid_off + guid_len, 24):
    guidbytes, = struct.unpack_from('<16s', contents, entry_off)
    words = struct.unpack('<8H', guidbytes)
    # midl seems to use the following simple hash function for GUIDs:
    guidhash = reduce(operator.xor, words) % nbuckets
    # Chain this entry in front of whatever the bucket held before.
    struct.pack_into('<I', contents, entry_off + 0x14, hashtab[guidhash])
    hashtab[guidhash] = entry_off - guid_off

  hash_off, _ = struct.unpack_from('<II', contents, 0x54 + 4*ntypes + 4*16)
  for i, hashval in enumerate(hashtab):
    struct.pack_into('<I', contents, hash_off + 4*i, hashval)

  with open(tlb_file, 'wb') as f:
    f.write(contents)
def overwrite_cls_guid(h_file, iid_file, tlb_file, dynamic_guid):
  """Applies |dynamic_guid| to the .h, _i.c and .tlb outputs, in that order.

  This currently assumes that there's only one coclass in the idl file, and
  that that's the type with the dynamic type.

  Args:
    h_file: Path of the generated header.
    iid_file: Path of the generated _i.c file.
    tlb_file: Path of the generated type library.
    dynamic_guid: The GUID to write into all three files.
  """
  patchers = (
      (overwrite_cls_guid_h, h_file),
      (overwrite_cls_guid_iid, iid_file),
      (overwrite_cls_guid_tlb, tlb_file),
  )
  for patch, path in patchers:
    patch(path, dynamic_guid)
def main(arch, gendir, outdir, dynamic_guid, tlb, h, dlldata, iid, proxy, idl,
         *flags):
  """Copies checked-in midl outputs to |outdir| and verifies them on Windows.

  Args:
    arch: Path of a file holding the environment block for running midl. The
        second '.'-separated component of this string names the subdirectory
        of checked-in outputs to use ('x86' or 'x64').
    gendir: Directory holding the checked-in outputs. If it contains a
        directory named after the basename of |idl|, that one is used.
    outdir: Directory the checked-in outputs are copied to.
    dynamic_guid: 'none', or the string form of a GUID to patch into the
        copied |h|, |iid| and |tlb| files.
    tlb: Type library file name, passed to midl as /tlb.
    h: Header file name, passed to midl as /h.
    dlldata: File name passed to midl as /dlldata.
    iid: File name passed to midl as /iid.
    proxy: File name passed to midl as /proxy.
    idl: The input .idl file.
    *flags: Additional command-line flags passed through to midl.

  Returns:
    0 if the outputs were copied and verification passed or was skipped;
    midl's exit code if it is nonzero; 1 if midl's output differs from the
    checked-in files.
  """
  # Copy checked-in outputs to final location.
  source = gendir
  if os.path.isdir(os.path.join(source, os.path.basename(idl))):
    source = os.path.join(source, os.path.basename(idl))
  source = os.path.join(source, arch.split('.')[1])  # Append 'x86' or 'x64'.
  source = os.path.normpath(source)
  distutils.dir_util.copy_tree(source, outdir, preserve_times=False)
  if dynamic_guid != 'none':
    overwrite_cls_guid(os.path.join(outdir, h),
                       os.path.join(outdir, iid),
                       os.path.join(outdir, tlb),
                       uuid.UUID(dynamic_guid))

  # On non-Windows, that's all we can do.
  if sys.platform != 'win32':
    return 0

  # Skip the verification step on Windows 7 to avoid issues running the
  # compiler.
  ver = sys.getwindowsversion()
  # Impossible version number, but just in case...
  if ver.major < 6:
    return 0
  # Check for before Windows 8.
  if ver.major == 6 and ver.minor < 2:
    return 0

  # On Windows, run midl.exe on the input and check that its outputs are
  # identical to the checked-in outputs (after possibly replacing their main
  # class guid).
  tmp_dir = tempfile.mkdtemp()
  delete_tmp_dir = True
  try:
    # Read the environment block from the file.  This is stored in the format
    # used by CreateProcess. Drop last 2 NULs, one for list terminator, one
    # for trailing vs. separator.
    with open(arch) as env_file:
      env_pairs = env_file.read()[:-2].split('\0')
    env_dict = dict(item.split('=', 1) for item in env_pairs)

    args = ['midl', '/nologo'] + list(flags) + [
        '/out', tmp_dir,
        '/tlb', tlb,
        '/h', h,
        '/dlldata', dlldata,
        '/iid', iid,
        '/proxy', proxy,
        idl]
    popen = subprocess.Popen(args, shell=True, env=env_dict,
                             stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    out, _ = popen.communicate()
    # Filter junk out of stdout, and write filtered versions. Output we want
    # to filter is pairs of lines that look like this:
    # Processing C:\Program Files (x86)\Microsoft SDKs\...\include\objidl.idl
    # objidl.idl
    lines = out.decode('utf-8').splitlines()
    prefixes = ('Processing ', '64 bit Processing ')
    processing = set(os.path.basename(x)
                     for x in lines if x.startswith(prefixes))
    for line in lines:
      if not line.startswith(prefixes) and line not in processing:
        print(line)
    if popen.returncode != 0:
      return popen.returncode

    for f in os.listdir(tmp_dir):
      ZapTimestamp(os.path.join(tmp_dir, f))

    # Now compare the output in tmp_dir to the copied-over outputs.
    diff = filecmp.dircmp(tmp_dir, outdir)
    if diff.diff_files:
      print('midl.exe output different from files in %s, see %s' % (outdir,
                                                                    tmp_dir))
      for f in diff.diff_files:
        # .tlb files are binary; a line diff of them is not useful.
        if f.endswith('.tlb'):
          continue
        fromfile = os.path.join(outdir, f)
        tofile = os.path.join(tmp_dir, f)
        with io.open(fromfile) as from_f:
          fromlines = from_f.readlines()
        with io.open(tofile) as to_f:
          tolines = to_f.readlines()
        print(''.join(
            difflib.unified_diff(fromlines, tolines, fromfile, tofile)))
      # Keep midl's output around so it can be inspected / used to rebaseline.
      delete_tmp_dir = False
      print('To rebaseline:')
      print(r'  copy /y %s\* %s' % (tmp_dir, source))
      return 1
    return 0
  finally:
    if os.path.exists(tmp_dir) and delete_tmp_dir:
      shutil.rmtree(tmp_dir)
if __name__ == '__main__':