tools/win/pe_summarize.py - chromium/src - Git at Google

 # Copyright (c) 2016 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """
 Parse information about a PE file to summarize the on-disk and
 in-memory sizes of the sections, in decimal MB instead of in hex. This
 script will also automatically display diffs between two files if they
 have the same name. This script relies on having VS 2015 installed and is used
 to help investigate binary size regressions and improvements.

 Section information printed by dumpbin looks like this:

 SECTION HEADER #2
   .rdata name
   5CCD56 virtual size
  1CEF000 virtual address (11CEF000 to 122BBD55)
   5CCE00 size of raw data
  1CEE000 file pointer to raw data (01CEE000 to 022BADFF)
        0 file pointer to relocation table
        0 file pointer to line numbers
        0 number of relocations
        0 number of line numbers
 40000040 flags
          Initialized Data
          Read Only

 The reports generated by this script look like this:

 > python tools\win\pe_summarize.py out\release\chrome.dll
 Size of out\release\chrome.dll is 41.190912 MB
       name:   mem size  ,  disk size
      .text: 33.199959 MB
     .rdata:  6.170416 MB
      .data:  0.713864 MB,  0.270336 MB
       .tls:  0.000025 MB
   CPADinfo:  0.000036 MB
    .rodata:  0.003216 MB
   .crthunk:  0.000064 MB
     .gfids:  0.001052 MB
     _RDATA:  0.000288 MB
      .rsrc:  0.130808 MB
     .reloc:  1.410172 MB

 Note that the .data section has separate in-memory and on-disk sizes due to
 zero-initialized data. Other sections have smaller discrepancies - the disk size
 is only printed if it differs from the memory size by 512 bytes or more.

 Note that many of the sections - such as .text, .rdata, and .rsrc - are shared
 between processes. Some sections - such as .reloc - are discarded after a
 process is loaded. Other sections, such as .data, produce private pages and are
 therefore objectively 'worse' than the others.
 """

 import os
 import subprocess
 import sys


 def _FindSection(section_list, section_name):
   """Returns the index of the first entry in |section_list| whose first element
   equals |section_name|, or -1 if no entry matches."""
   matches = (index for index, entry in enumerate(section_list)
              if section_name == entry[0])
   # Only the first match matters; fall back to -1 when there is none.
   return next(matches, -1)


 def main():
   """Summarizes the section sizes of each PE file named on the command line.

   For every existing path in sys.argv[1:] this runs "dumpbin.exe /headers" and
   prints each section's in-memory (virtual) size in decimal MB, followed by
   the on-disk (raw data) size when the two differ by 512 bytes or more. When a
   file has the same name (ignoring case and directory) as the previously
   processed file, the per-section change in in-memory size is printed too.

   The print calls take a single argument so that they behave identically as
   Python 2 print statements and Python 3 print function calls.

   Returns:
     0 when the usage text is printed or dumpbin.exe cannot be found, None
     otherwise.

   Raises:
     subprocess.CalledProcessError: if dumpbin.exe exits with a non-zero status.
   """
   if len(sys.argv) < 2:
     print(r'Usage: %s PEFileName [OtherPeFileNames...]' % sys.argv[0])
     print(r'Sample: %s chrome.dll' % sys.argv[0])
     print(r'Sample: %s chrome.dll original\chrome.dll' % sys.argv[0])
     return 0

   # Add to the path so that dumpbin can run.
   vs_dir = r'C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\amd64'
   if not os.path.exists(os.path.join(vs_dir, 'dumpbin.exe')):
     print("Couldn't find dumpbin.exe. Visual Studio 2015 must be installed.")
     return 0
   os.environ['PATH'] = vs_dir + ';' + os.environ['PATH']

   # Track the last PE (Portable Executable) file to be processed. The file part
   # is the file name only, without the path. The path and results are given
   # initial values so the comparison code below never reads an unbound name.
   last_pe_filepart = ''
   last_pe_path = None
   last_results = []

   for pe_path in sys.argv[1:]:
     if not os.path.exists(pe_path):
       print('%s does not exist!' % pe_path)
       continue

     print('Size of %s is %1.6f MB' % (pe_path, os.path.getsize(pe_path) / 1e6))
     print('%10s:  %9s  ,  %9s' % ('name', 'mem size', 'disk size'))

     # (section name, in-memory size in bytes) for each section of this file.
     results = []
     # None while outside a section header, otherwise the stripped header lines
     # collected so far.
     sections = None
     # Passing an argument list leaves the quoting of pe_path to subprocess.
     command = ['dumpbin.exe', '/headers', pe_path]
     # universal_newlines=True makes check_output return text instead of bytes
     # on Python 3, so the string comparisons below work on Python 2 and 3.
     output = subprocess.check_output(command, universal_newlines=True)
     for line in output.splitlines():
       if line.startswith('SECTION HEADER #'):
         sections = []
       elif sections is not None:
         # We must be processing a section header.
         sections.append(line.strip())
         # When we've accumulated four lines of data, process them.
         if len(sections) == 4:
           name, memory_size, _, disk_size = sections
           # Sanity-check that dumpbin's layout is the one this parser expects.
           assert name.count('name') == 1
           assert memory_size.count('virtual size') == 1
           assert disk_size.count('size of raw data') == 1
           name = name.split()[0]
           memory_size = int(memory_size.split()[0], 16)
           disk_size = int(disk_size.split()[0], 16)
           # Print the sizes in decimal MB. This makes large numbers easier to
           # understand - 33.199959 is easier to read than 33199959. Decimal MB
           # is used to allow simple conversions to a precise number of bytes.
           if abs(memory_size - disk_size) < 512:
             print('%10s: %9.6f MB' % (name, memory_size / 1e6))
           else:
             print('%10s: %9.6f MB, %9.6f MB' % (name, memory_size / 1e6,
                                                 disk_size / 1e6))
           results.append((name, memory_size))
           # Ignore the rest of this header until the next one starts.
           sections = None

     print('')
     pe_filepart = os.path.split(pe_path)[1]
     if pe_filepart.lower() == last_pe_filepart.lower():
       # Print out the section-by-section size changes, for memory sizes only.
       print('Memory size change from %s to %s' % (last_pe_path, pe_path))
       total_delta = 0
       for section_name, size in results:
         # Find a matching section name. Mismatches can occur when comparing
         # 32-bit and 64-bit binaries. They can also occur when one of the
         # binaries pulls in code that defines custom sections such as .rodata.
         last_i = _FindSection(last_results, section_name)
         delta = size
         if last_i >= 0:
           delta -= last_results[last_i][1]
         total_delta += delta
         if delta:
           print('%12s: %7d bytes change' % (section_name, delta))
       for section_name, last_size in last_results:
         # Find sections that exist only in last_results.
         if _FindSection(results, section_name) < 0:
           delta = -last_size
           total_delta += delta
           print('%12s: %7d bytes change' % (section_name, delta))
       print('Total change: %7d bytes' % total_delta)
     last_pe_filepart = pe_filepart
     last_pe_path = pe_path
     last_results = results


 # Run main() only when executed as a script, and hand its return value to
 # sys.exit() as the exit status.
 if __name__ == '__main__':
   exit_status = main()
   sys.exit(exit_status)
	# Copyright (c) 2016 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	"""
	Parse information about a PE file to summarize the on-disk and
	in-memory sizes of the sections, in decimal MB instead of in hex. This
	script will also automatically display diffs between two files if they
	have the same name. This script relies on having VS 2015 installed and is used
	to help investigate binary size regressions and improvements.

	Section information printed by dumpbin looks like this:

	SECTION HEADER #2
	.rdata name
	5CCD56 virtual size
	1CEF000 virtual address (11CEF000 to 122BBD55)
	5CCE00 size of raw data
	1CEE000 file pointer to raw data (01CEE000 to 022BADFF)
	0 file pointer to relocation table
	0 file pointer to line numbers
	0 number of relocations
	0 number of line numbers
	40000040 flags
	Initialized Data
	Read Only

	The reports generated by this script look like this:

	> python tools\win\pe_summarize.py out\release\chrome.dll
	Size of out\release\chrome.dll is 41.190912 MB
	name: mem size , disk size
	.text: 33.199959 MB
	.rdata: 6.170416 MB
	.data: 0.713864 MB, 0.270336 MB
	.tls: 0.000025 MB
	CPADinfo: 0.000036 MB
	.rodata: 0.003216 MB
	.crthunk: 0.000064 MB
	.gfids: 0.001052 MB
	_RDATA: 0.000288 MB
	.rsrc: 0.130808 MB
	.reloc: 1.410172 MB

	Note that the .data section has separate in-memory and on-disk sizes due to
	zero-initialized data. Other sections have smaller discrepancies - the disk size
	is only printed if it differs from the memory size by 512 bytes or more.

	Note that many of the sections - such as .text, .rdata, and .rsrc - are shared
	between processes. Some sections - such as .reloc - are discarded after a
	process is loaded. Other sections, such as .data, produce private pages and are
	therefore objectively 'worse' than the others.
	"""

	import os
	import subprocess
	import sys


	def _FindSection(section_list, section_name):
	  """Returns the index of the first entry in |section_list| whose first element
	  equals |section_name|, or -1 if no entry matches."""
	  for index, entry in enumerate(section_list):
	    if section_name == entry[0]:
	      return index
	  return -1


	def main():
	  """Summarizes the section sizes of each PE file named on the command line.

	  For every existing path in sys.argv[1:] this runs "dumpbin.exe /headers" and
	  prints each section's in-memory (virtual) size in decimal MB, followed by
	  the on-disk (raw data) size when the two differ by 512 bytes or more. When a
	  file has the same name (ignoring case and directory) as the previously
	  processed file, the per-section change in in-memory size is printed too.

	  The print calls take a single argument so that they behave identically as
	  Python 2 print statements and Python 3 print function calls.

	  Returns:
	    0 when the usage text is printed or dumpbin.exe cannot be found, None
	    otherwise.

	  Raises:
	    subprocess.CalledProcessError: if dumpbin.exe exits with a non-zero status.
	  """
	  if len(sys.argv) < 2:
	    print(r'Usage: %s PEFileName [OtherPeFileNames...]' % sys.argv[0])
	    print(r'Sample: %s chrome.dll' % sys.argv[0])
	    print(r'Sample: %s chrome.dll original\chrome.dll' % sys.argv[0])
	    return 0

	  # Add to the path so that dumpbin can run.
	  vs_dir = r'C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\amd64'
	  if not os.path.exists(os.path.join(vs_dir, 'dumpbin.exe')):
	    print("Couldn't find dumpbin.exe. Visual Studio 2015 must be installed.")
	    return 0
	  os.environ['PATH'] = vs_dir + ';' + os.environ['PATH']

	  # Track the last PE (Portable Executable) file to be processed. The file part
	  # is the file name only, without the path. The path and results are given
	  # initial values so the comparison code below never reads an unbound name.
	  last_pe_filepart = ''
	  last_pe_path = None
	  last_results = []

	  for pe_path in sys.argv[1:]:
	    if not os.path.exists(pe_path):
	      print('%s does not exist!' % pe_path)
	      continue

	    print('Size of %s is %1.6f MB' % (pe_path, os.path.getsize(pe_path) / 1e6))
	    # NOTE(review): the spacing inside this header format may have been
	    # collapsed together with the file's indentation - confirm the alignment.
	    print('%10s: %9s , %9s' % ('name', 'mem size', 'disk size'))

	    # (section name, in-memory size in bytes) for each section of this file.
	    results = []
	    # None while outside a section header, otherwise the stripped header lines
	    # collected so far.
	    sections = None
	    # Passing an argument list leaves the quoting of pe_path to subprocess.
	    command = ['dumpbin.exe', '/headers', pe_path]
	    # universal_newlines=True makes check_output return text instead of bytes
	    # on Python 3, so the string comparisons below work on Python 2 and 3.
	    output = subprocess.check_output(command, universal_newlines=True)
	    for line in output.splitlines():
	      if line.startswith('SECTION HEADER #'):
	        sections = []
	      elif sections is not None:
	        # We must be processing a section header.
	        sections.append(line.strip())
	        # When we've accumulated four lines of data, process them.
	        if len(sections) == 4:
	          name, memory_size, _, disk_size = sections
	          # Sanity-check that dumpbin's layout is the one this parser expects.
	          assert name.count('name') == 1
	          assert memory_size.count('virtual size') == 1
	          assert disk_size.count('size of raw data') == 1
	          name = name.split()[0]
	          memory_size = int(memory_size.split()[0], 16)
	          disk_size = int(disk_size.split()[0], 16)
	          # Print the sizes in decimal MB. This makes large numbers easier to
	          # understand - 33.199959 is easier to read than 33199959. Decimal MB
	          # is used to allow simple conversions to a precise number of bytes.
	          if abs(memory_size - disk_size) < 512:
	            print('%10s: %9.6f MB' % (name, memory_size / 1e6))
	          else:
	            print('%10s: %9.6f MB, %9.6f MB' % (name, memory_size / 1e6,
	                                                disk_size / 1e6))
	          results.append((name, memory_size))
	          # Ignore the rest of this header until the next one starts.
	          sections = None

	    print('')
	    pe_filepart = os.path.split(pe_path)[1]
	    if pe_filepart.lower() == last_pe_filepart.lower():
	      # Print out the section-by-section size changes, for memory sizes only.
	      print('Memory size change from %s to %s' % (last_pe_path, pe_path))
	      total_delta = 0
	      for section_name, size in results:
	        # Find a matching section name. Mismatches can occur when comparing
	        # 32-bit and 64-bit binaries. They can also occur when one of the
	        # binaries pulls in code that defines custom sections such as .rodata.
	        last_i = _FindSection(last_results, section_name)
	        delta = size
	        if last_i >= 0:
	          delta -= last_results[last_i][1]
	        total_delta += delta
	        if delta:
	          print('%12s: %7d bytes change' % (section_name, delta))
	      for section_name, last_size in last_results:
	        # Find sections that exist only in last_results.
	        if _FindSection(results, section_name) < 0:
	          delta = -last_size
	          total_delta += delta
	          print('%12s: %7d bytes change' % (section_name, delta))
	      print('Total change: %7d bytes' % total_delta)
	    last_pe_filepart = pe_filepart
	    last_pe_path = pe_path
	    last_results = results


	# Run main() only when executed as a script, and hand its return value to
	# sys.exit() as the exit status.
	if __name__ == '__main__':
	  sys.exit(main())