# pe_summarize.py - chromium/src/tools/win - Git at Google

 # Copyright 2016 The Chromium Authors
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 """
 Parse information about a PE file to summarize the on-disk and
 in-memory sizes of the sections, in decimal MB instead of in hex. This
 script will also automatically display diffs between two files if they
 have the same name. This script relies on dumpbin.exe (from a Visual Studio
 installation) being on the path and is used to help investigate binary size
 regressions and improvements.

 Section information printed by dumpbin looks like this:

 SECTION HEADER #2
   .rdata name
   5CCD56 virtual size
  1CEF000 virtual address (11CEF000 to 122BBD55)
   5CCE00 size of raw data
  1CEE000 file pointer to raw data (01CEE000 to 022BADFF)
        0 file pointer to relocation table
        0 file pointer to line numbers
        0 number of relocations
        0 number of line numbers
 40000040 flags
          Initialized Data
          Read Only

 The reports generated by this script look like this:

 > python tools\win\pe_summarize.py out\release\chrome.dll
 Size of out\release\chrome.dll is 41.190912 MB
       name:   mem size  ,  disk size
      .text: 33.199959 MB
     .rdata:  6.170416 MB
      .data:  0.713864 MB,  0.270336 MB
       .tls:  0.000025 MB
   CPADinfo:  0.000036 MB
    .rodata:  0.003216 MB
   .crthunk:  0.000064 MB
     .gfids:  0.001052 MB
     _RDATA:  0.000288 MB
      .rsrc:  0.130808 MB
     .reloc:  1.410172 MB

 Note that the .data section has separate in-memory and on-disk sizes due to
 zero-initialized data. Other sections have smaller discrepancies - the disk size
 is only printed if it differs from the memory size by 512 bytes or more.

 Note that many of the sections - such as .text, .rdata, and .rsrc - are shared
 between processes. Some sections - such as .reloc - are discarded after a
 process is loaded. Other sections, such as .data, produce private pages and are
 therefore objectively 'worse' than the others.
 """

 from __future__ import print_function

 import os
 import subprocess
 import sys


 def _FindSection(section_list, section_name):
     """Return the index of the first entry named section_name, or -1.

     Each entry of section_list is a sequence whose first element is the
     section's name; the remaining elements are not examined.
     """
     matching_indices = (index
                         for index, entry in enumerate(section_list)
                         if section_name == entry[0])
     # The generator is lazy, so this stops at the first match.
     return next(matching_indices, -1)


 def _IterSectionSizes(dumpbin_lines):
     """Yield (name, memory_size, disk_size) for each section header found.

     dumpbin_lines is an iterable of text lines. The four lines following each
     'SECTION HEADER #' line are taken to be, in order: the section name, the
     virtual size, a line that is ignored, and the size of raw data. Both sizes
     are parsed as hexadecimal and yielded as integers.
     """
     header_lines = None
     for line in dumpbin_lines:
         if line.startswith('SECTION HEADER #'):
             header_lines = []
         elif header_lines is not None:
             # We must be processing a section header.
             header_lines.append(line.strip())
             # When we've accumulated four lines of data, process them.
             if len(header_lines) == 4:
                 name, memory_size, _, disk_size = header_lines
                 # Sanity-check that the lines are the ones we expect.
                 assert name.count('name') == 1, name
                 assert memory_size.count('virtual size') == 1, memory_size
                 assert disk_size.count('size of raw data') == 1, disk_size
                 yield (name.split()[0],
                        int(memory_size.split()[0], 16),
                        int(disk_size.split()[0], 16))
                 header_lines = None


 def _PrintMemorySizeChanges(last_pe_path, last_results, pe_path, results):
     """Print per-section and total memory-size deltas between two PE files.

     last_results and results are lists of (section_name, memory_size) tuples
     for the previous and the current file respectively.
     """
     print('Memory size change from %s to %s' % (last_pe_path, pe_path))
     total_delta = 0
     for section_name, memory_size in results:
         # Find a matching section name. Mismatches can occur when comparing
         # 32-bit and 64-bit binaries. They can also occur when one of the
         # binaries pulls in code that defines custom sections such as .rodata.
         last_i = _FindSection(last_results, section_name)
         delta = memory_size
         if last_i >= 0:
             delta -= last_results[last_i][1]
         total_delta += delta
         if delta:
             print('%12s: %7d bytes change' % (section_name, delta))
     for section_name, last_memory_size in last_results:
         # Report sections that exist only in last_results as fully removed.
         if _FindSection(results, section_name) < 0:
             delta = -last_memory_size
             total_delta += delta
             print('%12s: %7d bytes change' % (section_name, delta))
     print('Total change: %7d bytes' % total_delta)


 def main():
     """Print section sizes for each PE file named on the command line.

     For every existing path in sys.argv[1:], runs dumpbin and prints each
     section's in-memory size (and its on-disk size when the two differ by 512
     bytes or more). When a file has the same name (ignoring case and
     directory) as the file processed just before it, the per-section memory
     size changes between the two are printed as well.

     Returns:
         0, which the caller passes to sys.exit().
     """
     # Imported locally so this function does not depend on a new file-level
     # import.
     import errno

     if len(sys.argv) < 2:
         print(r'Usage: %s PEFileName [OtherPeFileNames...]' % sys.argv[0])
         print(r'Sample: %s chrome.dll' % sys.argv[0])
         print(r'Sample: %s chrome.dll original\chrome.dll' % sys.argv[0])
         return 0

     # Track the last PE (Portable Executable) file to be processed. The file
     # part is the name only, without the path. None (rather than "") means
     # "nothing processed yet", so that a path with an empty file part cannot
     # match it and trigger a diff against results that do not exist.
     last_pe_filepart = None
     last_pe_path = None
     last_results = []

     for pe_path in sys.argv[1:]:
         if not os.path.exists(pe_path):
             print('%s does not exist!' % pe_path)
             continue

         print('Size of %s is %1.6f MB' %
               (pe_path, os.path.getsize(pe_path) / 1e6))
         print('%10s:  %9s  ,  %9s' % ('name', 'mem size', 'disk size'))

         # Pass the undocumented /nopdb flag to avoid hitting symbol servers
         # for the entrypoint name. An argument list lets subprocess do the
         # quoting of pe_path.
         command = ['dumpbin.exe', '/nopdb', '/headers', pe_path]
         try:
             output = subprocess.check_output(command)
         except OSError as error:
             # WindowsError is an OSError and only exists on Windows, so catch
             # OSError to keep this handler usable on every platform.
             if (getattr(error, 'winerror', None) == 2 or
                     error.errno == errno.ENOENT):
                 print(
                     r'Cannot find dumpbin. Run "C:\Program Files\Microsoft '
                     r'Visual Studio\2022\Professional\VC\Auxiliary\Build'
                     r'\vcvarsall.bat amd64" or similar to add dumpbin to the '
                     r'path.')
             else:
                 print(error)
             break

         # Only ASCII fields are parsed, so replace undecodable bytes rather
         # than failing on them.
         lines = output.decode('utf-8', 'replace').splitlines()

         results = []
         for name, memory_size, disk_size in _IterSectionSizes(lines):
             # Print the sizes in decimal MB. This makes large numbers easier
             # to understand - 33.199959 is easier to read than 33199959.
             # Decimal MB is used to allow simple conversions to a precise
             # number of bytes.
             if abs(memory_size - disk_size) < 512:
                 print('%10s: %9.6f MB' % (name, memory_size / 1e6))
             else:
                 print('%10s: %9.6f MB, %9.6f MB' %
                       (name, memory_size / 1e6, disk_size / 1e6))
             results.append((name, memory_size))

         print()
         pe_filepart = os.path.split(pe_path)[1]
         if (last_pe_filepart is not None and
                 pe_filepart.lower() == last_pe_filepart.lower()):
             # Print out the section-by-section size changes, for memory sizes
             # only.
             _PrintMemorySizeChanges(last_pe_path, last_results, pe_path,
                                     results)
         last_pe_filepart = pe_filepart
         last_pe_path = pe_path
         last_results = results

     return 0


 if __name__ == '__main__':
     # Run only when executed as a script; main()'s return value becomes the
     # process exit status.
     exit_status = main()
     sys.exit(exit_status)
	# Copyright 2016 The Chromium Authors
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.
	"""
	Parse information about a PE file to summarize the on-disk and
	in-memory sizes of the sections, in decimal MB instead of in hex. This
	script will also automatically display diffs between two files if they
	have the same name. This script relies on dumpbin.exe (from a Visual Studio
	installation) being on the path and is used to help investigate binary size
	regressions and improvements.

	Section information printed by dumpbin looks like this:

	SECTION HEADER #2
	.rdata name
	5CCD56 virtual size
	1CEF000 virtual address (11CEF000 to 122BBD55)
	5CCE00 size of raw data
	1CEE000 file pointer to raw data (01CEE000 to 022BADFF)
	0 file pointer to relocation table
	0 file pointer to line numbers
	0 number of relocations
	0 number of line numbers
	40000040 flags
	Initialized Data
	Read Only

	The reports generated by this script look like this:

	> python tools\win\pe_summarize.py out\release\chrome.dll
	Size of out\release\chrome.dll is 41.190912 MB
	name: mem size , disk size
	.text: 33.199959 MB
	.rdata: 6.170416 MB
	.data: 0.713864 MB, 0.270336 MB
	.tls: 0.000025 MB
	CPADinfo: 0.000036 MB
	.rodata: 0.003216 MB
	.crthunk: 0.000064 MB
	.gfids: 0.001052 MB
	_RDATA: 0.000288 MB
	.rsrc: 0.130808 MB
	.reloc: 1.410172 MB

	Note that the .data section has separate in-memory and on-disk sizes due to
	zero-initialized data. Other sections have smaller discrepancies - the disk size
	is only printed if it differs from the memory size by 512 bytes or more.

	Note that many of the sections - such as .text, .rdata, and .rsrc - are shared
	between processes. Some sections - such as .reloc - are discarded after a
	process is loaded. Other sections, such as .data, produce private pages and are
	therefore objectively 'worse' than the others.
	"""

	from __future__ import print_function

	import os
	import subprocess
	import sys


	def _FindSection(section_list, section_name):
	    """Return the index of the first entry named section_name, or -1.

	    Each entry of section_list is a sequence whose first element is the
	    section's name.
	    """
	    for index, entry in enumerate(section_list):
	        if section_name == entry[0]:
	            return index
	    return -1


	def main():
	    """Summarize the section sizes of the PE files given in sys.argv[1:].

	    Each existing file is passed to dumpbin and the memory size of every
	    section is printed in decimal MB, followed by the disk size when the
	    two differ by 512 bytes or more. If a file has the same name (ignoring
	    case and directory) as the one processed immediately before it, the
	    memory size change of each section is printed too.

	    Returns:
	        0, which the caller passes to sys.exit().
	    """
	    # Imported locally so this function does not depend on a new file-level
	    # import.
	    import errno

	    if len(sys.argv) < 2:
	        print(r'Usage: %s PEFileName [OtherPeFileNames...]' % sys.argv[0])
	        print(r'Sample: %s chrome.dll' % sys.argv[0])
	        print(r'Sample: %s chrome.dll original\chrome.dll' % sys.argv[0])
	        return 0

	    # Track the last PE (Portable Executable) file to be processed - file
	    # name only, without the path. None means no file has been processed
	    # yet, so an empty file part can never be mistaken for a match.
	    last_pe_filepart = None
	    last_pe_path = None
	    last_results = []

	    for pe_path in sys.argv[1:]:
	        results = []
	        if not os.path.exists(pe_path):
	            print('%s does not exist!' % pe_path)
	            continue

	        print('Size of %s is %1.6f MB' %
	              (pe_path, os.path.getsize(pe_path) / 1e6))
	        print('%10s: %9s , %9s' % ('name', 'mem size', 'disk size'))

	        # Pass the undocumented /nopdb flag to avoid hitting symbol servers
	        # for the entrypoint name.
	        command = ['dumpbin.exe', '/nopdb', '/headers', pe_path]
	        try:
	            output = subprocess.check_output(command)
	        except OSError as error:
	            # WindowsError only exists on Windows and is an OSError there,
	            # so catching OSError works on every platform.
	            if (getattr(error, 'winerror', None) == 2 or
	                    error.errno == errno.ENOENT):
	                print(
	                    r'Cannot find dumpbin. Run "C:\Program Files\Microsoft '
	                    r'Visual Studio\2022\Professional\VC\Auxiliary\Build'
	                    r'\vcvarsall.bat amd64" or similar to add dumpbin to the '
	                    r'path.')
	            else:
	                print(error)
	            break

	        sections = None
	        # Only ASCII fields are parsed, so undecodable bytes are replaced
	        # instead of aborting the run.
	        for line in output.decode('utf-8', 'replace').splitlines():
	            if line.startswith('SECTION HEADER #'):
	                sections = []
	            elif sections is not None:
	                # We must be processing a section header.
	                sections.append(line.strip())
	                # When we've accumulated four lines of data, process them.
	                if len(sections) == 4:
	                    name, memory_size, _, disk_size = sections
	                    assert name.count('name') == 1
	                    assert memory_size.count('virtual size') == 1
	                    assert disk_size.count('size of raw data') == 1
	                    name = name.split()[0]
	                    memory_size = int(memory_size.split()[0], 16)
	                    disk_size = int(disk_size.split()[0], 16)
	                    # Print the sizes in decimal MB. This makes large
	                    # numbers easier to understand - 33.199959 is easier to
	                    # read than 33199959. Decimal MB is used to allow simple
	                    # conversions to a precise number of bytes.
	                    if abs(memory_size - disk_size) < 512:
	                        print('%10s: %9.6f MB' % (name, memory_size / 1e6))
	                    else:
	                        print('%10s: %9.6f MB, %9.6f MB' %
	                              (name, memory_size / 1e6, disk_size / 1e6))
	                    results.append((name, memory_size))
	                    sections = None

	        print()
	        pe_filepart = os.path.split(pe_path)[1]
	        if (last_pe_filepart is not None and
	                pe_filepart.lower() == last_pe_filepart.lower()):
	            # Print out the section-by-section size changes, for memory
	            # sizes only.
	            print('Memory size change from %s to %s' %
	                  (last_pe_path, pe_path))
	            total_delta = 0
	            for section_name, memory_size in results:
	                # Find a matching section name. Mismatches can occur when
	                # comparing 32-bit and 64-bit binaries. They can also occur
	                # when one of the binaries pulls in code that defines custom
	                # sections such as .rodata.
	                last_i = _FindSection(last_results, section_name)
	                delta = memory_size
	                if last_i >= 0:
	                    delta -= last_results[last_i][1]
	                total_delta += delta
	                if delta:
	                    print('%12s: %7d bytes change' % (section_name, delta))
	            for section_name, last_memory_size in last_results:
	                # Find sections that exist only in last_results.
	                if _FindSection(results, section_name) < 0:
	                    delta = -last_memory_size
	                    total_delta += delta
	                    print('%12s: %7d bytes change' % (section_name, delta))
	            print('Total change: %7d bytes' % total_delta)
	        last_pe_filepart = pe_filepart
	        last_pe_path = pe_path
	        last_results = results

	    return 0


	if __name__ == '__main__':
	    # Run only when executed as a script; main()'s return value becomes the
	    # process exit status.
	    sys.exit(main())