src/tools/binary_size/map2size.py - external/github.com/google/proto-quic - Git at Google

 #!/usr/bin/env python
 # Copyright 2017 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """Main Python API for analyzing binary size."""

 import argparse
 import datetime
 import distutils.spawn
 import gzip
 import logging
 import os
 import re
 import subprocess
 import sys

 import describe
 import file_format
 import function_signature
 import helpers
 import linker_map_parser
 import models
 import ninja_parser


 def _OpenMaybeGz(path, mode=None):
   """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`."""
   if path.endswith('.gz'):
     if mode and 'w' in mode:
       return gzip.GzipFile(path, mode, 1)
     return gzip.open(path, mode)
   return open(path, mode or 'r')


 def _UnmangleRemainingSymbols(symbol_group, tool_prefix):
   """Uses c++filt to unmangle any symbols that need it."""
   to_process = [s for s in symbol_group if s.name.startswith('_Z')]
   if not to_process:
     return

   logging.info('Unmangling %d names', len(to_process))
   proc = subprocess.Popen([tool_prefix + 'c++filt'], stdin=subprocess.PIPE,
                           stdout=subprocess.PIPE)
   stdout = proc.communicate('\n'.join(s.name for s in to_process))[0]
   assert proc.returncode == 0

   for i, line in enumerate(stdout.splitlines()):
     to_process[i].name = line


 def _NormalizeNames(symbol_group):
   """Ensures that all names are formatted in a useful way.

   This includes:
     - Assigning of |full_name|.
     - Stripping of return types in |full_name| and |name| (for functions).
     - Stripping parameters from |name|.
     - Moving "vtable for" and the like to be suffixes rather than prefixes.
   """
   found_prefixes = set()
   for symbol in symbol_group:
     if symbol.name.startswith('*'):
       # See comment in _RemoveDuplicatesAndCalculatePadding() about when this
       # can happen.
       continue

     # E.g.: vtable for FOO
     idx = symbol.name.find(' for ', 0, 30)
     if idx != -1:
       found_prefixes.add(symbol.name[:idx + 4])
       symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']'

     # E.g.: virtual thunk to FOO
     idx = symbol.name.find(' to ', 0, 30)
     if idx != -1:
       found_prefixes.add(symbol.name[:idx + 3])
       symbol.name = symbol.name[idx + 4:] + ' [' + symbol.name[:idx] + ']'

     # Strip out return type, and identify where parameter list starts.
     if symbol.section == 't':
       symbol.full_name, symbol.name = function_signature.Parse(symbol.name)

     # Remove anonymous namespaces (they just harm clustering).
     non_anonymous = symbol.name.replace('(anonymous namespace)::', '')
     if symbol.name != non_anonymous:
       symbol.is_anonymous = True
       symbol.name = non_anonymous
       symbol.full_name = symbol.full_name.replace(
           '(anonymous namespace)::', '')

     if symbol.section != 't' and '(' in symbol.name:
       # Pretty rare. Example:
       # blink::CSSValueKeywordsHash::findValueImpl(char const*)::value_word_list
       symbol.full_name = symbol.name
       symbol.name = re.sub(r'\(.*\)', '', symbol.full_name)

   logging.debug('Found name prefixes of: %r', found_prefixes)


 def _NormalizeObjectPaths(symbol_group):
   """Ensures that all paths are formatted in a useful way."""
   for symbol in symbol_group:
     path = symbol.object_path
     if path.startswith('obj/'):
       # Convert obj/third_party/... -> third_party/...
       path = path[4:]
     elif path.startswith('../../'):
       # Convert ../../third_party/... -> third_party/...
       path = path[6:]
     if path.endswith(')'):
       # Convert foo/bar.a(baz.o) -> foo/bar.a/(baz.o)
       start_idx = path.index('(')
       path = os.path.join(path[:start_idx], path[start_idx:])
     symbol.object_path = path


 def _NormalizeSourcePath(path):
   if path.startswith('gen/'):
     # Convert gen/third_party/... -> third_party/...
     return path[4:]
   if path.startswith('../../'):
     # Convert ../../third_party/... -> third_party/...
     return path[6:]
   return path


 def _ExtractSourcePaths(symbol_group, output_directory):
   """Fills in the .source_path attribute of all symbols.

   Symbols that already have a source path, have no object path, or whose
   object path starts with ".." are skipped.

   Args:
     symbol_group: Iterable of symbols to update in place.
     output_directory: Passed to ninja_parser.SourceFileMapper().
   """
   mapper = ninja_parser.SourceFileMapper(output_directory)

   for symbol in symbol_group:
     object_path = symbol.object_path
     if symbol.source_path or not object_path:
       continue
     if object_path.startswith('..'):
       # We don't have source info for prebuilt .a files.
       continue
     found = mapper.FindSourceForPath(object_path)
     if not found:
       logging.warning('Could not find source path for %s', object_path)
       continue
     symbol.source_path = _NormalizeSourcePath(found)
   logging.debug('Parsed %d .ninja files.', mapper.GetParsedFileCount())


 def _RemoveDuplicatesAndCalculatePadding(symbol_group):
   """Removes symbols at the same address and calculates the |padding| field.

   Symbols must already be sorted by |address|.
   """
   to_remove = set()
   all_symbols = symbol_group.symbols
   for i, symbol in enumerate(all_symbols[1:]):
     prev_symbol = all_symbols[i]
     if prev_symbol.section_name != symbol.section_name:
       continue
     if symbol.address > 0 and prev_symbol.address > 0:
       # Fold symbols that are at the same address (happens in nm output).
       if symbol.address == prev_symbol.address:
         symbol.size = max(prev_symbol.size, symbol.size)
         to_remove.add(i + 1)
         continue
       # Even with symbols at the same address removed, overlaps can still
       # happen. In this case, padding will be negative (and this is fine).
       padding = symbol.address - prev_symbol.end_address
       # These thresholds were found by manually auditing arm32 Chrome.
       # E.g.: Set them to 0 and see what warnings get logged.
       # TODO(agrieve): See if these thresholds make sense for architectures
       #     other than arm32.
       if (symbol.section in 'rd' and padding >= 256 or
           symbol.section in 't' and padding >= 64):
         # For nm data, this is caused by data that has no associated symbol.
         # The linker map file lists them with no name, but with a file.
         # Example:
         #   .data 0x02d42764 0x120 .../V8SharedWorkerGlobalScope.o
         # Where as most look like:
         #   .data.MANGLED_NAME...
         logging.debug('Large padding of %d between:\n  A) %r\n  B) %r' % (
                       padding, prev_symbol, symbol))
         continue
       symbol.padding = padding
       symbol.size += padding
       assert symbol.size >= 0, 'Symbol has negative size: ' + (
           '%r\nprev symbol: %r' % (symbol, prev_symbol))
   # Map files have no overlaps, so worth special-casing the no-op case.
   if to_remove:
     logging.info('Removing %d overlapping symbols', len(to_remove))
     symbol_group.symbols = (
         [s for i, s in enumerate(all_symbols) if i not in to_remove])


 def AddOptions(parser):
   """Registers --tool-prefix and --output-directory on |parser|."""
   options = (
       ('--tool-prefix', dict(default='', help='Path prefix for c++filt.')),
       ('--output-directory',
        dict(help='Path to the root build directory.')),
   )
   for flag, kwargs in options:
     parser.add_argument(flag, **kwargs)


 def _DetectToolPrefix(tool_prefix, input_file, output_directory=None):
   """Detects values for --tool-prefix and --output-directory."""
   if not output_directory:
     abs_path = os.path.abspath(input_file)
     release_idx = abs_path.find('Release')
     if release_idx != -1:
       output_directory = abs_path[:release_idx] + 'Release'
       output_directory = os.path.relpath(abs_path[:release_idx] + '/Release')
       logging.debug('Detected --output-directory=%s', output_directory)

   if not tool_prefix and output_directory:
     # Auto-detect from build_vars.txt
     build_vars_path = os.path.join(output_directory, 'build_vars.txt')
     if os.path.exists(build_vars_path):
       with open(build_vars_path) as f:
         build_vars = dict(l.rstrip().split('=', 1) for l in f if '=' in l)
       logging.debug('Found --tool-prefix from build_vars.txt')
       tool_prefix = os.path.join(output_directory,
                                  build_vars['android_tool_prefix'])

   if os.path.sep not in tool_prefix:
     full_path = distutils.spawn.find_executable(tool_prefix + 'c++filt')
   else:
     full_path = tool_prefix + 'c++filt'

   if not full_path or not os.path.isfile(full_path):
     raise Exception('Bad --tool-prefix. Path not found: %s' % full_path)
   if not output_directory or not os.path.isdir(output_directory):
     raise Exception('Bad --output-directory. Path not found: %s' %
                     output_directory)
   logging.info('Using --output-directory=%s', output_directory)
   logging.info('Using --tool-prefix=%s', tool_prefix)
   return output_directory, tool_prefix


 def AnalyzeWithArgs(args, input_path):
   """Calls Analyze() on |input_path| using the options stored in |args|."""
   output_directory = args.output_directory
   tool_prefix = args.tool_prefix
   return Analyze(input_path, output_directory, tool_prefix)


 def Analyze(path, output_directory=None, tool_prefix=''):
   """Builds a SizeInfo from a .size file or from a linker .map(.gz) file.

   Args:
     path: Input file. Must end in ".size", ".map" or ".map.gz".
     output_directory: Build directory, forwarded to _DetectToolPrefix() for
         map inputs.
     tool_prefix: Prefix for c++filt, forwarded to _DetectToolPrefix() for map
         inputs.

   Returns:
     The loaded or newly created SizeInfo.

   Raises:
     Exception: If |path| has an unsupported extension.
   """
   if path.endswith('.size'):
     logging.debug('Loading results from: %s', path)
     size_info = file_format.LoadSizeInfo(path)
     # Padding and function names are derived values, so rebuild them.
     logging.info('Calculating padding')
     _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
     logging.info('Deriving signatures')
     _NormalizeNames(size_info.symbols)
     return size_info

   if not path.endswith(('.map', '.map.gz')):
     raise Exception('Expected input to be a .map or a .size')

   # Resolve the tool prefix first so a bad value fails before any parsing.
   detected = _DetectToolPrefix(tool_prefix, path, output_directory)
   output_directory, tool_prefix = detected

   with _OpenMaybeGz(path) as map_file:
     map_parser = linker_map_parser.MapFileParser()
     section_sizes, symbols = map_parser.Parse(map_file)
   timestamp = datetime.datetime.utcfromtimestamp(os.path.getmtime(path))
   symbol_group = models.SymbolGroup(symbols)
   size_info = models.SizeInfo(section_sizes, symbol_group,
                               timestamp=timestamp)

   logging.info('Calculating padding')
   _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
   # The map file leaves some names mangled. The helper logs for itself.
   _UnmangleRemainingSymbols(size_info.symbols, tool_prefix)
   logging.info('Extracting source paths from .ninja files')
   _ExtractSourcePaths(size_info.symbols, output_directory)
   logging.info('Normalizing names')
   _NormalizeNames(size_info.symbols)
   logging.info('Normalizing paths')
   _NormalizeObjectPaths(size_info.symbols)

   # Only build the coverage description when it will actually be logged.
   if logging.getLogger().isEnabledFor(logging.INFO):
     for line in describe.DescribeSizeInfoCoverage(size_info):
       logging.info(line)
   logging.info('Finished analyzing %d symbols', len(size_info.symbols))
   return size_info


 def _DetectGitRevision(path):
   try:
     git_rev = subprocess.check_output(
         ['git', '-C', os.path.dirname(path), 'rev-parse', 'HEAD'])
     return git_rev.rstrip()
   except Exception:
     logging.warning('Failed to detect git revision for file metadata.')
     return None


 def main(argv):
   """Command-line entry point: analyzes an input file and saves a .size file.

   Args:
     argv: Argument list, forwarded to helpers.AddCommonOptionsAndParseArgs().
   """
   # The first positional parameter of ArgumentParser is |prog|, not the
   # argument list, so |argv| must not be passed to the constructor.
   parser = argparse.ArgumentParser(description=__doc__)
   parser.add_argument('input_file', help='Path to input .map file.')
   parser.add_argument('output_file', help='Path to output .size(.gz) file.')
   AddOptions(parser)
   args = helpers.AddCommonOptionsAndParseArgs(parser, argv)
   if not args.output_file.endswith('.size'):
     parser.error('output_file must end with .size')

   size_info = AnalyzeWithArgs(args, args.input_file)
   # The tag is only written when the input is not itself a .size file.
   if not args.input_file.endswith('.size'):
     git_rev = _DetectGitRevision(args.input_file)
     size_info.tag = 'Filename=%s git_rev=%s' % (
         os.path.basename(args.input_file), git_rev)
   logging.info('Recording metadata: %s',
                describe.DescribeSizeInfoMetadata(size_info))
   logging.info('Saving result to %s', args.output_file)
   file_format.SaveSizeInfo(size_info, args.output_file)

   logging.info('Done')


 # Run main() only when executed as a script; its return value becomes the
 # process exit status.
 if __name__ == '__main__':
   sys.exit(main(sys.argv))
	#!/usr/bin/env python
	# Copyright 2017 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	"""Main Python API for analyzing binary size."""

	import argparse
	import datetime
	import distutils.spawn
	import gzip
	import logging
	import os
	import re
	import subprocess
	import sys

	import describe
	import file_format
	import function_signature
	import helpers
	import linker_map_parser
	import models
	import ninja_parser


	def _OpenMaybeGz(path, mode=None):
	"""Calls `gzip.open()` if \|path\| ends in ".gz", otherwise calls `open()`."""
	if path.endswith('.gz'):
	if mode and 'w' in mode:
	return gzip.GzipFile(path, mode, 1)
	return gzip.open(path, mode)
	return open(path, mode or 'r')


	def _UnmangleRemainingSymbols(symbol_group, tool_prefix):
	"""Uses c++filt to unmangle any symbols that need it."""
	to_process = [s for s in symbol_group if s.name.startswith('_Z')]
	if not to_process:
	return

	logging.info('Unmangling %d names', len(to_process))
	proc = subprocess.Popen([tool_prefix + 'c++filt'], stdin=subprocess.PIPE,
	stdout=subprocess.PIPE)
	stdout = proc.communicate('\n'.join(s.name for s in to_process))[0]
	assert proc.returncode == 0

	for i, line in enumerate(stdout.splitlines()):
	to_process[i].name = line


	def _NormalizeNames(symbol_group):
	"""Ensures that all names are formatted in a useful way.

	This includes:
	- Assigning of \|full_name\|.
	- Stripping of return types in \|full_name\| and \|name\| (for functions).
	- Stripping parameters from \|name\|.
	- Moving "vtable for" and the like to be suffixes rather than prefixes.
	"""
	found_prefixes = set()
	for symbol in symbol_group:
	if symbol.name.startswith('*'):
	# See comment in _RemoveDuplicatesAndCalculatePadding() about when this
	# can happen.
	continue

	# E.g.: vtable for FOO
	idx = symbol.name.find(' for ', 0, 30)
	if idx != -1:
	found_prefixes.add(symbol.name[:idx + 4])
	symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']'

	# E.g.: virtual thunk to FOO
	idx = symbol.name.find(' to ', 0, 30)
	if idx != -1:
	found_prefixes.add(symbol.name[:idx + 3])
	symbol.name = symbol.name[idx + 4:] + ' [' + symbol.name[:idx] + ']'

	# Strip out return type, and identify where parameter list starts.
	if symbol.section == 't':
	symbol.full_name, symbol.name = function_signature.Parse(symbol.name)

	# Remove anonymous namespaces (they just harm clustering).
	non_anonymous = symbol.name.replace('(anonymous namespace)::', '')
	if symbol.name != non_anonymous:
	symbol.is_anonymous = True
	symbol.name = non_anonymous
	symbol.full_name = symbol.full_name.replace(
	'(anonymous namespace)::', '')

	if symbol.section != 't' and '(' in symbol.name:
	# Pretty rare. Example:
	# blink::CSSValueKeywordsHash::findValueImpl(char const*)::value_word_list
	symbol.full_name = symbol.name
	symbol.name = re.sub(r'\(.*\)', '', symbol.full_name)

	logging.debug('Found name prefixes of: %r', found_prefixes)


	def _NormalizeObjectPaths(symbol_group):
	"""Ensures that all paths are formatted in a useful way."""
	for symbol in symbol_group:
	path = symbol.object_path
	if path.startswith('obj/'):
	# Convert obj/third_party/... -> third_party/...
	path = path[4:]
	elif path.startswith('../../'):
	# Convert ../../third_party/... -> third_party/...
	path = path[6:]
	if path.endswith(')'):
	# Convert foo/bar.a(baz.o) -> foo/bar.a/(baz.o)
	start_idx = path.index('(')
	path = os.path.join(path[:start_idx], path[start_idx:])
	symbol.object_path = path


	def _NormalizeSourcePath(path):
	if path.startswith('gen/'):
	# Convert gen/third_party/... -> third_party/...
	return path[4:]
	if path.startswith('../../'):
	# Convert ../../third_party/... -> third_party/...
	return path[6:]
	return path


	def _ExtractSourcePaths(symbol_group, output_directory):
	  """Fills in the .source_path attribute of all symbols.

	  Symbols that already have a source path, have no object path, or whose
	  object path starts with ".." are skipped.

	  Args:
	    symbol_group: Iterable of symbols to update in place.
	    output_directory: Passed to ninja_parser.SourceFileMapper().
	  """
	  mapper = ninja_parser.SourceFileMapper(output_directory)

	  for symbol in symbol_group:
	    object_path = symbol.object_path
	    if symbol.source_path or not object_path:
	      continue
	    if object_path.startswith('..'):
	      # We don't have source info for prebuilt .a files.
	      continue
	    found = mapper.FindSourceForPath(object_path)
	    if not found:
	      logging.warning('Could not find source path for %s', object_path)
	      continue
	    symbol.source_path = _NormalizeSourcePath(found)
	  logging.debug('Parsed %d .ninja files.', mapper.GetParsedFileCount())


	def _RemoveDuplicatesAndCalculatePadding(symbol_group):
	"""Removes symbols at the same address and calculates the \|padding\| field.

	Symbols must already be sorted by \|address\|.
	"""
	to_remove = set()
	all_symbols = symbol_group.symbols
	for i, symbol in enumerate(all_symbols[1:]):
	prev_symbol = all_symbols[i]
	if prev_symbol.section_name != symbol.section_name:
	continue
	if symbol.address > 0 and prev_symbol.address > 0:
	# Fold symbols that are at the same address (happens in nm output).
	if symbol.address == prev_symbol.address:
	symbol.size = max(prev_symbol.size, symbol.size)
	to_remove.add(i + 1)
	continue
	# Even with symbols at the same address removed, overlaps can still
	# happen. In this case, padding will be negative (and this is fine).
	padding = symbol.address - prev_symbol.end_address
	# These thresholds were found by manually auditing arm32 Chrome.
	# E.g.: Set them to 0 and see what warnings get logged.
	# TODO(agrieve): See if these thresholds make sense for architectures
	# other than arm32.
	if (symbol.section in 'rd' and padding >= 256 or
	symbol.section in 't' and padding >= 64):
	# For nm data, this is caused by data that has no associated symbol.
	# The linker map file lists them with no name, but with a file.
	# Example:
	# .data 0x02d42764 0x120 .../V8SharedWorkerGlobalScope.o
	# Where as most look like:
	# .data.MANGLED_NAME...
	logging.debug('Large padding of %d between:\n A) %r\n B) %r' % (
	padding, prev_symbol, symbol))
	continue
	symbol.padding = padding
	symbol.size += padding
	assert symbol.size >= 0, 'Symbol has negative size: ' + (
	'%r\nprev symbol: %r' % (symbol, prev_symbol))
	# Map files have no overlaps, so worth special-casing the no-op case.
	if to_remove:
	logging.info('Removing %d overlapping symbols', len(to_remove))
	symbol_group.symbols = (
	[s for i, s in enumerate(all_symbols) if i not in to_remove])


	def AddOptions(parser):
	  """Registers --tool-prefix and --output-directory on |parser|."""
	  options = (
	      ('--tool-prefix', dict(default='', help='Path prefix for c++filt.')),
	      ('--output-directory',
	       dict(help='Path to the root build directory.')),
	  )
	  for flag, kwargs in options:
	    parser.add_argument(flag, **kwargs)


	def _DetectToolPrefix(tool_prefix, input_file, output_directory=None):
	"""Detects values for --tool-prefix and --output-directory."""
	if not output_directory:
	abs_path = os.path.abspath(input_file)
	release_idx = abs_path.find('Release')
	if release_idx != -1:
	output_directory = abs_path[:release_idx] + 'Release'
	output_directory = os.path.relpath(abs_path[:release_idx] + '/Release')
	logging.debug('Detected --output-directory=%s', output_directory)

	if not tool_prefix and output_directory:
	# Auto-detect from build_vars.txt
	build_vars_path = os.path.join(output_directory, 'build_vars.txt')
	if os.path.exists(build_vars_path):
	with open(build_vars_path) as f:
	build_vars = dict(l.rstrip().split('=', 1) for l in f if '=' in l)
	logging.debug('Found --tool-prefix from build_vars.txt')
	tool_prefix = os.path.join(output_directory,
	build_vars['android_tool_prefix'])

	if os.path.sep not in tool_prefix:
	full_path = distutils.spawn.find_executable(tool_prefix + 'c++filt')
	else:
	full_path = tool_prefix + 'c++filt'

	if not full_path or not os.path.isfile(full_path):
	raise Exception('Bad --tool-prefix. Path not found: %s' % full_path)
	if not output_directory or not os.path.isdir(output_directory):
	raise Exception('Bad --output-directory. Path not found: %s' %
	output_directory)
	logging.info('Using --output-directory=%s', output_directory)
	logging.info('Using --tool-prefix=%s', tool_prefix)
	return output_directory, tool_prefix


	def AnalyzeWithArgs(args, input_path):
	  """Calls Analyze() on |input_path| using the options stored in |args|."""
	  output_directory = args.output_directory
	  tool_prefix = args.tool_prefix
	  return Analyze(input_path, output_directory, tool_prefix)


	def Analyze(path, output_directory=None, tool_prefix=''):
	  """Builds a SizeInfo from a .size file or from a linker .map(.gz) file.

	  Args:
	    path: Input file. Must end in ".size", ".map" or ".map.gz".
	    output_directory: Build directory, forwarded to _DetectToolPrefix() for
	        map inputs.
	    tool_prefix: Prefix for c++filt, forwarded to _DetectToolPrefix() for map
	        inputs.

	  Returns:
	    The loaded or newly created SizeInfo.

	  Raises:
	    Exception: If |path| has an unsupported extension.
	  """
	  if path.endswith('.size'):
	    logging.debug('Loading results from: %s', path)
	    size_info = file_format.LoadSizeInfo(path)
	    # Padding and function names are derived values, so rebuild them.
	    logging.info('Calculating padding')
	    _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
	    logging.info('Deriving signatures')
	    _NormalizeNames(size_info.symbols)
	    return size_info

	  if not path.endswith(('.map', '.map.gz')):
	    raise Exception('Expected input to be a .map or a .size')

	  # Resolve the tool prefix first so a bad value fails before any parsing.
	  detected = _DetectToolPrefix(tool_prefix, path, output_directory)
	  output_directory, tool_prefix = detected

	  with _OpenMaybeGz(path) as map_file:
	    map_parser = linker_map_parser.MapFileParser()
	    section_sizes, symbols = map_parser.Parse(map_file)
	  timestamp = datetime.datetime.utcfromtimestamp(os.path.getmtime(path))
	  symbol_group = models.SymbolGroup(symbols)
	  size_info = models.SizeInfo(section_sizes, symbol_group,
	                              timestamp=timestamp)

	  logging.info('Calculating padding')
	  _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
	  # The map file leaves some names mangled. The helper logs for itself.
	  _UnmangleRemainingSymbols(size_info.symbols, tool_prefix)
	  logging.info('Extracting source paths from .ninja files')
	  _ExtractSourcePaths(size_info.symbols, output_directory)
	  logging.info('Normalizing names')
	  _NormalizeNames(size_info.symbols)
	  logging.info('Normalizing paths')
	  _NormalizeObjectPaths(size_info.symbols)

	  # Only build the coverage description when it will actually be logged.
	  if logging.getLogger().isEnabledFor(logging.INFO):
	    for line in describe.DescribeSizeInfoCoverage(size_info):
	      logging.info(line)
	  logging.info('Finished analyzing %d symbols', len(size_info.symbols))
	  return size_info


	def _DetectGitRevision(path):
	try:
	git_rev = subprocess.check_output(
	['git', '-C', os.path.dirname(path), 'rev-parse', 'HEAD'])
	return git_rev.rstrip()
	except Exception:
	logging.warning('Failed to detect git revision for file metadata.')
	return None


	def main(argv):
	  """Command-line entry point: analyzes an input file and saves a .size file.

	  Args:
	    argv: Argument list, forwarded to helpers.AddCommonOptionsAndParseArgs().
	  """
	  # The first positional parameter of ArgumentParser is |prog|, not the
	  # argument list, so |argv| must not be passed to the constructor.
	  parser = argparse.ArgumentParser(description=__doc__)
	  parser.add_argument('input_file', help='Path to input .map file.')
	  parser.add_argument('output_file', help='Path to output .size(.gz) file.')
	  AddOptions(parser)
	  args = helpers.AddCommonOptionsAndParseArgs(parser, argv)
	  if not args.output_file.endswith('.size'):
	    parser.error('output_file must end with .size')

	  size_info = AnalyzeWithArgs(args, args.input_file)
	  # The tag is only written when the input is not itself a .size file.
	  if not args.input_file.endswith('.size'):
	    git_rev = _DetectGitRevision(args.input_file)
	    size_info.tag = 'Filename=%s git_rev=%s' % (
	        os.path.basename(args.input_file), git_rev)
	  logging.info('Recording metadata: %s',
	               describe.DescribeSizeInfoMetadata(size_info))
	  logging.info('Saving result to %s', args.output_file)
	  file_format.SaveSizeInfo(size_info, args.output_file)

	  logging.info('Done')


	# Run main() only when executed as a script; its return value becomes the
	# process exit status.
	if __name__ == '__main__':
	  sys.exit(main(sys.argv))