#!/usr/bin/env python
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Main Python API for analyzing binary size."""

import argparse
import datetime
import distutils.spawn
import gzip
import logging
import os
import re
import subprocess
import sys

import describe
import file_format
import function_signature
import helpers
import linker_map_parser
import models
import ninja_parser


def _OpenMaybeGz(path, mode=None):
"""Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`."""
if path.endswith('.gz'):
if mode and 'w' in mode:
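      # compresslevel=1 favors write speed over compression ratio.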
return gzip.GzipFile(path, mode, 1)
return gzip.open(path, mode)
return open(path, mode or 'r')


def _UnmangleRemainingSymbols(symbol_group, tool_prefix):
"""Uses c++filt to unmangle any symbols that need it."""
to_process = [s for s in symbol_group if s.name.startswith('_Z')]
if not to_process:
return
logging.info('Unmangling %d names', len(to_process))
proc = subprocess.Popen([tool_prefix + 'c++filt'], stdin=subprocess.PIPE,
stdout=subprocess.PIPE)
stdout = proc.communicate('\n'.join(s.name for s in to_process))[0]
assert proc.returncode == 0
for i, line in enumerate(stdout.splitlines()):
to_process[i].name = line


def _NormalizeNames(symbol_group):
"""Ensures that all names are formatted in a useful way.
This includes:
- Assigning of |full_name|.
- Stripping of return types in |full_name| and |name| (for functions).
- Stripping parameters from |name|.
- Moving "vtable for" and the like to be suffixes rather than prefixes.
"""
found_prefixes = set()
for symbol in symbol_group:
if symbol.name.startswith('*'):
# See comment in _RemoveDuplicatesAndCalculatePadding() about when this
# can happen.
continue
    # E.g.: "vtable for FOO" -> "FOO [vtable]"
idx = symbol.name.find(' for ', 0, 30)
if idx != -1:
found_prefixes.add(symbol.name[:idx + 4])
symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']'
    # E.g.: "virtual thunk to FOO" -> "FOO [virtual thunk]"
idx = symbol.name.find(' to ', 0, 30)
if idx != -1:
found_prefixes.add(symbol.name[:idx + 3])
symbol.name = symbol.name[idx + 4:] + ' [' + symbol.name[:idx] + ']'
# Strip out return type, and identify where parameter list starts.
if symbol.section == 't':
symbol.full_name, symbol.name = function_signature.Parse(symbol.name)
# Remove anonymous namespaces (they just harm clustering).
non_anonymous = symbol.name.replace('(anonymous namespace)::', '')
if symbol.name != non_anonymous:
symbol.is_anonymous = True
symbol.name = non_anonymous
symbol.full_name = symbol.full_name.replace(
'(anonymous namespace)::', '')
if symbol.section != 't' and '(' in symbol.name:
# Pretty rare. Example:
# blink::CSSValueKeywordsHash::findValueImpl(char const*)::value_word_list
symbol.full_name = symbol.name
symbol.name = re.sub(r'\(.*\)', '', symbol.full_name)
logging.debug('Found name prefixes of: %r', found_prefixes)


def _NormalizeObjectPaths(symbol_group):
"""Ensures that all paths are formatted in a useful way."""
for symbol in symbol_group:
path = symbol.object_path
if path.startswith('obj/'):
# Convert obj/third_party/... -> third_party/...
path = path[4:]
elif path.startswith('../../'):
# Convert ../../third_party/... -> third_party/...
path = path[6:]
if path.endswith(')'):
# Convert foo/bar.a(baz.o) -> foo/bar.a/(baz.o)
start_idx = path.index('(')
path = os.path.join(path[:start_idx], path[start_idx:])
symbol.object_path = path


def _NormalizeSourcePath(path):
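  """Strips gen/ or ../../ prefixes from |path|."""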
if path.startswith('gen/'):
# Convert gen/third_party/... -> third_party/...
return path[4:]
if path.startswith('../../'):
# Convert ../../third_party/... -> third_party/...
return path[6:]
return path


def _ExtractSourcePaths(symbol_group, output_directory):
"""Fills in the .source_path attribute of all symbols."""
mapper = ninja_parser.SourceFileMapper(output_directory)
for symbol in symbol_group:
object_path = symbol.object_path
if symbol.source_path or not object_path:
continue
# We don't have source info for prebuilt .a files.
if not object_path.startswith('..'):
source_path = mapper.FindSourceForPath(object_path)
if source_path:
symbol.source_path = _NormalizeSourcePath(source_path)
else:
logging.warning('Could not find source path for %s', object_path)
logging.debug('Parsed %d .ninja files.', mapper.GetParsedFileCount())


def _RemoveDuplicatesAndCalculatePadding(symbol_group):
  """Removes symbols at the same address and calculates the |padding| field.

  Symbols must already be sorted by |address|.
  """
to_remove = set()
all_symbols = symbol_group.symbols
for i, symbol in enumerate(all_symbols[1:]):
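    # Because of the [1:] slice above, |i| is the index of the previous symbol.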
prev_symbol = all_symbols[i]
if prev_symbol.section_name != symbol.section_name:
continue
if symbol.address > 0 and prev_symbol.address > 0:
# Fold symbols that are at the same address (happens in nm output).
if symbol.address == prev_symbol.address:
symbol.size = max(prev_symbol.size, symbol.size)
to_remove.add(i + 1)
continue
# Even with symbols at the same address removed, overlaps can still
# happen. In this case, padding will be negative (and this is fine).
padding = symbol.address - prev_symbol.end_address
# These thresholds were found by manually auditing arm32 Chrome.
# E.g.: Set them to 0 and see what warnings get logged.
# TODO(agrieve): See if these thresholds make sense for architectures
# other than arm32.
      if ((symbol.section in 'rd' and padding >= 256) or
          (symbol.section in 't' and padding >= 64)):
        # For nm data, this is caused by data that has no associated symbol.
        # The linker map file lists such entries with no name, but with a file.
        # Example:
        #   .data 0x02d42764 0x120 .../V8SharedWorkerGlobalScope.o
        # Whereas most entries look like:
        #   .data.MANGLED_NAME...
        logging.debug('Large padding of %d between:\n A) %r\n B) %r',
                      padding, prev_symbol, symbol)
        continue
symbol.padding = padding
symbol.size += padding
      assert symbol.size >= 0, (
          'Symbol has negative size: %r\nprev symbol: %r' % (
              symbol, prev_symbol))
# Map files have no overlaps, so worth special-casing the no-op case.
if to_remove:
logging.info('Removing %d overlapping symbols', len(to_remove))
symbol_group.symbols = (
[s for i, s in enumerate(all_symbols) if i not in to_remove])


def AddOptions(parser):
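  """Adds arguments for Analyze() to |parser|."""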
parser.add_argument('--tool-prefix', default='',
help='Path prefix for c++filt.')
parser.add_argument('--output-directory',
help='Path to the root build directory.')


def _DetectToolPrefix(tool_prefix, input_file, output_directory=None):
  """Detects values for --tool-prefix and --output-directory.

  Returns:
    Tuple of (output_directory, tool_prefix).
  """
if not output_directory:
abs_path = os.path.abspath(input_file)
release_idx = abs_path.find('Release')
if release_idx != -1:
      output_directory = os.path.relpath(abs_path[:release_idx] + 'Release')
logging.debug('Detected --output-directory=%s', output_directory)
if not tool_prefix and output_directory:
# Auto-detect from build_vars.txt
build_vars_path = os.path.join(output_directory, 'build_vars.txt')
if os.path.exists(build_vars_path):
with open(build_vars_path) as f:
build_vars = dict(l.rstrip().split('=', 1) for l in f if '=' in l)
logging.debug('Found --tool-prefix from build_vars.txt')
tool_prefix = os.path.join(output_directory,
build_vars['android_tool_prefix'])
if os.path.sep not in tool_prefix:
full_path = distutils.spawn.find_executable(tool_prefix + 'c++filt')
else:
full_path = tool_prefix + 'c++filt'
if not full_path or not os.path.isfile(full_path):
raise Exception('Bad --tool-prefix. Path not found: %s' % full_path)
if not output_directory or not os.path.isdir(output_directory):
raise Exception('Bad --output-directory. Path not found: %s' %
output_directory)
logging.info('Using --output-directory=%s', output_directory)
logging.info('Using --tool-prefix=%s', tool_prefix)
return output_directory, tool_prefix


def AnalyzeWithArgs(args, input_path):
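  """Runs Analyze() using arguments parsed via AddOptions()."""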
return Analyze(input_path, args.output_directory, args.tool_prefix)


def Analyze(path, output_directory=None, tool_prefix=''):
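  """Constructs a models.SizeInfo from |path|.

  |path| may be a previously-saved .size file, or a linker .map(.gz) file.
  """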
if path.endswith('.size'):
logging.debug('Loading results from: %s', path)
size_info = file_format.LoadSizeInfo(path)
# Recompute derived values (padding and function names).
logging.info('Calculating padding')
_RemoveDuplicatesAndCalculatePadding(size_info.symbols)
logging.info('Deriving signatures')
# Re-parse out function parameters.
_NormalizeNames(size_info.symbols)
return size_info
elif not path.endswith('.map') and not path.endswith('.map.gz'):
raise Exception('Expected input to be a .map or a .size')
else:
# Verify tool_prefix early.
output_directory, tool_prefix = (
_DetectToolPrefix(tool_prefix, path, output_directory))
with _OpenMaybeGz(path) as map_file:
section_sizes, symbols = linker_map_parser.MapFileParser().Parse(map_file)
timestamp = datetime.datetime.utcfromtimestamp(os.path.getmtime(path))
size_info = models.SizeInfo(section_sizes, models.SymbolGroup(symbols),
timestamp=timestamp)
    logging.info('Calculating padding')
    _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
    # The map file does not demangle all names, so run c++filt over the rest.
    # _UnmangleRemainingSymbols() logs its own progress.
    _UnmangleRemainingSymbols(size_info.symbols, tool_prefix)
    logging.info('Extracting source paths from .ninja files')
    _ExtractSourcePaths(size_info.symbols, output_directory)
logging.info('Normalizing names')
_NormalizeNames(size_info.symbols)
logging.info('Normalizing paths')
_NormalizeObjectPaths(size_info.symbols)
if logging.getLogger().isEnabledFor(logging.INFO):
for line in describe.DescribeSizeInfoCoverage(size_info):
logging.info(line)
logging.info('Finished analyzing %d symbols', len(size_info.symbols))
return size_info


def _DetectGitRevision(path):
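  """Returns the git HEAD revision for |path|'s directory, or None on error."""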
try:
git_rev = subprocess.check_output(
['git', '-C', os.path.dirname(path), 'rev-parse', 'HEAD'])
return git_rev.rstrip()
except Exception:
logging.warning('Failed to detect git revision for file metadata.')
return None


def main(argv):
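  """Analyzes an input .map (or .size) file and writes a .size file."""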
  parser = argparse.ArgumentParser()
  parser.add_argument('input_file', help='Path to input .map(.gz) or .size file.')
  parser.add_argument('output_file', help='Path to output .size file.')
AddOptions(parser)
args = helpers.AddCommonOptionsAndParseArgs(parser, argv)
if not args.output_file.endswith('.size'):
parser.error('output_file must end with .size')
size_info = AnalyzeWithArgs(args, args.input_file)
if not args.input_file.endswith('.size'):
git_rev = _DetectGitRevision(args.input_file)
size_info.tag = 'Filename=%s git_rev=%s' % (
os.path.basename(args.input_file), git_rev)
logging.info('Recording metadata: %s',
describe.DescribeSizeInfoMetadata(size_info))
logging.info('Saving result to %s', args.output_file)
file_format.SaveSizeInfo(size_info, args.output_file)
logging.info('Done')


if __name__ == '__main__':
sys.exit(main(sys.argv))