#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Dump functions called by static initializers in a Linux Release binary.
Usage example:
tools/linux/ out/Release/chrome
A brief overview of static initialization:
1) the compiler writes out, per object file, a function that contains
the static initializers for that file.
2) the compiler also writes out a pointer to that function in a special
section.
3) at link time, the linker concatenates the function pointer sections
into a single list of all initializers.
4) at run time, on startup the binary runs all function pointers.
The functions in (1) all have mangled names of the form
_GLOBAL__sub_I_<filename> or __cxx_global_var_initN; using objdump, we can
disassemble those functions and dump all symbols that they reference.
"""
from __future__ import print_function
import optparse
import re
import subprocess
import sys
# A map of symbol => informative text about it. main() looks up each demangled
# reference here and, when present, prints the text next to the symbol.
NOTES = {
    '__cxa_atexit@plt': 'registers a dtor to run at exit',
    'std::__ioinit': '#includes <iostream>, use <ostream> instead',
}
# Determine whether this is a git checkout (as opposed to e.g. svn).
# communicate() is used instead of wait() so the stderr pipe is drained and
# git cannot block on a full pipe. If the git binary cannot be started at all
# (OSError), this is treated as "not a git workspace" rather than failing at
# import time.
try:
    _git_probe = subprocess.Popen(['git', 'rev-parse'],
                                  stderr=subprocess.PIPE)
    _git_probe.communicate()
    IS_GIT_WORKSPACE = (_git_probe.returncode == 0)
except OSError:
    IS_GIT_WORKSPACE = False
class Demangler(object):
    """A wrapper around c++filt to provide a function to demangle symbols.

    One c++filt child process is started by the constructor and then used by
    every Demangle() call on the instance.
    """

    def __init__(self, toolchain):
        """Start `<toolchain>c++filt` with pipes attached to stdin and stdout.

        Args:
            toolchain: prefix string prepended to the tool name (may be '').
        """
        # Demangle() writes to stdin and reads from stdout, so both must be
        # pipes. universal_newlines=True keeps them in text mode so plain str
        # symbols can be written and read back.
        self.cppfilt = subprocess.Popen([toolchain + 'c++filt'],
                                        stdin=subprocess.PIPE,
                                        stdout=subprocess.PIPE,
                                        universal_newlines=True)

    def Demangle(self, sym):
        """Given mangled symbol |sym|, return its demangled form."""
        self.cppfilt.stdin.write(sym + '\n')
        # Without a flush the request may sit in our write buffer while we
        # block forever on readline() below.
        self.cppfilt.stdin.flush()
        return self.cppfilt.stdout.readline().strip()
# Matches for example: "cert_logger.pb.cc", capturing "cert_logger".
# Group 1 is everything before the trailing ".pb.cc".
protobuf_filename_re = re.compile(r'(.*)\.pb\.cc$')
def QualifyFilenameAsProto(filename):
    """Attempt to qualify a bare |filename| with a src-relative path, assuming
    it is a protoc-generated file.

    Args:
        filename: bare file name, e.g. one ending in ".pb.cc".

    Returns:
        The single path reported by `git ls-files` for the matching .proto
        file. The original |filename| is returned when this is not a git
        workspace, when the name does not end in ".pb.cc", or when git reports
        zero or several matches.
    """
    # The lookup below shells out to git, so it only makes sense in a git
    # checkout.
    if not IS_GIT_WORKSPACE:
        return filename
    match = protobuf_filename_re.match(filename)
    if not match:
        return filename
    # group(1) is the captured stem as a string; groups() would give a tuple.
    basename = match.group(1)
    gitlsfiles = subprocess.Popen(
        ['git', 'ls-files', '--', '*/%s.proto' % basename],
        stdout=subprocess.PIPE, universal_newlines=True)
    # communicate() reads all output and waits for git to exit, so the child
    # is always reaped.
    output, _ = gitlsfiles.communicate()
    hits = output.splitlines()
    if len(hits) != 1:
        return filename  # No hit, or multiple hits: can't help.
    return hits[0].strip()
# Regex matching the substring of a symbol's demangled text representation most
# likely to appear in a source file. The wanted fragment is capture group 1.
# Example: "v8::internal::Builtins::InitBuiltinFunctionTable()" becomes
# "InitBuiltinFunctionTable", since the first (optional & non-capturing) group
# picks up any ::-qualification and the last fragment picks up a suffix that
# starts with an opener (one of "(", "<" or "[").
symbol_code_name_re = re.compile(r'^(?:[^(<[]*::)?([^:(<[]*).*?$')
def QualifyFilename(filename, symbol):
    """Given a bare filename and a symbol that occurs in it, attempt to qualify
    it with a src-relative path.

    Args:
        filename: bare file name to look for.
        symbol: demangled symbol text; only the fragment captured by
            symbol_code_name_re is searched for.

    Returns:
        The single path reported by `git grep -l` among files whose path ends
        in "/<filename>". The original |filename| is returned when this is not
        a git workspace, when the symbol does not match the regex, or when git
        reports zero or more than one file.
    """
    # The lookup below shells out to git, so it only makes sense in a git
    # checkout.
    if not IS_GIT_WORKSPACE:
        return filename
    match = symbol_code_name_re.match(symbol)
    if not match:
        return filename
    code_name = match.group(1)
    gitgrep = subprocess.Popen(
        ['git', 'grep', '-l', code_name, '--', '*/%s' % filename],
        stdout=subprocess.PIPE, universal_newlines=True)
    # communicate() reads all output and waits for git to exit, so the child
    # is always reaped.
    output, _ = gitgrep.communicate()
    hits = output.splitlines()
    if len(hits) != 1:
        # No candidate, or more than one candidate; return bare filename.
        return filename
    return hits[0].strip()
# Regex matching nm output for the symbols we're interested in. The two formats
# we are interested in are _GLOBAL__sub_I_<filename> and __cxx_global_var_initN.
# See test_ParseNmLine for examples.
# Groups: (1) start address, (2) size, (3) symbol prefix, (4) filename (empty
# for the __cxx_global_var_init form). The pattern carries inline comments, so
# it must be compiled in verbose mode.
nm_re = re.compile(
    r'''(\S+)\s(\S+)\st\s                # Symbol start address and size
    (
      (?:_ZN12)?_GLOBAL__(?:sub_)?I_     # Pattern with filename
    |
      __cxx_global_var_init\d*           # Pattern without filename
    )(.*)                                # capture the filename''',
    re.VERBOSE)


def ParseNmLine(line):
    """Parse static initializers from a line of nm output.

    Args:
        line: one line of `nm -S` output (hex address, hex size, type, name).

    Returns:
        A (file, start, size, symbol) tuple when the line names a static
        initializer, where start and size are ints and symbol is the full
        symbol name; None for any other line.
    """
    match = nm_re.match(line)
    if not match:
        return None
    addr, size, prefix, filename = match.groups()
    return (filename, int(addr, 16), int(size, 16), prefix + filename)


def test_ParseNmLine():
    """Verify the nm_re regex matches some sample lines."""
    # Filename form with both optional pieces: "_ZN12" present, "sub_" absent.
    parse = ParseNmLine(
        '0000000001919920 0000000000000008 t '
        '_ZN12_GLOBAL__I_sample_service.cc')
    assert parse == ('sample_service.cc', 26319136, 8,
                     '_ZN12_GLOBAL__I_sample_service.cc'), parse

    # Filename form with "sub_" present and "_ZN12" absent.
    parse = ParseNmLine(
        '00000000026b9eb0 0000000000000024 t '
        '_GLOBAL__sub_I_sample_extension.cc')
    assert parse == ('sample_extension.cc', 40607408, 36,
                     '_GLOBAL__sub_I_sample_extension.cc'), parse

    # Forms without a filename: the file element of the tuple is empty.
    parse = ParseNmLine(
        '0000000002e75a60 0000000000000016 t __cxx_global_var_init')
    assert parse == ('', 48716384, 22, '__cxx_global_var_init'), parse

    parse = ParseNmLine(
        '0000000002e75a60 0000000000000016 t __cxx_global_var_init89')
    assert parse == ('', 48716384, 22, '__cxx_global_var_init89'), parse


# Just always run the test; it is fast enough.
test_ParseNmLine()
def ParseNm(toolchain, binary):
    """Yield static initializers for the given binary.

    Given a binary, yield static initializers as (file, start, size, symbol)
    tuples.

    Args:
        toolchain: prefix string prepended to the `nm` tool name (may be '').
        binary: path of the binary passed to `nm -S`.
    """
    # stdout must be a pipe because the loop below reads nm's output from it.
    # universal_newlines=True yields text lines, which is what ParseNmLine is
    # given in its own self-test.
    nm = subprocess.Popen([toolchain + 'nm', '-S', binary],
                          stdout=subprocess.PIPE, universal_newlines=True)
    for line in nm.stdout:
        parse = ParseNmLine(line)
        if parse:
            yield parse
# Regex matching objdump output for the symbols we're interested in.
# Example line:
# 12354ab: (disassembly, including <FunctionReference>)
# Group 1 is the text between the angle brackets; because the preceding ".*"
# is greedy, the last <...> reference on the line is the one captured.
disassembly_re = re.compile(r'^\s+[0-9a-f]+:.*<(\S+)>')
def ExtractSymbolReferences(toolchain, binary, start, end, symbol):
    """Given a span of addresses, returns symbol references from disassembly.

    Args:
        toolchain: prefix string prepended to the `objdump` tool name.
        binary: path of the binary to disassemble.
        start: first address of the span (int).
        end: stop address of the span (int).
        symbol: name of the function being disassembled; references that
            start with this name are skipped.

    Returns:
        A sorted list of the distinct symbol names referenced in the span.

    Raises:
        RuntimeError: if the disassembly mentions
            __static_initialization_and_destruction.
    """
    cmd = [toolchain + 'objdump', binary, '--disassemble',
           '--start-address=0x%x' % start, '--stop-address=0x%x' % end]
    # Text mode so the str comparisons and the str regex below apply to the
    # lines read from the pipe.
    objdump = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                               universal_newlines=True)
    refs = set()
    for line in objdump.stdout:
        if '__static_initialization_and_destruction' in line:
            raise RuntimeError('code mentions '
                               '__static_initialization_and_destruction; '
                               'did you accidentally run this on a Debug binary?')
        match = disassembly_re.search(line)
        if not match:
            continue
        (ref,) = match.groups()
        if ref.startswith(('.LC', '_DYNAMIC')):
            # Ignore these, they are uninformative.
            continue
        # Plain prefix test: the symbol is a literal name, not a regex, so
        # characters such as '.' in it must not act as wildcards.
        if ref.startswith(symbol):
            # Probably a relative jump within this function.
            continue
        refs.add(ref)
    # All output has been consumed; reap the child.
    objdump.wait()
    return sorted(refs)
def main():
    """Parse the command line and print the static initializers of a binary.

    For every initializer found by ParseNm, the referenced symbols are
    extracted, demangled, annotated from NOTES where possible, and printed
    either as an indented list or (with --diffable) one "# file symbol" line
    per reference. A summary line is printed last.

    Returns:
        0 after printing the report.
    """
    parser = optparse.OptionParser(usage='%prog [option] filename')
    # NOTE(review): the '(Used by' parenthetical in this help text looks
    # truncated -- confirm the intended wording.
    parser.add_option('-d', '--diffable', dest='diffable',
                      action='store_true', default=False,
                      help='Prints the filename on each line, for more easily '
                           'diff-able output. (Used by')
    parser.add_option('-t', '--toolchain-prefix', dest='toolchain',
                      action='store', default='',
                      help='Toolchain prefix to append to all tool invocations '
                           '(nm, objdump).')
    opts, args = parser.parse_args()
    if len(args) != 1:
        # optparse's error() prints the message and exits the process, so the
        # return below is only a fallback.
        parser.error('missing filename argument')
        return 1
    binary = args[0]

    demangler = Demangler(opts.toolchain)
    file_count = 0
    initializer_count = 0

    files = ParseNm(opts.toolchain, binary)
    if opts.diffable:
        # Sorted so the output order is stable between runs.
        files = sorted(files)
    for filename, addr, size, symbol in files:
        file_count += 1
        ref_output = []

        qualified_filename = QualifyFilenameAsProto(filename)

        if size == 2:
            # gcc generates a two-byte 'repz retq' initializer when there is a
            # ctor even when the ctor is empty. This is fixed in gcc 4.6, but
            # Android uses gcc 4.4.
            ref_output.append('[empty ctor, but it still has cost on gcc <4.6]')
        else:
            for ref in ExtractSymbolReferences(opts.toolchain, binary, addr,
                                               addr + size, symbol):
                initializer_count += 1

                ref = demangler.Demangle(ref)
                # Keep trying to qualify the filename until one reference
                # produces a path different from the bare name.
                if qualified_filename == filename:
                    qualified_filename = QualifyFilename(filename, ref)

                note = ''
                if ref in NOTES:
                    note = NOTES[ref]
                elif ref.endswith('_2eproto()'):
                    note = 'protocol compiler bug:'

                if note:
                    ref_output.append('%s [%s]' % (ref, note))
                else:
                    # References without a note are still reported (and were
                    # already counted above).
                    ref_output.append(ref)

        if opts.diffable:
            if ref_output:
                print('\n'.join(
                    '# ' + qualified_filename + ' ' + r for r in ref_output))
            else:
                print('# %s: (empty initializer list)' % qualified_filename)
        else:
            print('%s (initializer offset 0x%x size 0x%x)' % (qualified_filename,
                                                              addr, size))
            print(''.join(' %s\n' % r for r in ref_output))

    if opts.diffable:
        print('#', end=' ')
    print('Found %d static initializers in %d files.' % (initializer_count,
                                                         file_count))

    return 0
# Script entry point: run main() and use its return value as the process
# exit status.
if '__main__' == __name__:
    sys.exit(main())