#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Dump functions called by static intializers in a Linux Release binary.
Usage example:
  tools/linux/<this-script> out/Release/chrome
A brief overview of static initialization:
1) the compiler writes out, per object file, a function that contains
   the static initializers for that file.
2) the compiler also writes out a pointer to that function in a special
   section.
3) at link time, the linker concatenates the function pointer sections
   into a single list of all initializers.
4) at run time, on startup the binary runs all function pointers.
The functions in (1) all have mangled names of the form
  _GLOBAL__I_<filename> (or _GLOBAL__sub_I_<filename>);
using objdump, we can disassemble those functions and dump all symbols that
they reference.
"""
import optparse
import re
import subprocess
import sys
# A map of symbol => informative text about it.
# Keys are demangled symbol names as they appear in the report; values are
# short hints that get appended to the symbol in square brackets.
NOTES = {
    '__cxa_atexit@plt': 'registers a dtor to run at exit',
    'std::__ioinit': '#includes <iostream>, use <ostream> instead',
}
# Determine whether this is a git checkout (as opposed to e.g. svn).
def _IsGitWorkspace():
    """Return True if `git rev-parse` succeeds in the current directory.

    Returns False both when the command exits non-zero and when the git
    executable cannot be started at all, so merely importing this module
    never fails on a machine without git.
    """
    try:
        probe = subprocess.Popen(['git', 'rev-parse'], stderr=subprocess.PIPE)
    except OSError:
        # git is not installed (or not executable): not a usable git checkout.
        return False
    # communicate() drains the stderr pipe before waiting, so the child can
    # never block on a full pipe.
    probe.communicate()
    return probe.returncode == 0


IS_GIT_WORKSPACE = _IsGitWorkspace()
class Demangler(object):
    """A wrapper around c++filt to provide a function to demangle symbols.

    A single c++filt child process is started when the object is created and
    is then fed one symbol per line through its stdin; each answer is read
    back as one line from its stdout.
    """

    def __init__(self, toolchain):
        """Start the `<toolchain>c++filt` child process.

        Args:
            toolchain: string prepended to the tool name, e.g. '' to use the
                c++filt found on PATH.
        """
        # Both pipes are required by Demangle().  Text mode keeps the
        # str-based write/readline below working on Python 2 and Python 3.
        self.cppfilt = subprocess.Popen([toolchain + 'c++filt'],
                                        stdin=subprocess.PIPE,
                                        stdout=subprocess.PIPE,
                                        universal_newlines=True)

    def Demangle(self, sym):
        """Given mangled symbol |sym|, return its demangled form."""
        self.cppfilt.stdin.write(sym + '\n')
        # Push the request out now; a buffered pipe would otherwise leave
        # the readline() below waiting forever for an answer.
        self.cppfilt.stdin.flush()
        return self.cppfilt.stdout.readline().strip()
# Matches for example: "cert_logger.pb.cc", capturing "cert_logger".
protobuf_filename_re = re.compile(r'(.*)\.pb\.cc$')


def QualifyFilenameAsProto(filename):
    """Attempt to qualify a bare |filename| with a src-relative path, assuming
    it is a protoc-generated file.

    The name must end in ".pb.cc"; the part before that suffix is looked up
    with `git ls-files` as "*/<basename>.proto".

    Args:
        filename: bare file name, e.g. taken from an initializer symbol.

    Returns:
        The single path reported by git if exactly one file matches.
        Otherwise (not a git checkout, not a .pb.cc name, no hit, or several
        hits) the original |filename|.
    """
    if not IS_GIT_WORKSPACE:
        return filename
    match = protobuf_filename_re.match(filename)
    if not match:
        return filename
    basename = match.group(1)
    gitlsfiles = subprocess.Popen(
        ['git', 'ls-files', '--', '*/%s.proto' % basename],
        stdout=subprocess.PIPE, universal_newlines=True)
    # communicate() reads everything and reaps the child process.
    output = gitlsfiles.communicate()[0]
    hits = output.splitlines()
    if len(hits) != 1:
        return filename  # No hit, or multiple hits: can't help.
    return hits[0].strip()
# Regex matching the substring of a symbol's demangled text representation most
# likely to appear in a source file.
# Example: "v8::internal::Builtins::InitBuiltinFunctionTable()" becomes
# "InitBuiltinFunctionTable", since the first (optional & non-capturing) group
# picks up any ::-qualification and the last fragment picks up a suffix that
# starts with an opener.
symbol_code_name_re = re.compile(r'^(?:[^(<[]*::)?([^:(<[]*).*?$')


def QualifyFilename(filename, symbol):
    """Given a bare filename and a symbol that occurs in it, attempt to
    qualify it with a src-relative path.

    The symbol is first reduced to the fragment captured by
    symbol_code_name_re, then `git grep -l` searches for that fragment in
    files matching "*/<filename>".

    Args:
        filename: bare file name to qualify.
        symbol: demangled symbol referenced from that file.

    Returns:
        The single matching path if exactly one file matches.  Otherwise
        (not a git checkout, unparsable symbol, no hit, or more than one
        hit) the original |filename|.
    """
    if not IS_GIT_WORKSPACE:
        return filename
    match = symbol_code_name_re.match(symbol)
    if not match:
        return filename
    # Group 1 is the regex's only capturing group: the bare code name.
    symbol = match.group(1)
    gitgrep = subprocess.Popen(
        ['git', 'grep', '-l', symbol, '--', '*/%s' % filename],
        stdout=subprocess.PIPE, universal_newlines=True)
    # communicate() reads everything and reaps the child process.
    output = gitgrep.communicate()[0]
    hits = output.splitlines()
    if len(hits) != 1:
        return filename  # No candidate, or more than one; keep bare name.
    return hits[0].strip()
# Regex matching nm output for the symbols we're interested in.
# See test_ParseNmLine for examples.
nm_re = re.compile(r'(\S+) (\S+) t (?:_ZN12)?_GLOBAL__(?:sub_)?I_(.*)')


def ParseNmLine(line):
    """Given a line of nm output, parse static initializers as a
    (file, start, size) tuple.

    Args:
        line: one line of `nm -S` output ("<addr> <size> <type> <name>").

    Returns:
        (filename, start_address, size) with both numbers converted from
        hexadecimal, or None when the line is not a static-initializer
        symbol as described by nm_re.
    """
    match = nm_re.match(line)
    if not match:
        return None
    addr, size, filename = match.groups()
    return (filename, int(addr, 16), int(size, 16))


def test_ParseNmLine():
    """Verify the nm_re regex matches some sample lines."""
    # Form with the optional "_ZN12" prefix and no "sub_".
    parse = ParseNmLine(
        '0000000001919920 0000000000000008 t '
        '_ZN12_GLOBAL__I_example.cc')
    assert parse == ('example.cc', 26319136, 8), parse

    # Form with the optional "sub_" infix.
    parse = ParseNmLine(
        '00000000026b9eb0 0000000000000024 t '
        '_GLOBAL__sub_I_example.pb.cc')
    assert parse == ('example.pb.cc', 40607408, 36), parse


# Just always run the test; it is fast enough.
test_ParseNmLine()
def ParseNm(toolchain, binary):
    """Given a binary, yield static initializers as (file, start, size) tuples.

    Runs `<toolchain>nm -S <binary>` and passes every output line through
    ParseNmLine, yielding only the lines it recognizes.

    Args:
        toolchain: string prepended to the tool name ('' uses PATH's nm).
        binary: path of the binary to inspect.
    """
    # stdout must be a pipe because the loop below reads it line by line;
    # text mode yields str lines on both Python 2 and Python 3.
    nm = subprocess.Popen([toolchain + 'nm', '-S', binary],
                          stdout=subprocess.PIPE, universal_newlines=True)
    try:
        for line in nm.stdout:
            parse = ParseNmLine(line)
            if parse:
                yield parse
    finally:
        # Runs when the generator is exhausted or closed early: release the
        # pipe and reap the child so no zombie process is left behind.
        nm.stdout.close()
        nm.wait()
# Regex matching objdump output for the symbols we're interested in.
# Example line:
# 12354ab: (disassembly, including <FunctionReference>)
disassembly_re = re.compile(r'^\s+[0-9a-f]+:.*<(\S+)>')


def ExtractSymbolReferences(toolchain, binary, start, end):
    """Given a span of addresses, returns symbol references from disassembly.

    Runs `<toolchain>objdump <binary> --disassemble` restricted to
    [start, end) and collects every "<symbol>" reference found by
    disassembly_re.

    Args:
        toolchain: string prepended to the tool name ('' uses PATH's objdump).
        binary: path of the binary to disassemble.
        start: first address of the span (passed as --start-address).
        end: address passed as --stop-address.

    Returns:
        A sorted list of the distinct referenced symbols, excluding ".LC*"
        and "_DYNAMIC*" references and references to _GLOBAL__[sub_]I_*
        symbols.

    Raises:
        RuntimeError: if the disassembly mentions
            __static_initialization_and_destruction.
    """
    cmd = [toolchain + 'objdump', binary, '--disassemble',
           '--start-address=0x%x' % start, '--stop-address=0x%x' % end]
    objdump = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                               universal_newlines=True)
    refs = set()
    try:
        for line in objdump.stdout:
            if '__static_initialization_and_destruction' in line:
                raise RuntimeError(
                    'code mentions '
                    '__static_initialization_and_destruction; '
                    'did you accidentally run this on a Debug binary?')
            match = disassembly_re.search(line)
            if not match:
                continue
            (ref,) = match.groups()
            if ref.startswith('.LC') or ref.startswith('_DYNAMIC'):
                # Ignore these, they are uninformative.
                continue
            if re.match('_GLOBAL__(?:sub_)?I_', ref):
                # Probably a relative jump within this function.
                continue
            refs.add(ref)
    finally:
        # Also reached when RuntimeError is raised: release the pipe and
        # reap the child process.
        objdump.stdout.close()
        objdump.wait()
    return sorted(refs)
def main():
    """Parse the command line and report a binary's static initializers.

    For every initializer found by ParseNm, the symbols it references are
    extracted, demangled and printed, followed by a summary line with the
    total number of references and files.

    Options:
        -d / --diffable: print every reference on its own line, prefixed
            with "# <filename>", and sort the files first.
        -t / --toolchain-prefix: string prepended to the tool names.

    Returns:
        0 on success.  A wrong number of positional arguments is reported
        through parser.error().
    """
    parser = optparse.OptionParser(usage='%prog [option] filename')
    parser.add_option('-d', '--diffable', dest='diffable',
                      action='store_true', default=False,
                      help='Prints the filename on each line, for more easily '
                           'diff-able output.')
    parser.add_option('-t', '--toolchain-prefix', dest='toolchain',
                      action='store', default='',
                      help='Toolchain prefix to prepend to all tool '
                           'invocations (nm, objdump, c++filt).')
    opts, args = parser.parse_args()
    if len(args) != 1:
        # optparse's error() prints the usage and exits; the return is only
        # a fallback in case that behavior is ever overridden.
        parser.error('missing filename argument')
        return 1
    binary = args[0]

    demangler = Demangler(opts.toolchain)
    file_count = 0
    initializer_count = 0

    files = ParseNm(opts.toolchain, binary)
    if opts.diffable:
        # Sorted input keeps the output stable between runs.
        files = sorted(files)
    for filename, addr, size in files:
        file_count += 1
        ref_output = []

        qualified_filename = QualifyFilenameAsProto(filename)

        if size == 2:
            # gcc generates a two-byte 'repz retq' initializer when there is a
            # ctor even when the ctor is empty.  This is fixed in gcc 4.6, but
            # Android uses gcc 4.4.
            ref_output.append('[empty ctor, but it still has cost on gcc <4.6]')
        else:
            for ref in ExtractSymbolReferences(opts.toolchain, binary, addr,
                                               addr + size):
                initializer_count += 1

                ref = demangler.Demangle(ref)
                # Keep trying to qualify the file name until one of the
                # referenced symbols yields a unique match.
                if qualified_filename == filename:
                    qualified_filename = QualifyFilename(filename, ref)

                note = ''
                if ref in NOTES:
                    note = NOTES[ref]
                elif ref.endswith('_2eproto()'):
                    # NOTE(review): this text ends in a colon and looks
                    # truncated -- confirm the intended wording.
                    note = 'protocol compiler bug:'
                if note:
                    ref_output.append('%s [%s]' % (ref, note))
                else:
                    ref_output.append(ref)

        if opts.diffable:
            if ref_output:
                print('\n'.join('# ' + qualified_filename + ' ' + r
                                for r in ref_output))
            else:
                print('# %s: (empty initializer list)' % qualified_filename)
        else:
            print('%s (initializer offset 0x%x size 0x%x)' % (qualified_filename,
                                                              addr, size))
            # Each entry carries its own newline, so print() adds one blank
            # separator line after the list.
            print(''.join(' %s\n' % r for r in ref_output))

    summary = 'Found %d static initializers in %d files.' % (initializer_count,
                                                             file_count)
    if opts.diffable:
        summary = '# ' + summary
    print(summary)

    return 0
if '__main__' == __name__: