blob: b4265d7114c394ec7b1a469b37904e932a30b076 [file] [log] [blame]
#!/usr/bin/env python3
# Copyright 2012 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Dumps the names, addresses, and disassmebly of static initializers.
Usage example:
tools/linux/dump-static-initializers.py out/Release/chrome
For an explanation of static initializers, see: //docs/static_initializers.md.
"""
import argparse
import json
import os
import pathlib
import subprocess
import sys
# Shared path prefix of the LLVM tools used below. Callers append the tool
# name (e.g. 'nm', 'objdump', 'readobj', 'symbolizer') to get the executable.
_TOOLCHAIN_PREFIX = str(
    pathlib.Path(__file__).parents[2].joinpath('third_party', 'llvm-build',
                                               'Release+Asserts', 'bin',
                                               'llvm-'))

# It is too slow to dump disassembly for a lot of symbols.
_MAX_DISASSEMBLY_SYMBOLS = 10
def _ParseNm(binary, addresses):
  """Looks up the sizes of the symbols at the given addresses via llvm-nm.

  Args:
    binary: Path of the binary to pass to nm.
    addresses: Iterable of integer symbol addresses to look up.

  Returns:
    Dict mapping address -> symbol size, both as integers. Only addresses
    for which nm printed a four-column line are present.
  """
  # Example output:
  # 000000000de66bd0 0000000000000026 t _GLOBAL__sub_I_add.cc
  nm_cmd = [_TOOLCHAIN_PREFIX + 'nm', '--print-size', binary]
  nm_output = subprocess.check_output(nm_cmd, encoding='utf8')
  wanted = set(addresses)
  size_by_address = {}
  for nm_line in nm_output.splitlines():
    fields = nm_line.split()
    # Only "<address> <size> <type> <name>" lines carry a usable size.
    if len(fields) == 4:
      symbol_address = int(fields[0], 16)
      if symbol_address in wanted:
        size_by_address[symbol_address] = int(fields[1], 16)
  return size_by_address
def _Disassemble(binary, start, end):
  """Runs llvm-objdump over an address range and returns a text report.

  Args:
    binary: Path of the binary to pass to objdump.
    start: Integer passed to objdump as --start-address.
    end: Integer passed to objdump as --stop-address.

  Returns:
    A string made of a one-line header, an extra warning line when more than
    10 lines follow, and then either the objdump lines that start with ';'
    (treated as source lines) or, if there are none, the full objdump output.
  """
  objdump_output = subprocess.check_output([
      _TOOLCHAIN_PREFIX + 'objdump',
      binary,
      '--disassemble',
      '--source',
      '--demangle',
      f'--start-address=0x{start:x}',
      f'--stop-address=0x{end:x}',
  ],
                                           encoding='utf8')
  every_line = objdump_output.splitlines(keepends=True)
  only_source = [entry for entry in every_line if entry.startswith(';')]
  if only_source:
    shown = only_source
    pieces = ['Showing source lines that appear in the symbol (via objdump).\n']
  else:
    # No source lines were emitted; fall back to the raw output.
    shown = every_line
    pieces = [
        'Symbol missing source lines. Showing raw disassembly (via objdump).\n'
    ]
  if len(shown) > 10:
    pieces.append('This might be verbose due to inlined functions.\n')
  pieces.extend(shown)
  return ''.join(pieces)
def _DumpInitArray(binary):
  """Reads the pointers stored in the binary's .init_array section.

  Parses the output of `llvm-readobj --hex-dump=.init_array`. Header lines
  determine the byte order, the pointer width and whether the architecture is
  reported as "arm"; hex-dump rows are decoded into pointer-sized integers.

  Args:
    binary: Path of the binary to pass to readobj.

  Returns:
    List of (entry_address, value) tuples in section order. entry_address is
    the address of the .init_array slot itself and value is the integer
    stored in it (with the low bit cleared when the arch is "arm").
  """
  cmd = [_TOOLCHAIN_PREFIX + 'readobj', '--hex-dump=.init_array', binary]
  output = subprocess.check_output(cmd, encoding='utf8')
  # Example output:
  # File: lib.unstripped/libmonochrome_64.so
  # Format: elf64-littleaarch64
  # Arch: aarch64
  # AddressSize: 64bit
  # LoadName: libmonochrome_64.so
  # Hex dump of section '.init_array':
  # 0x091f6198 14f80204 00000000 c0cf3003 00000000 ..........0.....
  # 0x091f61a8 68c70104 00000000 h........^F.....
  is_64_bit = False
  is_arm = False
  byte_order = 'little'
  ret = []
  for line in output.splitlines():
    if line.startswith('Format:') and 'big' in line:
      byte_order = 'big'
      continue
    if line == 'Arch: arm':
      is_arm = True
      continue
    if line == 'AddressSize: 64bit':
      is_64_bit = True
      continue
    if not line.startswith('0x'):
      continue
    # Split the address column off at the first space instead of assuming it
    # is exactly 10 characters wide: an address that needs more than 8 hex
    # digits makes the column wider, and a fixed-width slice would truncate
    # it and leak the remaining digits into the hex groups.
    address_str, _, rest = line.partition(' ')
    init_array_address = int(address_str, 16)
    # Drop the trailing 16 characters, which hold the ASCII rendering of the
    # row; what remains is up to four groups of 4 bytes each.
    parts = rest[:-16].split()
    assert len(parts) <= 4, 'Too many parts: ' + line
    if is_64_bit:
      # Each 64-bit pointer spans two adjacent 4-byte groups.
      parts = [parts[i] + parts[i + 1] for i in range(0, len(parts), 2)]
    entry_size = 8 if is_64_bit else 4
    for part in parts:
      address = int.from_bytes(bytes.fromhex(part),
                               byteorder=byte_order,
                               signed=False)
      if is_arm:
        address = address & ~1  # Adjust for arm thumb addresses being odd.
      ret.append((init_array_address, address))
      init_array_address += entry_size
  return ret
def _DumpRelativeRelocations(binary):
  """Collects the binary's relative relocations via llvm-readobj.

  Args:
    binary: Path of the binary to pass to readobj.

  Returns:
    Dict built from every output line containing 'RELATIVE', mapping the
    line's first column to its last column, both parsed as hexadecimal
    integers.
  """
  # Example output from: llvm-readobj --relocations chrome
  # File: chrome
  # Format: elf64-x86-64
  # Arch: x86_64
  # AddressSize: 64bit
  # LoadName: <Not found>
  # Relocations [
  # Section (10) .rela.dyn {
  # 0x26C2AD88 R_X86_64_RELATIVE - 0xA6DABE0
  # 0x26C2AD90 R_X86_64_RELATIVE - 0xA6DC2B0
  # ...
  readobj_output = subprocess.check_output(
      [_TOOLCHAIN_PREFIX + 'readobj', '--relocations', binary],
      encoding='utf8')
  relative_fields = (entry.split() for entry in readobj_output.splitlines()
                     if 'RELATIVE' in entry)
  return {int(fields[0], 16): int(fields[-1], 16) for fields in relative_fields}
def _ResolveRelativeAddresses(binary, address_tuples):
  """Replaces zero-valued .init_array entries with their relocation values.

  Args:
    binary: Path of the binary; only used if a relocation lookup is needed.
    address_tuples: Iterable of (entry_address, value) integer pairs.

  Returns:
    List with one integer per input tuple, in order: the value itself when it
    is non-zero, otherwise the relocation value recorded for entry_address.

  Raises:
    Exception: If a zero-valued entry has no matching relative relocation.
  """
  # Loaded on first use so that readobj runs at most once, and only if needed.
  relocation_by_address = None
  resolved = []
  for entry_address, target in address_tuples:
    if target != 0:
      resolved.append(target)
      continue
    if relocation_by_address is None:
      relocation_by_address = _DumpRelativeRelocations(binary)
    target = relocation_by_address.get(entry_address)
    if target is None:
      raise Exception('Failed to resolve relocation for address: ' +
                      hex(entry_address))
    resolved.append(target)
  return resolved
def _SymbolizeAddresses(binary, addresses):
  """Maps addresses to a source location and function name.

  Args:
    binary: Path of the binary to pass to llvm-symbolizer.
    addresses: Sequence of integer addresses to symbolize.

  Returns:
    Dict mapping address -> (location, function_name). location is the file
    name, followed by ':<line>' when a non-zero line number is available.
    Empty when |addresses| is empty (the symbolizer is not run in that case).
  """
  # Example output from: llvm-symbolizer -e chrome \
  # --output-style=JSON --functions 0x3323430 0x403a768 0x5489b98
  # [{"Address":"0xa6afdd0","ModuleName":"chrome","Symbol":[...]}, ...]
  # Where Symbol = {"Column":24,"Discriminator":0,"FileName":"...",
  # "FunctionName":"MaybeStartBackgroundThread","Line":85,
  # "StartAddress":"0xa6afdd0","StartFileName":"","StartLine":0}
  if not addresses:
    return {}
  symbolizer_cmd = [
      _TOOLCHAIN_PREFIX + 'symbolizer', '-e', binary, '--functions',
      '--output-style=JSON'
  ]
  symbolizer_cmd.extend(hex(a) for a in addresses)
  raw_json = subprocess.check_output(symbolizer_cmd, encoding='utf8')

  location_by_address = {}
  for record in json.loads(raw_json):
    # Multiple symbol entries can exist due to inlining. Last entry is the
    # outer-most symbol.
    frames = record['Symbol']
    outermost = frames[-1]
    # Take the last entry that has a line number as the best filename,
    # falling back to the outer-most symbol when none has one.
    best_frame = outermost
    for frame in reversed(frames):
      if frame['Line'] != 0:
        best_frame = frame
        break
    key = int(record['Address'], 16)
    location = best_frame['FileName']
    line_number = best_frame['Line']
    if line_number:
      location = f'{location}:{line_number}'
    location_by_address[key] = (location, outermost['FunctionName'])
  return location_by_address
def main():
  """Command-line entry point: lists the static initializers of a binary.

  Reads the initializer addresses from the binary's .init_array, symbolizes
  them and, when there are at most _MAX_DISASSEMBLY_SYMBOLS of them, attaches
  disassembly to each one. Results go to stdout, either as a JSON object of
  the form {"entries": [...]} (with --json) or as human-readable text.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('--json',
                      action='store_true',
                      help='Output in JSON format')
  parser.add_argument('binary', help='The non-stripped binary to analyze.')
  args = parser.parse_args()

  address_tuples = _DumpInitArray(args.binary)
  addresses = _ResolveRelativeAddresses(args.binary, address_tuples)
  symbolized_by_address = _SymbolizeAddresses(args.binary, addresses)

  skip_disassembly = len(addresses) > _MAX_DISASSEMBLY_SYMBOLS
  # Always bound, so the loop below never depends on which branch ran.
  size_by_address = {}
  if skip_disassembly:
    sys.stderr.write('Not collecting disassembly due to the large number of '
                     'results.\n')
  else:
    size_by_address = _ParseNm(args.binary, addresses)

  entries = []
  for address in addresses:
    filename, symbol_name = symbolized_by_address[address]
    if skip_disassembly:
      disassembly = ''
    else:
      size = size_by_address.get(address, 0)
      if size == 0:
        # Without a size there is no end address to hand to objdump.
        disassembly = ('Not showing disassembly because of unknown symbol size '
                       '(assembly symbols sometimes omit size).\n')
      else:
        disassembly = _Disassemble(args.binary, address, address + size)
    entries.append({
        'address': address,
        'disassembly': disassembly,
        'filename': filename,
        'symbol_name': symbol_name,
    })

  if args.json:
    print(json.dumps({'entries': entries}))
    return

  for e in entries:
    print(f'# 0x{e["address"]:x} {e["filename"]} {e["symbol_name"]}')
    print(e['disassembly'])
  print(f'Found {len(entries)} files containing static initializers.')
# Run the tool only when executed as a script, not when imported.
if __name__ == '__main__':
  main()