| #!/usr/bin/env python3 |
| # Copyright 2012 The Chromium Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Dumps the names, addresses, and disassembly of static initializers. |
| |
| Usage example: |
| tools/linux/dump-static-initializers.py out/Release/chrome |
| |
| For an explanation of static initializers, see: //docs/static_initializers.md. |
| """ |
| |
| import argparse |
| import json |
| import os |
| import pathlib |
| import subprocess |
| import sys |
| |
# Common path prefix of the LLVM tools used below. Callers append the tool
# name ('nm', 'objdump', 'readobj', 'symbolizer') to get an executable path.
_TOOLCHAIN_PREFIX = str(
    pathlib.Path(__file__).parents[2].joinpath('third_party', 'llvm-build',
                                               'Release+Asserts', 'bin',
                                               'llvm-'))

# It is too slow to dump disassembly for a lot of symbols.
_MAX_DISASSEMBLY_SYMBOLS = 10
| |
| |
def _ParseNm(binary, addresses):
  """Looks up the sizes of the symbols located at the given addresses.

  Runs llvm-nm with --print-size on the binary and scans its output.

  Args:
    binary: Path to the binary to inspect.
    addresses: Iterable of integer symbol addresses of interest.

  Returns:
    A dict mapping address -> symbol size for every requested address that
    appears on a four-field nm output line. Other addresses are omitted.
  """
  nm_cmd = [_TOOLCHAIN_PREFIX + 'nm', '--print-size', binary]
  nm_output = subprocess.check_output(nm_cmd, encoding='utf8')
  wanted = set(addresses)
  size_by_address = {}
  # Only lines with exactly four fields are considered, e.g.:
  # 000000000de66bd0 0000000000000026 t _GLOBAL__sub_I_add.cc
  for fields in map(str.split, nm_output.splitlines()):
    if len(fields) == 4:
      symbol_address = int(fields[0], 16)
      if symbol_address in wanted:
        size_by_address[symbol_address] = int(fields[1], 16)
  return size_by_address
| |
| |
def _Disassemble(binary, start, end):
  """Returns objdump output for the address range [start, end) as one string.

  Args:
    binary: Path to the binary to disassemble.
    start: Integer passed to objdump as --start-address.
    end: Integer passed to objdump as --stop-address.

  Returns:
    A header line followed by either only the lines that start with ';'
    (treated as source lines) or, when there are none, the full objdump
    output.
  """
  objdump_cmd = [
      _TOOLCHAIN_PREFIX + 'objdump',
      binary,
      '--disassemble',
      '--source',
      '--demangle',
      f'--start-address=0x{start:x}',
      f'--stop-address=0x{end:x}',
  ]
  output_lines = subprocess.check_output(
      objdump_cmd, encoding='utf8').splitlines(keepends=True)
  # Lines beginning with ';' are treated as interleaved source lines.
  source_only = [line for line in output_lines if line.startswith(';')]
  if source_only:
    header = 'Showing source lines that appear in the symbol (via objdump).\n'
    shown = source_only
  else:
    header = (
        'Symbol missing source lines. Showing raw disassembly (via objdump).\n')
    shown = output_lines
  pieces = [header]
  if len(shown) > 10:
    pieces.append('This might be verbose due to inlined functions.\n')
  pieces.extend(shown)
  return ''.join(pieces)
| |
| |
def _DumpInitArray(binary):
  """Reads the entries of the binary's .init_array section.

  Runs llvm-readobj --hex-dump=.init_array and decodes each pointer-sized
  entry using the byte order and address size reported in the tool's header.

  Args:
    binary: Path to the binary to inspect.

  Returns:
    A list of (init_array_address, address) tuples, one per entry.
    init_array_address is the address of the entry itself and address is the
    value stored there (with the low bit cleared when Arch is arm).
  """
  cmd = [_TOOLCHAIN_PREFIX + 'readobj', '--hex-dump=.init_array', binary]
  output = subprocess.check_output(cmd, encoding='utf8')
  # Example output:
  # File: lib.unstripped/libmonochrome_64.so
  # Format: elf64-littleaarch64
  # Arch: aarch64
  # AddressSize: 64bit
  # LoadName: libmonochrome_64.so
  # Hex dump of section '.init_array':
  # 0x091f6198 14f80204 00000000 c0cf3003 00000000 ..........0.....
  # 0x091f61a8 68c70104 00000000                   h.......
  is_64_bit = False
  is_arm = False
  byte_order = 'little'
  ret = []
  for line in output.splitlines():
    if line.startswith('Format:') and 'big' in line:
      byte_order = 'big'
      continue
    if line == 'Arch: arm':
      is_arm = True
      continue
    if line == 'AddressSize: 64bit':
      is_64_bit = True
      continue
    if not line.startswith('0x'):
      continue
    # Split on the first space instead of slicing a fixed 10 characters: the
    # address column is presumably wider for addresses that need more than 8
    # hex digits, and a fixed slice would truncate the address and leak its
    # remaining digits into the hex columns.
    address_text, _, remainder = line.partition(' ')
    init_array_address = int(address_text, 16)
    # Drop the trailing ASCII rendering of the row.
    # NOTE(review): relies on llvm-readobj padding short rows so that removing
    # 16 trailing characters never cuts into the hex columns - confirm.
    parts = remainder[:-16].split()
    assert len(parts) <= 4, 'Too many parts: ' + line
    if is_64_bit:
      # Each 64-bit entry is printed as two adjacent 32-bit hex groups.
      parts = [parts[i] + parts[i + 1] for i in range(0, len(parts), 2)]
    entry_size = 8 if is_64_bit else 4
    for part in parts:
      address = int.from_bytes(bytearray.fromhex(part),
                               byteorder=byte_order,
                               signed=False)
      if is_arm:
        address = address & ~1  # Adjust for arm thumb addresses being odd.
      ret.append((init_array_address, address))
      init_array_address += entry_size
  return ret
| |
| |
def _DumpRelativeRelocations(binary):
  """Collects the relocations whose output line mentions 'RELATIVE'.

  Args:
    binary: Path to the binary to inspect.

  Returns:
    A dict mapping the first field of each matching line to its last field,
    both parsed as hexadecimal integers. In the example below that is the
    relocated address -> the value it resolves to.
  """
  # Example output from: llvm-readobj --relocations chrome
  # File: chrome
  # Format: elf64-x86-64
  # Arch: x86_64
  # AddressSize: 64bit
  # LoadName: <Not found>
  # Relocations [
  #   Section (10) .rela.dyn {
  #     0x26C2AD88 R_X86_64_RELATIVE - 0xA6DABE0
  #     0x26C2AD90 R_X86_64_RELATIVE - 0xA6DC2B0
  #     ...
  readobj_cmd = [_TOOLCHAIN_PREFIX + 'readobj', '--relocations', binary]
  readobj_output = subprocess.check_output(readobj_cmd, encoding='utf8')
  relative_lines = (l for l in readobj_output.splitlines() if 'RELATIVE' in l)
  return {
      int(fields[0], 16): int(fields[-1], 16)
      for fields in map(str.split, relative_lines)
  }
| |
| |
def _ResolveRelativeAddresses(binary, address_tuples):
  """Replaces zero .init_array values with their relocation targets.

  Args:
    binary: Path to the binary; only read if some entry has a zero value.
    address_tuples: Iterable of (init_array_address, address) tuples.

  Returns:
    A list of addresses in input order. Non-zero values are passed through;
    zero values are looked up by init_array_address in the binary's relative
    relocations.

  Raises:
    Exception: A zero entry has no matching relative relocation.
  """
  relocations = None  # Loaded lazily, at most once.
  resolved = []
  for slot_address, target in address_tuples:
    if target != 0:
      resolved.append(target)
      continue
    if relocations is None:
      relocations = _DumpRelativeRelocations(binary)
    target = relocations.get(slot_address)
    if target is None:
      raise Exception('Failed to resolve relocation for address: ' +
                      hex(slot_address))
    resolved.append(target)
  return resolved
| |
| |
def _SymbolizeAddresses(binary, addresses):
  """Maps addresses to their source location and function name.

  Args:
    binary: Path to the binary to symbolize against.
    addresses: Sequence of integer addresses.

  Returns:
    A dict mapping address -> (filename, function_name). The filename has
    ':<line>' appended when a non-zero line number is available. Returns an
    empty dict, without running the symbolizer, when addresses is empty.
  """
  # Example output from: llvm-symbolizer -e chrome \
  #   --output-style=JSON --functions 0x3323430 0x403a768 0x5489b98
  # [{"Address":"0xa6afdd0","ModuleName":"chrome","Symbol":[...]}, ...]
  # Where Symbol = {"Column":24,"Discriminator":0,"FileName":"...",
  #     "FunctionName":"MaybeStartBackgroundThread","Line":85,
  #     "StartAddress":"0xa6afdd0","StartFileName":"","StartLine":0}
  if not addresses:
    return {}
  symbolizer_cmd = [
      _TOOLCHAIN_PREFIX + 'symbolizer', '-e', binary, '--functions',
      '--output-style=JSON'
  ]
  symbolizer_cmd.extend(hex(a) for a in addresses)
  results = json.loads(subprocess.check_output(symbolizer_cmd, encoding='utf8'))
  symbolized = {}
  for result in results:
    # Inlining can yield several frames; the last one is the outer-most symbol.
    frames = result['Symbol']
    outermost = frames[-1]
    # Prefer the last frame that carries a line number for the filename,
    # falling back to the outer-most frame.
    location = outermost
    for frame in reversed(frames):
      if frame['Line'] != 0:
        location = frame
        break
    address = int(result['Address'], 16)
    filename = location['FileName']
    line_number = location['Line']
    if line_number:
      filename = f'{filename}:{line_number}'
    symbolized[address] = (filename, outermost['FunctionName'])
  return symbolized
| |
| |
def main():
  """Dumps the static initializers of the binary named on the command line.

  Reads the .init_array entries, resolves and symbolizes them, and prints one
  record per entry, either as JSON (--json) or as human-readable text.
  Disassembly is included only when there are at most
  _MAX_DISASSEMBLY_SYMBOLS entries.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('--json',
                      action='store_true',
                      help='Output in JSON format')
  parser.add_argument('binary', help='The non-stripped binary to analyze.')
  args = parser.parse_args()

  address_tuples = _DumpInitArray(args.binary)
  addresses = _ResolveRelativeAddresses(args.binary, address_tuples)
  symbolized_by_address = _SymbolizeAddresses(args.binary, addresses)

  skip_disassembly = len(addresses) > _MAX_DISASSEMBLY_SYMBOLS
  # Always bound so the loop below never depends on which branch ran.
  size_by_address = {}
  if skip_disassembly:
    sys.stderr.write('Not collecting disassembly due to the large number of '
                     'results.\n')
  else:
    size_by_address = _ParseNm(args.binary, addresses)

  entries = []
  for address in addresses:
    filename, symbol_name = symbolized_by_address[address]
    if skip_disassembly:
      disassembly = ''
    else:
      size = size_by_address.get(address, 0)
      if size == 0:
        # Without a size there is no end address to hand to objdump.
        disassembly = ('Not showing disassembly because of unknown symbol size '
                       '(assembly symbols sometimes omit size).\n')
      else:
        disassembly = _Disassemble(args.binary, address, address + size)
    entries.append({
        'address': address,
        'disassembly': disassembly,
        'filename': filename,
        'symbol_name': symbol_name,
    })

  if args.json:
    print(json.dumps({'entries': entries}))
    return

  for e in entries:
    print(f'# 0x{e["address"]:x} {e["filename"]} {e["symbol_name"]}')
    print(e['disassembly'])

  print(f'Found {len(entries)} files containing static initializers.')
| |
| |
# Run main() only when executed as a script, not when imported.
if __name__ == '__main__':
  main()