blob: 452548e3e78fb82f389fa865e16d9dccf44df8e6 [file] [log] [blame]
#!/usr/bin/python
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Utilities to get and manipulate symbols from a binary."""
import collections
import logging
import os
import re
import subprocess
import sys
import cygprofile_utils
sys.path.insert(
0, os.path.join(os.path.dirname(__file__), os.pardir, os.pardir,
'third_party', 'android_platform', 'development',
'scripts'))
import symbol
# Cap handed to cygprofile_utils.WarningCollector in CreateNameToSymbolInfo.
_MAX_WARNINGS_TO_PRINT = 200

# One function symbol parsed from objdump output: its name, its offset, its
# size and the section it lives in.
SymbolInfo = collections.namedtuple('SymbolInfo', 'name offset size section')
def SetArchitecture(arch):
  """Select the architecture of the binaries that will be symbolized.

  Args:
    arch: the architecture value, stored as-is in symbol.ARCH.
  """
  symbol.ARCH = arch
# Symbol scopes accepted on a function line: global, local and weak.
_ACCEPTED_SCOPES = frozenset(('g', 'l', 'w'))

# Forbid ARM mapping symbols and other unexpected symbol names, but allow $
# characters in a non-initial position, which can appear as a component of a
# mangled name, e.g. Clang can mangle a lambda function to:
# 02cd61e0 l F .text 000000c0 _ZZL11get_globalsvENK3$_1clEv
# The equivalent objdump line from GCC is:
# 0325c58c l F .text 000000d0 _ZZL11get_globalsvENKUlvE_clEv
_SYMBOL_NAME_RE = re.compile('^[a-zA-Z0-9_.][a-zA-Z0-9_.$]*$')


def _FromObjdumpLine(line):
  """Create a SymbolInfo by parsing a properly formatted objdump output line.

  Args:
    line: line from objdump

  Returns:
    An instance of SymbolInfo if the line represents a symbol, None otherwise.

  Raises:
    AssertionError: the line is a function symbol line ('F' in the third
        column) but has an unexpected number of fields, an unexpected scope
        or an unexpected symbol name.
    ValueError: the offset or the size is not a hexadecimal number.
  """
  # All of the symbol lines we care about are in the form
  # 0000000000 g F .text.foo 000000000 [.hidden] foo
  # where g (global) might also be l (local) or w (weak).
  parts = line.split()
  if len(parts) < 6 or parts[2] != 'F':
    return None

  # The checks below raise explicitly instead of using assert statements, so
  # that they are still enforced when Python runs with -O.
  if not (len(parts) == 6 or (len(parts) == 7 and parts[5] == '.hidden')):
    raise AssertionError('Unexpected objdump symbol line: %r' % line)
  if parts[1] not in _ACCEPTED_SCOPES:
    raise AssertionError(
        'Unexpected symbol scope %r in objdump line: %r' % (parts[1], line))

  offset = int(parts[0], 16)
  section = parts[3]
  size = int(parts[4], 16)
  # split() already discarded all whitespace, including the trailing newline.
  name = parts[-1]
  if not _SYMBOL_NAME_RE.match(name):
    raise AssertionError(
        'Unexpected symbol name %r in objdump line: %r' % (name, line))
  return SymbolInfo(name=name, offset=offset, section=section, size=size)
def _SymbolInfosFromStream(objdump_lines):
  """Collect the symbols described by the lines of an objdump output.

  Args:
    objdump_lines: An iterable of lines

  Returns:
    A list of SymbolInfo, one per line that _FromObjdumpLine recognizes as a
    symbol, in the order the lines were read.
  """
  parsed_lines = (_FromObjdumpLine(line) for line in objdump_lines)
  # Lines that do not describe a symbol are parsed to None; drop them.
  return [parsed for parsed in parsed_lines if parsed is not None]
def SymbolInfosFromBinary(binary_filename):
  """Runs objdump to get all the symbols from a binary.

  The exit status of objdump is not checked.

  Args:
    binary_filename: path to the binary.

  Returns:
    A list of SymbolInfo from the binary.
  """
  command = (symbol.ToolPath('objdump'), '-t', '-w', binary_filename)
  # universal_newlines=True makes the pipe yield text lines on Python 3 as
  # well. Without it the lines are bytes there, they never compare equal to
  # the str literals used by the parser, and every symbol is silently dropped.
  p = subprocess.Popen(command, shell=False, stdout=subprocess.PIPE,
                       universal_newlines=True)
  try:
    return _SymbolInfosFromStream(p.stdout)
  finally:
    # Runs on both the success and the error path: release the pipe, then
    # reap the child process.
    p.stdout.close()
    p.wait()
def GroupSymbolInfosByOffset(symbol_infos):
  """Bucket symbols by their offset.

  Several symbols can share one offset, so every offset maps to a list of
  symbols, kept in the order they were encountered.

  Args:
    symbol_infos: iterable of SymbolInfo instances

  Returns:
    a dict {offset: [symbol_info1, ...], ...}
  """
  by_offset = {}
  for info in symbol_infos:
    by_offset.setdefault(info.offset, []).append(info)
  return by_offset
def GroupSymbolInfosByName(symbol_infos):
  """Bucket symbols by their name.

  One name can be found at several offsets, so every name maps to a list of
  symbols, kept in the order they were encountered.

  Args:
    symbol_infos: iterable of SymbolInfo instances

  Returns:
    a dict {name: [symbol_info1, ...], ...}
  """
  by_name = {}
  for info in symbol_infos:
    name = info.name
    if name not in by_name:
      by_name[name] = []
    by_name[name].append(info)
  return by_name
def CreateNameToSymbolInfo(symbol_infos):
  """Create a dict {name: symbol_info, ...}.

  A warning is recorded, through cygprofile_utils.WarningCollector, for every
  name that appears at more than one offset.

  Args:
    symbol_infos: iterable of SymbolInfo instances

  Returns:
    a dict {name: symbol_info, ...}
    If a symbol name corresponds to more than one symbol_info, the symbol_info
    with the lowest offset is chosen.
  """
  # TODO(lizeb,pasko): move the functionality in this method into
  # check_orderfile.
  symbol_infos_by_name = {}
  warnings = cygprofile_utils.WarningCollector(_MAX_WARNINGS_TO_PRINT)
  # values() instead of itervalues(): the latter does not exist on Python 3.
  for infos in GroupSymbolInfosByName(symbol_infos).values():
    first_symbol_info = min(infos, key=lambda x: x.offset)
    symbol_infos_by_name[first_symbol_info.name] = first_symbol_info
    if len(infos) > 1:
      warnings.Write('Symbol %s appears at %d offsets: %s' %
                     (first_symbol_info.name,
                      len(infos),
                      ','.join([hex(x.offset) for x in infos])))
  warnings.WriteEnd('symbols at multiple offsets.')
  return symbol_infos_by_name
def DemangleSymbol(mangled_symbol):
  """Demangle a symbol name.

  Args:
    mangled_symbol: the mangled symbol name.

  Returns:
    Whatever symbol.CallCppFilt returns for mangled_symbol.
  """
  return symbol.CallCppFilt(mangled_symbol)