blob: d3025e3cc94759e8b074087daa11b6d5ee0eb48b [file] [log] [blame]
#!/usr/bin/env python
#===- lib/asan/scripts/ -----------------------------------===#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Example of use: -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" -s "$HOME/SymbolFiles" < asan.log
This script provides a way for external plug-ins to hook into the behaviour of
various parts of this script (see `--plugins`). This is useful for situations
where it is necessary to handle site-specific quirks (e.g. binaries with debug
symbols only accessible via a remote service) without having to modify the
script itself.
import argparse
import bisect
import getopt
import logging
import os
import re
import subprocess
import sys
symbolizers = {}
demangle = False
binutils_prefix = None
fix_filename_patterns = None
logfile = sys.stdin
allow_system_symbolizer = True
force_system_symbolizer = False
# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
    """Normalize a source location string reported by a symbolizer.

    Every pattern in the module-level `fix_filename_patterns` (when set) is
    stripped together with everything preceding it. Afterwards locations
    inside ASan runtime sources are collapsed to `_asan_rtl_` and the
    `crtstuff.c:0` placeholder is rewritten to `???:0`.
    """
    for prefix_pattern in fix_filename_patterns or ():
        file_name = re.sub('.*' + prefix_pattern, '', file_name)
    # Applied in this order: runtime frames first, then the crtstuff marker.
    rewrites = (
        ('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_'),
        ('.*crtstuff.c:0', '???:0'),
    )
    for pattern, replacement in rewrites:
        file_name = re.sub(pattern, replacement, file_name)
    return file_name
def is_valid_arch(s):
    """Return True when `s` is one of the architecture names listed below."""
    known_arches = (
        "i386", "x86_64", "x86_64h",
        "arm", "armv6", "armv7", "armv7s", "armv7k", "arm64",
        "powerpc64", "powerpc64le",
        "s390x", "s390",
    )
    return s in known_arches
def guess_arch(addr):
    """Guess the architecture from the textual width of an address.

    `addr` is the address string as it appears in the log. Anything longer
    than a 32-bit address is reported as 'x86_64', everything else as 'i386'.
    """
    # 10 = len('0x') + 8 hex digits.
    widest_32bit_address = 10
    return 'x86_64' if len(addr) > widest_32bit_address else 'i386'
class Symbolizer(object):
    """Common interface implemented by every symbolizer backend."""

    def __init__(self):
        # Nothing to set up; present so subclasses can call super().__init__().
        pass

    def symbolize(self, addr, binary, offset):
        """Symbolize the given address (pair of binary and offset).

        Overridden in subclasses.

        Args:
            addr: virtual address of an instruction.
            binary: path to executable/shared object containing this
                instruction.
            offset: instruction offset in the @binary.

        Returns:
            list of strings (one string for each inlined frame) describing
            the code locations for this instruction (that is, function name,
            file name, line and column numbers). This base implementation
            always returns None.
        """
        return None
class LLVMSymbolizer(Symbolizer):
    """Symbolizer that talks to one long-lived llvm-symbolizer process."""

    def __init__(self, symbolizer_path, default_arch, system, dsym_hints=None):
        """Remember the configuration and start the child process.

        Args:
            symbolizer_path: executable to launch.
            default_arch: value passed as `--default-arch`.
            system: platform name; dSYM hints are only passed on 'Darwin'.
            dsym_hints: iterable of `--dsym-hint` values (default: none).
        """
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        self.default_arch = default_arch
        self.system = system
        # A fresh list per instance instead of a shared mutable default.
        self.dsym_hints = [] if dsym_hints is None else dsym_hints
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Launch the symbolizer; return the Popen object or None on OSError."""
        cmd = [self.symbolizer_path,
               '--demangle=%s' % demangle,
               '--default-arch=%s' % self.default_arch]
        if self.system == 'Darwin':
            cmd.extend('--dsym-hint=%s' % hint for hint in self.dsym_hints)
        logging.debug(' '.join(cmd))
        try:
            # NOTE(review): the keyword arguments after `stdin` were cut off in
            # the reviewed text. stdout must be a pipe because symbolize()
            # reads from it, and text mode is required because symbolize()
            # writes str; confirm `bufsize=0` against the original file.
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    bufsize=0,
                                    universal_newlines=True)
        except OSError:
            # The executable could not be started; callers treat None as
            # "this symbolizer is unavailable".
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns a list with one '<addr> in <function> <file>' string per
        reported frame, or None when the process is unavailable, the exchange
        fails, or only '??' frames come back.
        """
        if not self.pipe:
            return None
        result = []
        try:
            symbolizer_input = '"%s" %s' % (binary, offset)
            self.pipe.stdin.write("%s\n" % symbolizer_input)
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    # An empty line (or EOF) ends the reply for this query.
                    break
                file_name = fix_filename(self.pipe.stdout.readline().rstrip())
                if (not function_name.startswith('??') or
                        not file_name.startswith('??')):
                    # Append only non-trivial frames.
                    result.append('%s in %s %s' % (addr, function_name,
                                                   file_name))
        except Exception:
            # Best effort: any failure means "no result" so that the caller
            # can fall back to another symbolizer, but leave a trace.
            logging.debug('llvm-symbolizer query failed for "%s" %s',
                          binary, offset, exc_info=True)
            result = []
        return result or None
def LLVMSymbolizerFactory(system, default_arch, dsym_hints=None):
    """Create an LLVMSymbolizer, locating the executable via the environment.

    The path is taken from LLVM_SYMBOLIZER_PATH, then ASAN_SYMBOLIZER_PATH;
    when neither is set (or both are empty) `llvm-symbolizer` is assumed to
    be in PATH.

    Args:
        system: platform name forwarded to LLVMSymbolizer.
        default_arch: architecture forwarded to LLVMSymbolizer.
        dsym_hints: iterable of dSYM hints (default: none).
    """
    if dsym_hints is None:
        # Avoid a mutable default shared between calls.
        dsym_hints = []
    symbolizer_path = (os.getenv('LLVM_SYMBOLIZER_PATH') or
                       os.getenv('ASAN_SYMBOLIZER_PATH') or
                       'llvm-symbolizer')
    return LLVMSymbolizer(symbolizer_path, default_arch, system, dsym_hints)
class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer that drives one addr2line process bound to one binary."""

    def __init__(self, binary):
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()
        # Written after every real offset; its reply marks the end of the
        # (possibly multi-frame) answer for the real offset.
        self.output_terminator = -1

    def open_addr2line(self):
        """Start `addr2line -fi [-‑demangle] -e <binary>` and return the Popen.

        The tool name is prefixed with the module-level `binutils_prefix`
        when one is configured.
        """
        addr2line_tool = 'addr2line'
        if binutils_prefix:
            addr2line_tool = binutils_prefix + addr2line_tool
        cmd = [addr2line_tool, '-fi']
        if demangle:
            cmd += ['--demangle']
        cmd += ['-e', self.binary]
        logging.debug(' '.join(cmd))
        # NOTE(review): the arguments after `stdout` were cut off in the
        # reviewed text; text mode is needed because symbolize() writes str.
        # Confirm `bufsize=0` against the original file.
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                                bufsize=0,
                                universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns None when asked about a different binary. Otherwise returns
        one '<addr> in <function> <file>' string per frame; if the exchange
        with addr2line fails, a '?? ??:0' placeholder frame is appended.
        """
        if self.binary != binary:
            return None
        lines = []
        try:
            self.pipe.stdin.write("%s\n" % offset)
            self.pipe.stdin.write("%s\n" % self.output_terminator)
            is_first_frame = True
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                file_name = self.pipe.stdout.readline().rstrip()
                if is_first_frame:
                    # The first pair always belongs to the real offset, even
                    # if it is unknown ('??').
                    is_first_frame = False
                elif function_name in ['', '??']:
                    # Reply to the terminator: the answer is complete.
                    assert file_name == function_name
                    break
                lines.append((function_name, file_name))
        except Exception:
            # Best effort: report an unknown frame rather than aborting.
            lines.append(('??', '??:0'))
        return ['%s in %s %s' % (addr, function_name, fix_filename(file_name))
                for (function_name, file_name) in lines]
class UnbufferedLineConverter(object):
    """
    Wrap a child process that responds to each line of input with one line of
    output. Uses pty to trick the child into providing unbuffered output.
    """

    def __init__(self, args, close_stderr=False):
        """Fork a pty-attached child running `args`.

        Args:
            args: argv list; `args[0]` is looked up via PATH by execvp.
            close_stderr: when true, the child's fd 2 is redirected to
                /dev/null before exec.
        """
        # Local imports so that the script can start on Windows.
        import pty
        import termios
        pid, fd = pty.fork()
        if pid == 0:
            # We're the child. Transfer control to command.
            if close_stderr:
                dev_null = os.open('/dev/null', 0)
                os.dup2(dev_null, 2)
            os.execvp(args[0], args)
        else:
            # Disable echoing, otherwise every line we write would be read
            # back as if it were output.
            attr = termios.tcgetattr(fd)
            attr[3] = attr[3] & ~termios.ECHO
            termios.tcsetattr(fd, termios.TCSANOW, attr)
            # Set up a file()-like interface to the child process
            self.r = os.fdopen(fd, "r", 1)
            self.w = os.fdopen(os.dup(fd), "w", 1)

    def convert(self, line):
        """Send one line to the child and return its one-line reply."""
        self.w.write(line + "\n")
        return self.readline()

    def readline(self):
        """Read one line from the child with trailing whitespace removed."""
        return self.r.readline().rstrip()
class DarwinSymbolizer(Symbolizer):
    """Symbolizer that queries an `atos` process bound to one binary/arch."""

    def __init__(self, addr, binary, arch):
        """Store the target and start atos. `addr` is accepted but unused."""
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        self.arch = arch
        # symbolize() reads self.atos, so the process must exist up front.
        self.open_atos()

    def open_atos(self):
        """Start `atos -o <binary> -arch <arch>` behind a pty wrapper."""
        logging.debug('atos -o %s -arch %s', self.binary, self.arch)
        cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
        self.atos = UnbufferedLineConverter(cmdline, close_stderr=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns None for a different binary, otherwise a single-element list:
        '<addr> in <function> <file>' for a well-formed atos reply,
        '<addr> in <raw reply>' for anything else, or an unsymbolized
        '<addr> (<binary>:<arch>+<offset>)' when the binary is missing.
        """
        if self.binary != binary:
            return None
        if not os.path.exists(binary):
            # If the binary doesn't exist atos will exit which will lead to
            # IOError exceptions being raised later on so just don't try to
            # symbolize.
            return ['{} ({}:{}+{})'.format(addr, binary, self.arch, offset)]
        atos_line = self.atos.convert('0x%x' % int(offset, 16))
        # Skip informational lines until the actual answer arrives.
        while "got symbolicator for" in atos_line:
            atos_line = self.atos.readline()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in <object>) (<file>:<line>)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        logging.debug('atos_line: %s', atos_line)
        if match:
            # Drop the parenthesised argument list from the function name.
            function_name = re.sub(r'\(.*?\)', '', match.group(1))
            file_name = fix_filename(match.group(3))
            return ['%s in %s %s' % (addr, function_name, file_name)]
        return ['%s in %s' % (addr, atos_line)]
# Chain several symbolizers so that if one symbolizer fails, we fall back
# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
    """Try several symbolizers in order and return the first usable result."""

    def __init__(self, symbolizer_list):
        """`symbolizer_list` may contain None entries; they are skipped."""
        super(ChainSymbolizer, self).__init__()
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns the first truthy result produced by a chained symbolizer, or
        None when every symbolizer fails.
        """
        for symbolizer in self.symbolizer_list:
            if symbolizer:
                result = symbolizer.symbolize(addr, binary, offset)
                if result:
                    return result
        return None

    def append_symbolizer(self, symbolizer):
        """Add `symbolizer` as the last fallback in the chain."""
        self.symbolizer_list.append(symbolizer)
def BreakpadSymbolizerFactory(binary):
    """Return a BreakpadSymbolizer for `binary`, or None if unavailable.

    The symbol file is looked up at `binary + $BREAKPAD_SUFFIX`; None is
    returned when the variable is unset/empty or the file does not exist.
    """
    breakpad_suffix = os.getenv('BREAKPAD_SUFFIX')
    if not breakpad_suffix:
        return None
    symbol_file = binary + breakpad_suffix
    if not os.access(symbol_file, os.F_OK):
        return None
    return BreakpadSymbolizer(symbol_file)
def SystemSymbolizerFactory(system, addr, binary, arch):
    """Create the platform's native symbolizer for `binary`.

    'Darwin' gets a DarwinSymbolizer; 'Linux', 'FreeBSD', 'NetBSD' and
    'SunOS' get an Addr2LineSymbolizer; any other system yields None.
    """
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary, arch)
    addr2line_systems = ('Linux', 'FreeBSD', 'NetBSD', 'SunOS')
    if system in addr2line_systems:
        return Addr2LineSymbolizer(binary)
    return None
class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that answers queries from a Breakpad text symbol file."""

    def __init__(self, filename):
        """Load and index the symbol file at `filename`."""
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        # `open` works on both Python 2 and 3 (the `file` builtin is
        # Python 2 only) and the context manager closes the handle.
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        self.files = []         # FILE records, indexed by file number
        self.symbols = {}       # function start address -> name
        self.address_list = []  # sorted line-record start addresses
        self.addresses = {}     # start address -> (func addr, size, line, file)
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Index the FILE, PUBLIC, FUNC and line records found in `lines`."""
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Blank line: nothing to index.
                continue
            if fragments[0] == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif fragments[0] == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif fragments[0] in ['CFI', 'STACK']:
                # Unwind information is irrelevant for symbolization.
                pass
            elif fragments[0] == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # A PUBLIC name recorded earlier for this address wins.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(fragments[0], 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        # get_sym_file_line() bisects this list, so it must be sorted.
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Return (symbol, filename, line) covering `addr`, or None."""
        if addr in self.addresses:
            key = addr
        else:
            index = bisect.bisect_left(self.address_list, addr)
            if index == 0:
                # `addr` lies before the first known line record.
                return None
            key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        symbol = self.symbols[sym_id]
        filename = self.files[file_no]
        if addr < key + size:
            return symbol, filename, line_no
        return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns ['<addr> in <function> <file>:<line>'] when `offset` (a hex
        string) falls inside a known line record of this binary, else None.
        """
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if res:
            function_name, file_name, line_no = res
            return ['%s in %s %s:%d' % (
                addr, function_name, file_name, line_no)]
        return None
class SymbolizationLoop(object):
    """Read a log line by line and replace stack frames with symbolized ones."""

    def __init__(self, plugin_proxy=None, dsym_hint_producer=None):
        """Pick the per-line handler for the current platform.

        Args:
            plugin_proxy: object providing `filter_binary_path(path)`; when
                None, binary paths are used unfiltered.
            dsym_hint_producer: optional callable mapping a binary path to an
                iterable of dSYM hints (only consulted on Darwin).

        Raises:
            Exception: on a non-Windows system that is not supported.
        """
        self.plugin_proxy = plugin_proxy
        if sys.platform == 'win32':
            # ASan on Windows uses dbghelp.dll to symbolize in-process, which
            # works even in sandboxed processes. Nothing needs to be done here.
            self.process_line = self.process_line_echo
        else:
            # Used by clients who may want to supply a different binary name.
            # E.g. in Chrome several binaries may share a single .dSYM.
            self.dsym_hint_producer = dsym_hint_producer
            self.system = os.uname()[0]
            if self.system not in ['Linux', 'Darwin', 'FreeBSD', 'NetBSD',
                                   'SunOS']:
                raise Exception('Unknown system')
            self.llvm_symbolizers = {}
            self.last_llvm_symbolizer = None
            self.dsym_hints = set([])
            self.frame_no = 0
            self.process_line = self.process_line_posix

    def symbolize_address(self, addr, binary, offset, arch):
        """Symbolize one frame, falling back to the system symbolizer.

        Returns the non-empty list of frame strings produced by the chain.

        Raises:
            Exception: when llvm-symbolizer yields nothing and the system
                symbolizer is not allowed.
        """
        # On non-Darwin (i.e. on platforms without .dSYM debug info) always use
        # a single symbolizer binary.
        # On Darwin, if the dsym hint producer is present:
        # 1. check whether we've seen this binary already; if so,
        #    use |llvm_symbolizers[binary]|, which has already loaded the debug
        #    info for this binary (might not be the case for
        #    |last_llvm_symbolizer|);
        # 2. otherwise check if we've seen all the hints for this binary
        #    already; if so, reuse |last_llvm_symbolizer| which has the full
        #    set of hints;
        # 3. otherwise create a new symbolizer and pass all currently known
        #    .dSYM hints to it.
        result = None
        if not force_system_symbolizer:
            if binary not in self.llvm_symbolizers:
                use_new_symbolizer = True
                if self.system == 'Darwin' and self.dsym_hint_producer:
                    dsym_hints_for_binary = set(self.dsym_hint_producer(binary))
                    use_new_symbolizer = bool(
                        dsym_hints_for_binary - self.dsym_hints)
                    self.dsym_hints |= dsym_hints_for_binary
                if self.last_llvm_symbolizer and not use_new_symbolizer:
                    self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
                else:
                    self.last_llvm_symbolizer = LLVMSymbolizerFactory(
                        self.system, arch, self.dsym_hints)
                    self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
            # Use the chain of symbolizers:
            # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
            # (fall back to next symbolizer if the previous one fails).
            if binary not in symbolizers:
                symbolizers[binary] = ChainSymbolizer(
                    [BreakpadSymbolizerFactory(binary),
                     self.llvm_symbolizers[binary]])
            result = symbolizers[binary].symbolize(addr, binary, offset)
        else:
            symbolizers[binary] = ChainSymbolizer([])
        if result is None:
            if not allow_system_symbolizer:
                raise Exception('Failed to launch or use llvm-symbolizer.')
            # Initialize system symbolizer only if other symbolizers failed.
            symbolizers[binary].append_symbolizer(
                SystemSymbolizerFactory(self.system, addr, binary, arch))
            result = symbolizers[binary].symbolize(addr, binary, offset)
        # The system symbolizer must produce some result.
        assert result
        return result

    def get_symbolized_lines(self, symbolized_lines):
        """Number the frames; echo the current line if there are none."""
        if not symbolized_lines:
            return [self.current_line]
        result = []
        for symbolized_frame in symbolized_lines:
            # NOTE(review): the frame prefix is reproduced exactly as it
            # appears in the reviewed text; confirm its leading whitespace
            # against the original file.
            result.append(' #%s %s' % (str(self.frame_no),
                                       symbolized_frame.rstrip()))
            self.frame_no += 1
        return result

    def process_logfile(self):
        """Process every line of the module-level `logfile` and print it."""
        self.frame_no = 0
        for line in logfile:
            processed = self.process_line(line)
            print('\n'.join(processed))

    def process_line_echo(self, line):
        """Return the line unchanged apart from trailing whitespace."""
        return [line.rstrip()]

    def process_line_posix(self, line):
        """Symbolize `line` if it is a stack frame, otherwise echo it.

        Returns a list of output lines (inlined frames expand to several).
        """
        self.current_line = line.rstrip()
        # Frame lines have the shape:
        #   #0 0x7f6e35cf2e45 (<binary>+0x<offset>)
        stack_trace_line_format = (
            r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
        match = re.match(stack_trace_line_format, line)
        if not match:
            return [self.current_line]
        _, frameno_str, addr, binary, offset = match.groups()
        arch = ""
        # Arch can be embedded in the filename, e.g.: "libabc.dylib:x86_64h"
        colon_pos = binary.rfind(":")
        if colon_pos != -1:
            maybe_arch = binary[colon_pos+1:]
            if is_valid_arch(maybe_arch):
                arch = maybe_arch
                binary = binary[0:colon_pos]
        if arch == "":
            arch = guess_arch(addr)
        if frameno_str == '0':
            # Assume that frame #0 is the first frame of new stack trace.
            self.frame_no = 0
        original_binary = binary
        # plugin_proxy defaults to None in __init__, so only consult it when
        # one was actually supplied.
        if self.plugin_proxy is not None:
            binary = self.plugin_proxy.filter_binary_path(binary)
        if binary is None:
            # The binary filter has told us this binary can't be symbolized.
            logging.debug('Skipping symbolication of binary "%s"',
                          original_binary)
            return [self.current_line]
        symbolized_line = self.symbolize_address(addr, binary, offset, arch)
        if not symbolized_line:
            if original_binary != binary:
                symbolized_line = self.symbolize_address(
                    addr, original_binary, offset, arch)
        return self.get_symbolized_lines(symbolized_line)
class AsanSymbolizerPlugInProxy(object):
    """
    Serves several purposes:
    - Manages the lifetime of plugins (must be used a `with` statement).
    - Provides interface for calling into plugins from within this script.
    """

    def __init__(self):
        self._plugins = []
        self._plugin_names = set()

    def load_plugin_from_file(self, file_path):
        """Execute the plugin file at `file_path`.

        The file runs with a copy of this module's globals plus a
        `register_plugin(plugin)` function that adds the plugin to this
        proxy.
        """
        logging.info('Loading plugins from "{}"'.format(file_path))
        globals_space = dict(globals())

        # Provide function to register plugins
        def register_plugin(plugin):
            logging.info('Registering plugin %s', plugin.get_name())
            self.add_plugin(plugin)
        globals_space['register_plugin'] = register_plugin
        # NOTE(review): this executes arbitrary code from `file_path`; only
        # load plugin files that are trusted.
        if sys.version_info.major < 3:
            execfile(file_path, globals_space, None)
        else:
            with open(file_path, 'r') as f:
                exec(f.read(), globals_space, None)

    def add_plugin(self, plugin):
        """Start managing `plugin` and hand it a reference to this proxy."""
        assert isinstance(plugin, AsanSymbolizerPlugIn)
        self._plugin_names.add(plugin.get_name())
        self._plugins.append(plugin)
        plugin._receive_proxy(self)

    def remove_plugin(self, plugin):
        """Stop managing `plugin` and let it release its resources."""
        assert isinstance(plugin, AsanSymbolizerPlugIn)
        self._plugins.remove(plugin)
        self._plugin_names.remove(plugin.get_name())
        logging.debug('Removing plugin %s', plugin.get_name())
        plugin.destroy()

    def has_plugin(self, name):
        """
        Returns true iff the plugin name is currently
        being managed by AsanSymbolizerPlugInProxy.
        """
        return name in self._plugin_names

    def register_cmdline_args(self, parser):
        """Let every managed plugin add its options to `parser`."""
        plugins = list(self._plugins)
        for plugin in plugins:
            plugin.register_cmdline_args(parser)

    def process_cmdline_args(self, pargs):
        """Pass parsed args to every plugin; drop plugins that opt out."""
        # Use copy so we can remove items as we iterate.
        plugins = list(self._plugins)
        for plugin in plugins:
            keep = plugin.process_cmdline_args(pargs)
            assert isinstance(keep, bool)
            if not keep:
                self.remove_plugin(plugin)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        for plugin in self._plugins:
            plugin.destroy()
        # Don't suppress raised exceptions
        return False

    def _filter_single_value(self, function_name, input_value):
        """
        Helper for filter style plugin functions.

        Feeds the value through `function_name` of each plugin in turn;
        stops and returns None as soon as one plugin returns None.
        """
        new_value = input_value
        for plugin in self._plugins:
            result = getattr(plugin, function_name)(new_value)
            if result is None:
                return None
            new_value = result
        return new_value

    def filter_binary_path(self, binary_path):
        """
        Consult available plugins to filter the path to a binary
        to make it suitable for symbolication.

        Returns `None` if symbolication should not be attempted for this
        binary.
        """
        return self._filter_single_value('filter_binary_path', binary_path)
class AsanSymbolizerPlugIn(object):
    """
    This is the interface the symbolizer script uses to talk
    to plugins.
    """

    @classmethod
    def get_name(cls):
        """
        Returns the name of the plugin.
        """
        return cls.__name__

    def _receive_proxy(self, proxy):
        """Store the proxy that manages this plugin."""
        assert isinstance(proxy, AsanSymbolizerPlugInProxy)
        self.proxy = proxy

    def register_cmdline_args(self, parser):
        """
        Hook for registering command line arguments to be
        consumed in `process_cmdline_args()`.

        `parser` - Instance of `argparse.ArgumentParser`.
        """
        pass

    def process_cmdline_args(self, pargs):
        """
        Hook for handling parsed arguments. Implementations
        should not modify `pargs`.

        `pargs` - Instance of `argparse.Namespace` containing
        parsed command line arguments.

        Return `True` if plug-in should be used, otherwise
        return `False`.
        """
        return True

    def destroy(self):
        """
        Hook called when a plugin is about to be destroyed.
        Implementations should free any allocated resources here.
        """
        pass

    # Symbolization hooks
    def filter_binary_path(self, binary_path):
        """
        Given a binary path return a binary path suitable for symbolication.

        Implementations should return `None` if symbolication of this binary
        should be skipped.
        """
        return binary_path
class SysRootFilterPlugIn(AsanSymbolizerPlugIn):
    """
    Simple plug-in to add sys root prefix to all binary paths
    used for symbolication.
    """

    def __init__(self):
        # Empty until `-s` is seen on the command line.
        self.sysroot_path = ""

    def register_cmdline_args(self, parser):
        """Register the `-s SYSROOT` option."""
        option_settings = {
            'dest': 'sys_root',
            'metavar': 'SYSROOT',
            'help': 'set path to sysroot for sanitized binaries',
        }
        parser.add_argument('-s', **option_settings)

    def process_cmdline_args(self, pargs):
        """Adopt the parsed sysroot; report False when none was given."""
        requested_root = pargs.sys_root
        if requested_root is not None:
            self.sysroot_path = requested_root
            return True
        # Not being used so remove ourselves.
        return False

    def filter_binary_path(self, path):
        """Return `path` prefixed with the configured sysroot."""
        prefixed_path = self.sysroot_path + path
        return prefixed_path
# Registers the script's logging options on `parser`.
# NOTE(review): the `parser.add_argument(` openers, the option names, the
# defaults and the closing parentheses are absent from the reviewed text, so
# only the surviving keyword arguments are annotated below. setup_logging()
# reads `pargs.log_dest` and `pargs.log_level`, so presumably those are the two
# destinations registered here - confirm against the original file.
def add_logging_args(parser):
# Help text of the log-destination option.
help='Destination path for script logging (default stderr).',
# Level names accepted by the log-level option; setup_logging() upper-cases the
# chosen value and looks it up as an attribute of the `logging` module.
choices=['debug', 'info', 'warning', 'error', 'critical'],
# `%(default)s` is expanded by argparse to the option's default value.
help='Log level for script (default: %(default)s).'
def setup_logging():
    """Configure the `logging` module from the command line.

    Only the logging options are parsed here (from `sys.argv`); everything
    else is handed back untouched.

    Returns:
        list of command line arguments that were not consumed.
    """
    # Set up a parser just for parsing the logging arguments.
    # This is necessary because logging should be configured before we
    # perform the main argument parsing.
    parser = argparse.ArgumentParser(add_help=False)
    add_logging_args(parser)
    pargs, unparsed_args = parser.parse_known_args()

    log_level = getattr(logging, pargs.log_level.upper())
    if log_level == logging.DEBUG:
        # Debug output additionally shows where each message originates.
        log_format = ('%(levelname)s: [%(funcName)s() %(filename)s:%(lineno)d] '
                      '%(message)s')
    else:
        log_format = '%(levelname)s: %(message)s'
    basic_config = {
        'level': log_level,
        'format': log_format
    }
    log_dest = pargs.log_dest
    if log_dest:
        # Without a filename, basicConfig logs to stderr.
        basic_config['filename'] = log_dest
    logging.basicConfig(**basic_config)
    logging.debug('Logging level set to "{}" and directing output to "{}"'.format(
        pargs.log_level,
        'stderr' if log_dest is None else log_dest))
    return unparsed_args
def add_load_plugin_args(parser):
    """Register `-p/--plugins`, which takes one or more plug-in paths."""
    option_flags = ('-p', '--plugins')
    parser.add_argument(*option_flags,
                        help='Load plug-in',
                        nargs='+',
                        default=[])
def setup_plugins(plugin_proxy, args):
    """Load the requested plug-ins and register the built-in ones.

    Args:
        plugin_proxy: proxy providing `load_plugin_from_file()` and
            `add_plugin()`.
        args: command line arguments to inspect; `None` means `sys.argv[1:]`.

    Returns:
        list of arguments not consumed by the plug-in options.
    """
    parser = argparse.ArgumentParser(add_help=False)
    add_load_plugin_args(parser)
    # Parse the arguments we were given rather than re-reading sys.argv, so
    # options already consumed by an earlier stage are not returned again.
    pargs, unparsed_args = parser.parse_known_args(args)
    for plugin_path in pargs.plugins:
        plugin_proxy.load_plugin_from_file(plugin_path)
    # Add built-in plugins.
    plugin_proxy.add_plugin(SysRootFilterPlugIn())
    return unparsed_args
if __name__ == '__main__':
    # Logging and plug-ins are set up first; each stage consumes its own
    # options and passes the rest on.
    remaining_args = setup_logging()
    with AsanSymbolizerPlugInProxy() as plugin_proxy:
        remaining_args = setup_plugins(plugin_proxy, remaining_args)
        parser = argparse.ArgumentParser(
            description='ASan symbolization script')
        parser.add_argument('path_to_cut', nargs='*',
                            help='pattern to be cut from the result file path ')
        parser.add_argument('-d', '--demangle', action='store_true',
                            help='demangle function names')
        parser.add_argument('-c', metavar='CROSS_COMPILE',
                            help='set prefix for binutils')
        # FileType opens the named file; without it a path given on the
        # command line would stay a plain string and be iterated character
        # by character.
        parser.add_argument('-l', '--logfile', default=sys.stdin,
                            type=argparse.FileType('r'),
                            help='set log file name to parse, default is stdin')
        parser.add_argument('--force-system-symbolizer', action='store_true',
                            help='don\'t use llvm-symbolizer')
        # Add logging arguments so that `--help` shows them.
        add_logging_args(parser)
        # Add load plugin arguments so that `--help` shows them.
        add_load_plugin_args(parser)
        # Plug-ins contribute their own options and then see the parsed result.
        plugin_proxy.register_cmdline_args(parser)
        args = parser.parse_args(remaining_args)
        plugin_proxy.process_cmdline_args(args)
        # The settings below are module-level globals read by the symbolizers.
        if args.path_to_cut:
            fix_filename_patterns = args.path_to_cut
        if args.demangle:
            demangle = True
        if args.c:
            binutils_prefix = args.c
        logfile = args.logfile if args.logfile else sys.stdin
        if args.force_system_symbolizer:
            force_system_symbolizer = True
        if force_system_symbolizer:
            # NOTE(review): the body of this branch is missing from the
            # reviewed text; forcing the system symbolizer only makes sense
            # when it is allowed, so that invariant is checked here - confirm.
            assert allow_system_symbolizer
        loop = SymbolizationLoop(plugin_proxy)
        loop.process_logfile()