Migrate to llvm-objdump and llvm-symbolizer
Android NDK will no longer support tools based on the GNU toolchain.
Hence, migrate ASan symbolization for Android to LLVM-based tools
like llvm-symbolizer and llvm-objdump.
Bug: 1273402
Cq-Include-Trybots: luci.chromium.try:linux_chromium_asan_rel_ng,win-asan,mac_chromium_asan_rel_ng,android-asan
Change-Id: Icc0c5f8e64e1bb05d49f347c3316850a0ff83e55
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3303875
Reviewed-by: Andrew Grieve <agrieve@chromium.org>
Reviewed-by: Hans Wennborg <hans@chromium.org>
Reviewed-by: Nico Weber <thakis@chromium.org>
Commit-Queue: Jonathan Wright <jonathan.wright@arm.com>
Cr-Commit-Position: refs/heads/main@{#961488}
NOKEYCHECK=True
GitOrigin-RevId: ffa736ce5f5d7b018b768c29c0cc1ff35152393a
diff --git a/README.chromium b/README.chromium
index e4e184d..bf7bf9f 100644
--- a/README.chromium
+++ b/README.chromium
@@ -19,7 +19,6 @@
The scripts have been modified to better suit Chromium development. Changes
include, but are not limited to, the following:
-Added memoization of addr2line and objdump.
Added option to change the amount of symbolization done.
Updated output directories to be set by environment variable or --flags
When calling addr2line, check the symbol is a file that looks like it contains
@@ -31,16 +30,15 @@
Added support for arch=x64 as an alias to arch=x86_64
Added debug logging and --verbose parameter.
Used fast ELF symbolizer for symbols.py and tombstones
-Used multiprocessing to pre-process logcat before symbolizing it
Added code address adjustment for the debuggerd output from pre-M Android
where relocations are packed.
Added code to capture java stderr for better handling of native->java crashes.
Fixed invalid using decl in logging header debug.h
Only attempt to symbolize with ELF libraries.
-Changed the stack script to use llvm symbolizer instead of addr2line,
-objdump, etc, since llvm symbolizer is more efficient in finding
-function names, line numbers etc.
+Changed the stack scripts to use LLVM tools instead of the GNU toolchain.
+Additionally, use llvm-symbolizer instead of addr2line, objdump, etc., since
+llvm-symbolizer is more efficient at finding function names, line numbers, etc.
Speedup symbolization by avoiding unnecessary APK manifest extraction loops.
diff --git a/development/scripts/stack.pydeps b/development/scripts/stack.pydeps
index ab65fc3..8a95b21 100644
--- a/development/scripts/stack.pydeps
+++ b/development/scripts/stack.pydeps
@@ -3,8 +3,7 @@
../../../../build/android/pylib/__init__.py
../../../../build/android/pylib/constants/__init__.py
../../../../build/android/pylib/constants/host_paths.py
-../../../../build/android/pylib/symbols/__init__.py
-../../../../build/android/pylib/symbols/elf_symbolizer.py
+../../../../tools/python/llvm_objdump.py
../../../../tools/python/llvm_symbolizer.py
../../../catapult/devil/devil/__init__.py
../../../catapult/devil/devil/android/__init__.py
diff --git a/development/scripts/stack_core.py b/development/scripts/stack_core.py
index e869a14..b0937db 100755
--- a/development/scripts/stack_core.py
+++ b/development/scripts/stack_core.py
@@ -463,10 +463,9 @@
logging.debug('Identified lib: %s' % area)
# If a calls b which further calls c and c is inlined to b, we want to
# display "a -> b -> c" in the stack trace instead of just "a -> c"
- # To use llvm symbolizer, the hexadecimal address has to start with 0x.
- info = llvm_symbolizer.GetSymbolInformation(
- os.path.join(symbol.SYMBOLS_DIR, symbol.TranslateLibPath(area)),
- '0x' + code_addr)
+ library = os.path.join(symbol.SYMBOLS_DIR,
+ symbol.TranslateLibPath(area))
+ info = llvm_symbolizer.GetSymbolInformation(library, int(code_addr,16))
logging.debug('symbol information: %s' % info)
nest_count = len(info) - 1
for source_symbol, source_location in info:
@@ -481,15 +480,16 @@
trace_lines.append((code_addr,
source_symbol,
source_location))
+
match = _VALUE_LINE.match(line)
if match:
(_, addr, value, area, _, symbol_name) = match.groups()
if area == UNKNOWN or area == HEAP or area == STACK or not area:
value_lines.append((addr, value, '', area))
else:
- info = llvm_symbolizer.GetSymbolInformation(
- os.path.join(symbol.SYMBOLS_DIR, symbol.TranslateLibPath(area)),
- '0x' + value)
+ library = os.path.join(symbol.SYMBOLS_DIR,
+ symbol.TranslateLibPath(area))
+ info = llvm_symbolizer.GetSymbolInformation(library, int(value,16))
source_symbol, source_location = info.pop()
value_lines.append((addr,
diff --git a/development/scripts/stack_test.py b/development/scripts/stack_test.py
index 4d53fa9..d5609b1 100755
--- a/development/scripts/stack_test.py
+++ b/development/scripts/stack_test.py
@@ -56,7 +56,7 @@
def __init__(self, directory):
self._lib_directory = directory
- def GetSymbolInformation(self, library, address_string):
+ def GetSymbolInformation(self, library, address):
basename = os.path.basename(library)
local_file = os.path.join(self._lib_directory, basename)
@@ -67,7 +67,6 @@
# If the address isn't in the library, LLVM symbolizer yields ??.
lib_size = os.stat(local_file).st_size
- address = int(address_string, 16)
if address >= lib_size:
return [('??', '??:0:0')]
diff --git a/development/scripts/symbol.py b/development/scripts/symbol.py
index f0a2dea..c8a36b9 100755
--- a/development/scripts/symbol.py
+++ b/development/scripts/symbol.py
@@ -34,11 +34,16 @@
'build', 'android'))
from pylib import constants
from pylib.constants import host_paths
-from pylib.symbols import elf_symbolizer
+sys.path.insert(0, os.path.join(os.path.dirname(__file__),
+ os.pardir, os.pardir, os.pardir, os.pardir,
+ 'tools', 'python'))
+from llvm_symbolizer import LLVMSymbolizer, IsValidLLVMSymbolizerTarget
+
+from llvm_objdump import LLVMObjdumper
# WARNING: These global variables can be modified by other scripts!
-SYMBOLS_DIR = constants.DIR_SOURCE_ROOT
+SYMBOLS_DIR = constants.DIR_SOURCE_ROOT + os.sep
CHROME_SYMBOLS_DIR = None
ARCH = "arm"
@@ -220,8 +225,7 @@
A list of matching library filenames for library_name.
"""
def extant_library(filename):
- if (os.path.exists(filename)
- and elf_symbolizer.ContainsElfMagic(filename)):
+ if (os.path.exists(filename) and IsValidLLVMSymbolizerTarget(filename)):
return [filename]
return []
@@ -313,12 +317,15 @@
if not lib:
return None
- addr_to_line = _CallAddr2LineForSet(lib, unique_addrs, cpu_arch)
+ symbols = SYMBOLS_DIR + lib
+
+ addr_to_line = _CallAddr2LineForSet(symbols, unique_addrs)
+
if not addr_to_line:
return None
if get_detailed_info:
- addr_to_objdump = _CallObjdumpForSet(lib, unique_addrs, cpu_arch)
+ addr_to_objdump = _CallObjdumpForSet(symbols, unique_addrs, cpu_arch)
if not addr_to_objdump:
return None
else:
@@ -328,53 +335,27 @@
for addr in unique_addrs:
source_info = addr_to_line.get(addr)
if not source_info:
- source_info = [(None, None)]
+ source_info = [(None,None)]
+
if addr in addr_to_objdump:
(object_symbol, object_offset) = addr_to_objdump.get(addr)
object_symbol_with_offset = _FormatSymbolWithOffset(object_symbol,
object_offset)
else:
object_symbol_with_offset = None
+
result[addr] = [(source_symbol, source_location, object_symbol_with_offset)
for (source_symbol, source_location) in source_info]
return result
-class _MemoizedForSet:
- """Decorator class used to memoize CallXXXForSet() results."""
- def __init__(self, fn):
- self.fn = fn
- self.cache = {}
- self.cpu_arch = None
-
- def __call__(self, lib, unique_addrs, cpu_arch):
- if self.cpu_arch is None:
- self.cpu_arch = cpu_arch
- else:
- # Sanity check, this doesn't expect cpu_arch to change.
- assert self.cpu_arch == cpu_arch
-
- lib_cache = self.cache.setdefault(lib, {})
-
- uncached_addrs = [k for k in unique_addrs if k not in lib_cache]
- if uncached_addrs:
- lib_cache.update((k, None) for k in uncached_addrs)
- result = self.fn(lib, uncached_addrs, cpu_arch)
- if result:
- lib_cache.update(result)
-
- return dict((k, lib_cache[k]) for k in unique_addrs if lib_cache[k])
-
-
-@_MemoizedForSet
-def _CallAddr2LineForSet(lib, unique_addrs, cpu_arch):
+def _CallAddr2LineForSet(lib, unique_addrs):
"""Look up line and symbol information for a set of addresses.
Args:
lib: library (or executable) pathname containing symbols
unique_addrs: set of string hexidecimal addresses look up.
- cpu_arch: Target CPU architecture.
Returns:
A dictionary of the form {addr: [(symbol, file:line)]} where
@@ -388,55 +369,23 @@
if not lib:
return None
- symbols = SYMBOLS_DIR + lib
- if not os.path.splitext(symbols)[1] in ['', '.so', '.apk']:
+ if not os.path.splitext(lib)[1] in ['', '.so', '.apk']:
return None
- if not os.path.isfile(symbols):
+ if not os.path.isfile(lib):
return None
- addrs = sorted(unique_addrs)
+ sorted_addrs = sorted(unique_addrs)
+
result = {}
- def _Callback(sym, addr):
- records = []
- while sym: # Traverse all the inlines following the |inlined_by| chain.
- if sym.source_path and sym.source_line:
- location = '%s:%d' % (sym.source_path, sym.source_line)
- else:
- location = None
- records += [(sym.name, location)]
- sym = sym.inlined_by
- result[addr] = records
+ with LLVMSymbolizer() as llvm_symbolizer:
+ for addr in sorted_addrs:
+ result[addr] = llvm_symbolizer.GetSymbolInformation(lib,addr)
- symbolizer = elf_symbolizer.ELFSymbolizer(
- elf_file_path=symbols,
- addr2line_path=host_paths.ToolPath("addr2line", cpu_arch),
- callback=_Callback,
- inlines=True)
-
- for addr in addrs:
- symbolizer.SymbolizeAsync(int(addr, 16), addr)
- symbolizer.Join()
return result
-def _StripPC(addr, cpu_arch):
- """Strips the Thumb bit a program counter address when appropriate.
-
- Args:
- addr: the program counter address
- cpu_arch: Target CPU architecture.
-
- Returns:
- The stripped program counter address.
- """
- if cpu_arch == "arm":
- return addr & ~1
- return addr
-
-
-@_MemoizedForSet
def _CallObjdumpForSet(lib, unique_addrs, cpu_arch):
"""Use objdump to find out the names of the containing functions.
@@ -448,72 +397,20 @@
Returns:
A dictionary of the form {addr: (string symbol, offset)}.
"""
+
if not lib:
return None
- symbols = SYMBOLS_DIR + lib
- if not os.path.exists(symbols):
- return None
-
- symbols = SYMBOLS_DIR + lib
- if not os.path.exists(symbols):
+ if not os.path.exists(lib):
return None
result = {}
- # Function lines look like:
- # 000177b0 <android::IBinder::~IBinder()+0x2c>:
- # We pull out the address and function first. Then we check for an optional
- # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
- func_regexp = re.compile("(^[a-f0-9]*) \<(.*)\>:$")
- offset_regexp = re.compile("(.*)\+0x([a-f0-9]*)")
-
- # A disassembly line looks like:
- # 177b2: b510 push {r4, lr}
- asm_regexp = re.compile("(^[ a-f0-9]*):[ a-f0-0]*.*$")
-
- for target_addr in unique_addrs:
- start_addr_dec = str(_StripPC(int(target_addr, 16), cpu_arch))
- stop_addr_dec = str(_StripPC(int(target_addr, 16), cpu_arch) + 8)
- cmd = [host_paths.ToolPath("objdump", cpu_arch),
- "--section=.text",
- "--demangle",
- "--disassemble",
- "--start-address=" + start_addr_dec,
- "--stop-address=" + stop_addr_dec,
- symbols]
-
- current_symbol = None # The current function symbol in the disassembly.
- current_symbol_addr = 0 # The address of the current function.
-
- stream = subprocess.Popen(cmd, stdout=subprocess.PIPE).stdout
- for line in stream:
- # Is it a function line like:
- # 000177b0 <android::IBinder::~IBinder()>:
- components = func_regexp.match(line)
- if components:
- # This is a new function, so record the current function and its
- # address.
- current_symbol_addr = int(components.group(1), 16)
- current_symbol = components.group(2)
-
- # Does it have an optional offset like: "foo(..)+0x2c"?
- components = offset_regexp.match(current_symbol)
- if components:
- current_symbol = components.group(1)
- offset = components.group(2)
- if offset:
- current_symbol_addr -= int(offset, 16)
-
- # Is it an disassembly line like:
- # 177b2: b510 push {r4, lr}
- components = asm_regexp.match(line)
- if components:
- addr = components.group(1)
- i_addr = int(addr, 16)
- i_target = _StripPC(int(target_addr, 16), cpu_arch)
- if i_addr == i_target:
- result[target_addr] = (current_symbol, i_target - current_symbol_addr)
- stream.close()
+ with LLVMObjdumper() as llvm_objdumper:
+ for current_address in unique_addrs:
+ symbol_data = llvm_objdumper.GetSymbolInformation(lib=lib,
+ address=current_address,
+ cpu_arch=cpu_arch)
+ result[current_address] = (symbol_data.symbol, symbol_data.offset)
return result