blob: de6cf0743d5636e3c793c10f2f1367a49961c302 [file] [log] [blame]
#!/usr/bin/env python3
# Copyright 2021 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Runs dwarfdump on passed-in .so."""
import bisect
import dataclasses
import subprocess
import typing
import path_util
@dataclasses.dataclass(order=True)
class _AddressRange:
start: int
stop: int
class _SourceMapper:
def __init__(self, range_info_list):
self._range_info_list = range_info_list
self._largest_address = 0
self._unmatched_queries_count = 0
self._total_queries_count = 0
if self._range_info_list:
self._largest_address = self._range_info_list[-1][0].stop
def FindSourceForTextAddress(self, address):
"""Returns source file path matching passed-in symbol address.
Only symbols in the .text section of the elf file are supported.
"""
self._total_queries_count += 1
# Bisect against stop = self._largest_address + 1 to avoid bisecting against
# the "source path" tuple component.
bisect_index = bisect.bisect_right(
self._range_info_list,
(_AddressRange(address, self._largest_address + 1), '')) - 1
if bisect_index >= 0:
info = self._range_info_list[bisect_index]
if info[0].start <= address < info[0].stop:
return info[1]
self._unmatched_queries_count += 1
return None
@property
def unmatched_queries_ratio(self):
return self._unmatched_queries_count / self._total_queries_count
def CreateAddressSourceMapper(elf_path, tool_prefix):
  """Runs dwarfdump on |elf_path| and wraps the result for address lookups.

  Args:
    elf_path: Path of the ELF file handed to dwarfdump.
    tool_prefix: Passed to path_util.GetDwarfdumpPath() to locate the tool.

  Returns:
    A _SourceMapper built from the parsed dwarfdump output.
  """
  range_info_list = _Parse(elf_path, tool_prefix)
  return _SourceMapper(range_info_list)
def CreateAddressSourceMapperForTest(lines):
  """Builds a _SourceMapper from dwarfdump output lines, without running it."""
  range_info_list = _ParseDumpOutput(lines)
  return _SourceMapper(range_info_list)
def ParseDumpOutputForTest(lines):
  """Exposes _ParseDumpOutput() to tests; returns its result unchanged."""
  parsed = _ParseDumpOutput(lines)
  return parsed
def _Parse(elf_path, tool_prefix):
  """Invokes dwarfdump on |elf_path| and parses what it prints.

  stderr of the tool is discarded and stdout is decoded as UTF-8.
  subprocess.check_output() raises CalledProcessError on a non-zero exit.

  Args:
    elf_path: Path of the ELF file to dump.
    tool_prefix: Passed to path_util.GetDwarfdumpPath() to locate the tool.

  Returns:
    Sorted list of (_AddressRange, source path) tuples.
  """
  dwarfdump_path = path_util.GetDwarfdumpPath(tool_prefix)
  args = [
      dwarfdump_path,
      elf_path,
      '--debug-info',
      '--summarize-types',
      '--recurse-depth=0',
  ]
  output = subprocess.check_output(args,
                                   stderr=subprocess.DEVNULL,
                                   encoding='utf-8')
  dump_lines = output.splitlines()
  return _ParseDumpOutput(dump_lines)
def _ParseDumpOutput(lines):
"""Parses passed-in dwarfdump stdout."""
# List of (_AddressRange, source path) tuples.
range_info_list = []
line_it = iter(lines)
line = next(line_it, None)
while line is not None:
if 'DW_TAG_compile_unit' not in line:
line = next(line_it, None)
continue
line, address_ranges, source_path = _ParseCompileUnit(line_it)
if source_path and address_ranges:
for address_range in address_ranges:
range_info_list.append((address_range, source_path))
return sorted(range_info_list)
def _ParseCompileUnit(line_it):
  """Parses the attributes of one DW_TAG_compile_unit block.

  Example:
  0x000026: DW_TAG_compile_unit
  DW_AT_low_pc (0x02f)
  DW_AT_high_pc (0x03f)
  DW_AT_name (foo.cc)

  Args:
    line_it: Iterator over dwarfdump lines, positioned just after the
        DW_TAG_compile_unit line. It is advanced past the consumed lines.

  Returns:
    Tuple of (line, address ranges, source path) where |line| is the DW_TAG
    line that ended the block (None if input ran out), |address ranges| is a
    list of _AddressRange and |source path| is the DW_AT_name value or None.
  """
  unit_name = None
  pc_range = _AddressRange(0, 0)
  unit_ranges = []
  end_line = None
  for line in line_it:
    attr_pos = line.find('DW_')
    if attr_pos < 0:
      continue
    if line.startswith('DW_TAG', attr_pos):
      # The next DIE begins here; this unit is complete.
      end_line = line
      break
    if line.startswith('DW_AT_low_pc', attr_pos):
      pc_range.start = int(_ExtractDwValue(line), 16)
      # Without a DW_AT_high_pc seen so far, treat it as a one-address range.
      if pc_range.stop == 0:
        pc_range.stop = pc_range.start + 1
    elif line.startswith('DW_AT_high_pc', attr_pos):
      pc_range.stop = int(_ExtractDwValue(line), 16)
    elif line.startswith('DW_AT_name', attr_pos):
      unit_name = _ExtractDwValue(line)
    elif line.startswith('DW_AT_ranges', attr_pos):
      unit_ranges = _ParseRanges(line_it)

  if unit_ranges:
    # If compile unit specifies both DW_AT_ranges and DW_AT_low_pc,
    # DW_AT_low_pc is base offset. Base offset is currently unsupported.
    assert pc_range.start == 0
  elif pc_range.start > 0:
    unit_ranges.append(pc_range)
  return (end_line, unit_ranges, unit_name)
def _ParseRanges(line_it):
"""Parses DW_AT_ranges from dwarfdump stdout.
Example:
[0x1, 0x2)
[0x5, 0x10))
"""
range_addresses = []
line = next(line_it, None)
while line is not None:
num_opening_brackets = line.count('(') + line.count('[')
num_closing_brackets = line.count(')') + line.count(']')
tokens = line.strip('([]) \t').split(',')
if len(tokens) == 2:
start_address = int(tokens[0], 16)
end_address = int(tokens[1], 16)
# Dwarf spec does not assign special meaning to empty ranges.
if start_address != end_address:
range_addresses.append(_AddressRange(start_address, end_address))
if num_closing_brackets > num_opening_brackets:
break
line = next(line_it, None)
return range_addresses
def _ExtractDwValue(line):
"""Extract DW_AT_ value from dwarfdump stdout.
Examples:
DW_AT_name ("foo.cc")
DW_AT_decl_line (177)
DW_AT_low_pc (0x2)
"""
lparen_index = line.rfind('(')
if lparen_index < 0:
return None
rparen_index = line.find(')', lparen_index + 1)
if rparen_index < 0:
return None
if (lparen_index < rparen_index - 2 and line[lparen_index + 1] == '"'
and line[rparen_index - 1] == '"'):
lparen_index += 1
rparen_index -= 1
return line[lparen_index + 1:rparen_index]