# tools/binary_size/libsupersize/dwarfdump.py - chromium/src - Git at Google

 #!/usr/bin/env python3
 # Copyright 2021 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 """Runs dwarfdump on passed-in .so."""

 import bisect
 import dataclasses
 import subprocess
 import typing

 import path_util


 @dataclasses.dataclass(order=True)
 class _AddressRange:
   """Mutable address range used as the sort key of range entries.

   order=True makes instances compare like (start, stop) tuples, which is what
   allows lists of (_AddressRange, source path) tuples to be sorted. Fields are
   assigned after construction by the compile unit parser, so the class is
   intentionally not frozen.
   """
   # First address covered by the range.
   start: int
   # End of the range; address lookups in this file treat it as exclusive.
   stop: int


 class _SourceMapper:
   def __init__(self, range_info_list):
     self._range_info_list = range_info_list
     self._largest_address = 0
     self._unmatched_queries_count = 0
     self._total_queries_count = 0

     if self._range_info_list:
       self._largest_address = self._range_info_list[-1][0].stop

   def FindSourceForTextAddress(self, address):
     """Returns source file path matching passed-in symbol address.

     Only symbols in the .text section of the elf file are supported.
     """
     self._total_queries_count += 1
     # Bisect against stop = self._largest_address + 1 to avoid bisecting against
     # the "source path" tuple component.
     bisect_index = bisect.bisect_right(
         self._range_info_list,
         (_AddressRange(address, self._largest_address + 1), '')) - 1
     if bisect_index >= 0:
       info = self._range_info_list[bisect_index]
       if info[0].start <= address < info[0].stop:
         return info[1]

     self._unmatched_queries_count += 1
     return None

   @property
   def unmatched_queries_ratio(self):
     return self._unmatched_queries_count / self._total_queries_count


 def CreateAddressSourceMapper(elf_path, tool_prefix):
   """Runs dwarfdump on |elf_path| and wraps the result for address queries.

   Args:
     elf_path: Path of the ELF file handed to dwarfdump.
     tool_prefix: Passed through to _Parse(), which uses it to locate dwarfdump.

   Returns:
     A _SourceMapper built from the parsed dwarfdump output.
   """
   range_info_list = _Parse(elf_path, tool_prefix)
   return _SourceMapper(range_info_list)


 def CreateAddressSourceMapperForTest(lines):
   """Test hook: builds a _SourceMapper from dwarfdump output |lines|."""
   range_info_list = _ParseDumpOutput(lines)
   return _SourceMapper(range_info_list)


 def ParseDumpOutputForTest(lines):
   """Test hook: returns the result of _ParseDumpOutput() for |lines|."""
   range_info_list = _ParseDumpOutput(lines)
   return range_info_list


 def _Parse(elf_path, tool_prefix):
   """Runs dwarfdump on |elf_path| and parses what it prints to stdout.

   Args:
     elf_path: Path of the ELF file to dump.
     tool_prefix: Passed to path_util.GetDwarfdumpPath() to locate dwarfdump.

   Returns:
     The result of _ParseDumpOutput() on the tool's stdout lines.

   Raises:
     subprocess.CalledProcessError: If dwarfdump exits with a non-zero status.
   """
   dwarfdump_path = path_util.GetDwarfdumpPath(tool_prefix)
   args = [dwarfdump_path, elf_path]
   args += ['--debug-info', '--summarize-types', '--recurse-depth=0']
   # stderr is discarded; only stdout is parsed.
   output = subprocess.check_output(args,
                                    stderr=subprocess.DEVNULL,
                                    encoding='utf-8')
   output_lines = output.splitlines()
   return _ParseDumpOutput(output_lines)


 def _ParseDumpOutput(lines):
   """Parses passed-in dwarfdump stdout."""

   # List of (_AddressRange, source path) tuples.
   range_info_list = []

   line_it = iter(lines)
   line = next(line_it, None)
   while line is not None:
     if 'DW_TAG_compile_unit' not in line:
       line = next(line_it, None)
       continue

     line, address_ranges, source_path = _ParseCompileUnit(line_it)
     if source_path and address_ranges:
       for address_range in address_ranges:
         range_info_list.append((address_range, source_path))

   return sorted(range_info_list)


 def _ParseCompileUnit(line_it):
   """Parses the attributes of a single DW_TAG_compile_unit block.

   Reads lines from |line_it| until the next DW_TAG line or the end of input.

   Example:
   0x000026: DW_TAG_compile_unit
               DW_AT_low_pc   (0x02f)
               DW_AT_high_pc  (0x03f)
               DW_AT_name     (foo.cc)

   Args:
     line_it: Iterator over dwarfdump output lines, positioned right after the
       DW_TAG_compile_unit line.

   Returns:
     Tuple of (line, range_addresses, source_path). |line| is the DW_TAG line
     that ended the block (None at end of input), |range_addresses| is a list
     of _AddressRange, and |source_path| is the DW_AT_name value (None when the
     block had no DW_AT_name).
   """
   source_path = None
   single_range = _AddressRange(0, 0)
   range_addresses = []

   while True:
     line = next(line_it, None)

     # End of input is routed into the "block finished" branch below.
     dw_index = 0 if line is None else line.find('DW_')
     if dw_index < 0:
       continue

     if line is None or line.startswith('DW_TAG', dw_index):
       if range_addresses:
         # If compile unit specifies both DW_AT_ranges and DW_AT_low_pc,
         # DW_AT_low_pc is base offset. Base offset is currently unsupported.
         assert single_range.start == 0
       elif single_range.start > 0:
         range_addresses.append(single_range)
       return (line, range_addresses, source_path)

     if line.startswith('DW_AT_low_pc', dw_index):
       single_range.start = int(_ExtractDwValue(line), 16)
       # Until DW_AT_high_pc is seen, treat the unit as a one-address range.
       if single_range.stop == 0:
         single_range.stop = single_range.start + 1
       continue
     if line.startswith('DW_AT_high_pc', dw_index):
       single_range.stop = int(_ExtractDwValue(line), 16)
       continue
     if line.startswith('DW_AT_name', dw_index):
       source_path = _ExtractDwValue(line)
       continue

     if line.startswith('DW_AT_ranges', dw_index):
       # Range lines follow only while the attribute's bracket is still open,
       # e.g. "DW_AT_ranges (0x00000000". A value closed on the same line has
       # no range lines after it; parsing ranges then would consume the
       # attributes (and compile units) that follow.
       num_opening_brackets = line.count('(') + line.count('[')
       num_closing_brackets = line.count(')') + line.count(']')
       if num_opening_brackets > num_closing_brackets:
         range_addresses = _ParseRanges(line_it)


 def _ParseRanges(line_it):
   """Parses DW_AT_ranges from dwarfdump stdout.

   Example:
   [0x1, 0x2)
   [0x5, 0x10))
   """
   range_addresses = []

   line = next(line_it, None)
   while line is not None:
     num_opening_brackets = line.count('(') + line.count('[')
     num_closing_brackets = line.count(')') + line.count(']')

     tokens = line.strip('([]) \t').split(',')
     if len(tokens) == 2:
       start_address = int(tokens[0], 16)
       end_address = int(tokens[1], 16)
       # Dwarf spec does not assign special meaning to empty ranges.
       if start_address != end_address:
         range_addresses.append(_AddressRange(start_address, end_address))

     if num_closing_brackets > num_opening_brackets:
       break
     line = next(line_it, None)

   return range_addresses


 def _ExtractDwValue(line):
   """Extract DW_AT_ value from dwarfdump stdout.

   Examples:
   DW_AT_name ("foo.cc")
   DW_AT_decl_line (177)
   DW_AT_low_pc (0x2)
   """
   lparen_index = line.rfind('(')
   if lparen_index < 0:
     return None
   rparen_index = line.find(')', lparen_index + 1)
   if rparen_index < 0:
     return None
   if (lparen_index < rparen_index - 2 and line[lparen_index + 1] == '"'
       and line[rparen_index - 1] == '"'):
     lparen_index += 1
     rparen_index -= 1
   return line[lparen_index + 1:rparen_index]
	#!/usr/bin/env python3
	# Copyright 2021 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.
	"""Runs dwarfdump on passed-in .so."""

	import bisect
	import dataclasses
	import subprocess
	import typing

	import path_util


	@dataclasses.dataclass(order=True)
	class _AddressRange:
	start: int
	stop: int


	class _SourceMapper:
	def __init__(self, range_info_list):
	self._range_info_list = range_info_list
	self._largest_address = 0
	self._unmatched_queries_count = 0
	self._total_queries_count = 0

	if self._range_info_list:
	self._largest_address = self._range_info_list[-1][0].stop

	def FindSourceForTextAddress(self, address):
	"""Returns source file path matching passed-in symbol address.

	Only symbols in the .text section of the elf file are supported.
	"""
	self._total_queries_count += 1
	# Bisect against stop = self._largest_address + 1 to avoid bisecting against
	# the "source path" tuple component.
	bisect_index = bisect.bisect_right(
	self._range_info_list,
	(_AddressRange(address, self._largest_address + 1), '')) - 1
	if bisect_index >= 0:
	info = self._range_info_list[bisect_index]
	if info[0].start <= address < info[0].stop:
	return info[1]

	self._unmatched_queries_count += 1
	return None

	@property
	def unmatched_queries_ratio(self):
	return self._unmatched_queries_count / self._total_queries_count


	def CreateAddressSourceMapper(elf_path, tool_prefix):
	  """Runs dwarfdump. Returns object for querying source path given address.

	  Args:
	    elf_path: Path of the ELF file handed to dwarfdump.
	    tool_prefix: Passed through to _Parse(), which uses it to locate
	      dwarfdump.
	  """
	  return _SourceMapper(_Parse(elf_path, tool_prefix))


	def CreateAddressSourceMapperForTest(lines):
	  """Test hook: builds a _SourceMapper from dwarfdump output |lines|."""
	  return _SourceMapper(_ParseDumpOutput(lines))


	def ParseDumpOutputForTest(lines):
	  """Test hook: returns the result of _ParseDumpOutput() for |lines|."""
	  return _ParseDumpOutput(lines)


	def _Parse(elf_path, tool_prefix):
	  """Runs dwarfdump on |elf_path| and parses what it prints to stdout.

	  Args:
	    elf_path: Path of the ELF file to dump.
	    tool_prefix: Passed to path_util.GetDwarfdumpPath() to locate dwarfdump.

	  Returns:
	    The result of _ParseDumpOutput() on the tool's stdout lines.

	  Raises:
	    subprocess.CalledProcessError: If dwarfdump exits with non-zero status.
	  """
	  cmd = [
	      path_util.GetDwarfdumpPath(tool_prefix),
	      elf_path,
	      '--debug-info',
	      '--summarize-types',
	      '--recurse-depth=0',
	  ]
	  # stderr is discarded; only stdout is parsed.
	  stdout = subprocess.check_output(cmd,
	                                   stderr=subprocess.DEVNULL,
	                                   encoding='utf-8')
	  return _ParseDumpOutput(stdout.splitlines())


	def _ParseDumpOutput(lines):
	"""Parses passed-in dwarfdump stdout."""

	# List of (_AddressRange, source path) tuples.
	range_info_list = []

	line_it = iter(lines)
	line = next(line_it, None)
	while line is not None:
	if 'DW_TAG_compile_unit' not in line:
	line = next(line_it, None)
	continue

	line, address_ranges, source_path = _ParseCompileUnit(line_it)
	if source_path and address_ranges:
	for address_range in address_ranges:
	range_info_list.append((address_range, source_path))

	return sorted(range_info_list)


	def _ParseCompileUnit(line_it):
	  """Parses the attributes of a single DW_TAG_compile_unit block.

	  Reads lines from |line_it| until the next DW_TAG line or the end of input.

	  Example:
	  0x000026: DW_TAG_compile_unit
	              DW_AT_low_pc   (0x02f)
	              DW_AT_high_pc  (0x03f)
	              DW_AT_name     (foo.cc)

	  Args:
	    line_it: Iterator over dwarfdump output lines, positioned right after
	      the DW_TAG_compile_unit line.

	  Returns:
	    Tuple of (line, range_addresses, source_path). |line| is the DW_TAG line
	    that ended the block (None at end of input), |range_addresses| is a list
	    of _AddressRange, and |source_path| is the DW_AT_name value (None when
	    the block had no DW_AT_name).
	  """
	  source_path = None
	  single_range = _AddressRange(0, 0)
	  range_addresses = []

	  while True:
	    line = next(line_it, None)

	    # End of input is routed into the "block finished" branch below.
	    dw_index = 0 if line is None else line.find('DW_')
	    if dw_index < 0:
	      continue

	    if line is None or line.startswith('DW_TAG', dw_index):
	      if range_addresses:
	        # If compile unit specifies both DW_AT_ranges and DW_AT_low_pc,
	        # DW_AT_low_pc is base offset. Base offset is currently unsupported.
	        assert single_range.start == 0
	      elif single_range.start > 0:
	        range_addresses.append(single_range)
	      return (line, range_addresses, source_path)

	    if line.startswith('DW_AT_low_pc', dw_index):
	      single_range.start = int(_ExtractDwValue(line), 16)
	      # Until DW_AT_high_pc is seen, treat the unit as a one-address range.
	      if single_range.stop == 0:
	        single_range.stop = single_range.start + 1
	      continue
	    if line.startswith('DW_AT_high_pc', dw_index):
	      single_range.stop = int(_ExtractDwValue(line), 16)
	      continue
	    if line.startswith('DW_AT_name', dw_index):
	      source_path = _ExtractDwValue(line)
	      continue

	    if line.startswith('DW_AT_ranges', dw_index):
	      # Range lines follow only while the attribute's bracket is still open,
	      # e.g. "DW_AT_ranges (0x00000000". A value closed on the same line has
	      # no range lines after it; parsing ranges then would consume the
	      # attributes (and compile units) that follow.
	      num_opening_brackets = line.count('(') + line.count('[')
	      num_closing_brackets = line.count(')') + line.count(']')
	      if num_opening_brackets > num_closing_brackets:
	        range_addresses = _ParseRanges(line_it)


	def _ParseRanges(line_it):
	"""Parses DW_AT_ranges from dwarfdump stdout.

	Example:
	[0x1, 0x2)
	[0x5, 0x10))
	"""
	range_addresses = []

	line = next(line_it, None)
	while line is not None:
	num_opening_brackets = line.count('(') + line.count('[')
	num_closing_brackets = line.count(')') + line.count(']')

	tokens = line.strip('([]) \t').split(',')
	if len(tokens) == 2:
	start_address = int(tokens[0], 16)
	end_address = int(tokens[1], 16)
	# Dwarf spec does not assign special meaning to empty ranges.
	if start_address != end_address:
	range_addresses.append(_AddressRange(start_address, end_address))

	if num_closing_brackets > num_opening_brackets:
	break
	line = next(line_it, None)

	return range_addresses


	def _ExtractDwValue(line):
	"""Extract DW_AT_ value from dwarfdump stdout.

	Examples:
	DW_AT_name ("foo.cc")
	DW_AT_decl_line (177)
	DW_AT_low_pc (0x2)
	"""
	lparen_index = line.rfind('(')
	if lparen_index < 0:
	return None
	rparen_index = line.find(')', lparen_index + 1)
	if rparen_index < 0:
	return None
	if (lparen_index < rparen_index - 2 and line[lparen_index + 1] == '"'
	and line[rparen_index - 1] == '"'):
	lparen_index += 1
	rparen_index -= 1
	return line[lparen_index + 1:rparen_index]