tools/find_runtime_symbols/static_symbols.py - chromium/src - Git at Google

 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 import bisect
 import re


 # Raw strings are used below so that regex escapes such as '\(' and '\s'
 # are not treated as (invalid) Python string escapes.

 # One innermost parenthesized argument list, plus an optional trailing
 # 'const' qualifier.
 _ARGUMENT_TYPE_PATTERN = re.compile(r'\([^()]*\)(\s*const)?')
 # One innermost '<...>' template argument list.
 _TEMPLATE_ARGUMENT_PATTERN = re.compile(r'<[^<>]*>')
 # Everything up to and including the last whitespace-preceded 'name::'
 # qualifier; group 1 captures that qualifier.
 _LEADING_TYPE_PATTERN = re.compile(r'^.*\s+(\w+::)')
 # One row of a section header table with eleven captured columns: number,
 # name (possibly empty), type, address, offset, size and five more fields.
 # NOTE: the identifier is spelled "PATTER"; it is kept as-is because
 # load_readelf_ew() refers to it by this name.
 _READELF_SECTION_HEADER_PATTER = re.compile(
     r'^\s*\[\s*(Nr|\d+)\]\s+(|\S+)\s+([A-Z_]+)\s+([0-9a-f]+)\s+'
     r'([0-9a-f]+)\s+([0-9a-f]+)\s+([0-9]+)\s+([WAXMSILGxOop]*)\s+'
     r'([0-9]+)\s+([0-9]+)\s+([0-9]+)')


 class ParsingException(Exception):
   """Error for input text that is not in the expected format.

   In this file it is raised by the nm line parser.
   """

   def __str__(self):
     """Returns repr() of the first argument, or '' when there is none."""
     # Guard against ParsingException() built without arguments: indexing
     # args[0] would raise IndexError while the error is being formatted.
     if not self.args:
       return ''
     return repr(self.args[0])


 class AddressMapping(object):
   """Maps exact addresses to entries."""

   def __init__(self):
     self._symbol_map = {}

   def append(self, start, entry):
     """Registers entry at start, replacing any entry already stored there."""
     self._symbol_map[start] = entry

   def find(self, address):
     """Returns the entry registered exactly at address, or None."""
     return self._symbol_map.get(address)


 class RangeAddressMapping(AddressMapping):
   """Maps an address to the entry with the greatest start <= address."""

   def __init__(self):
     super(RangeAddressMapping, self).__init__()
     # Start addresses in insertion order; sorted lazily by find().
     self._sorted_start_list = []
     self._is_sorted = True

   def append(self, start, entry):
     """Registers entry at start; the first entry for a given start wins.

     Args:
       start: Start address of the range.
       entry: Value returned by find() for addresses in the range.
     """
     # Consult the map instead of only the last appended start, so that
     # duplicates arriving out of order are also ignored and never end up
     # twice in the start list.
     if start in self._symbol_map:
       return
     if self._sorted_start_list and self._sorted_start_list[-1] > start:
       self._is_sorted = False
     self._sorted_start_list.append(start)
     self._symbol_map[start] = entry

   def find(self, address):
     """Returns the entry whose start is the greatest one <= address.

     Returns:
       The matching entry, or None if nothing is registered or address is
       lower than every registered start.
     """
     if not self._sorted_start_list:
       return None
     if not self._is_sorted:
       self._sorted_start_list.sort()
       self._is_sorted = True
     # bisect_right counts the starts that are <= address, so an address
     # equal to a start resolves to that start's own entry.
     found_index = bisect.bisect_right(self._sorted_start_list, address)
     if not found_index:
       # Below the lowest start; do not wrap around to the last entry.
       return None
     return self._symbol_map[self._sorted_start_list[found_index - 1]]


 class Procedure(object):
   """A class for a procedure symbol and an address range for the symbol."""

   def __init__(self, start, end, name):
     """Stores the start address, end address and name of the procedure."""
     self.start = start
     self.end = end
     self.name = name

   def __eq__(self, other):
     """Returns True when start, end and name all match."""
     # NotImplemented lets Python fall back to its default comparison for
     # foreign types instead of failing with AttributeError.
     if not isinstance(other, Procedure):
       return NotImplemented
     return ((self.start, self.end, self.name) ==
             (other.start, other.end, other.name))

   def __ne__(self, other):
     """Returns the negation of __eq__, passing NotImplemented through."""
     equal = self.__eq__(other)
     if equal is NotImplemented:
       return equal
     return not equal

   def __str__(self):
     """Formats as '<start hex>-<end hex>: <name>'."""
     return '%x-%x: %s' % (self.start, self.end, self.name)


 class ElfSection(object):
   """A class for an elf section header.

   The eleven fields are stored exactly as passed in; __str__ formats
   address, size and offset as hexadecimal, so those must be integers.
   """

   def __init__(
       self, number, name, stype, address, offset, size, es, flg, lk, inf, al):
     self.number = number
     self.name = name
     self.stype = stype
     self.address = address
     self.offset = offset
     self.size = size
     self.es = es
     self.flg = flg
     self.lk = lk
     self.inf = inf
     self.al = al

   def _key(self):
     """Returns every field as a tuple, used for comparisons."""
     return (self.number, self.name, self.stype, self.address, self.offset,
             self.size, self.es, self.flg, self.lk, self.inf, self.al)

   def __eq__(self, other):
     """Returns True when all eleven fields match."""
     # NotImplemented lets Python fall back to its default comparison for
     # foreign types instead of failing with AttributeError.
     if not isinstance(other, ElfSection):
       return NotImplemented
     return self._key() == other._key()

   def __ne__(self, other):
     """Returns the negation of __eq__, passing NotImplemented through."""
     equal = self.__eq__(other)
     if equal is NotImplemented:
       return equal
     return not equal

   def __str__(self):
     """Formats as '<address hex>+<size hex>(<offset hex>) <name>'."""
     return '%x+%x(%x) %s' % (self.address, self.size, self.offset, self.name)


 class StaticSymbolsInFile(object):
   """Represents static symbol information in a binary file.

   Holds the ELF sections, procedures, source files and typeinfo symbols
   loaded through the load_* methods, and resolves runtime addresses
   against them.
   """

   def __init__(self, my_name):
     """Creates an empty symbol table.

     Args:
       my_name: Name compared against vma.name when resolving addresses.
     """
     self.my_name = my_name
     self._elf_sections = []
     self._procedures = RangeAddressMapping()
     self._sourcefiles = RangeAddressMapping()
     self._typeinfos = AddressMapping()

   def _append_elf_section(self, elf_section):
     self._elf_sections.append(elf_section)

   def _append_procedure(self, start, procedure):
     self._procedures.append(start, procedure)

   def _append_sourcefile(self, start, sourcefile):
     self._sourcefiles.append(start, sourcefile)

   def _append_typeinfo(self, start, typeinfo):
     self._typeinfos.append(start, typeinfo)

   def _find_symbol_by_runtime_address(self, address, vma, target):
     """Translates a runtime address to an ELF address and looks it up.

     Args:
       address: Runtime address to resolve.
       vma: Mapping object; its begin, end, name and offset are read.
       target: Mapping whose find() is called with the ELF address.

     Returns:
       Whatever target.find() returns, or None when address is outside
       vma, vma.name differs from my_name, or no loaded section covers
       the computed file offset.
     """
     if not (vma.begin <= address < vma.end):
       return None

     if vma.name != self.my_name:
       return None

     file_offset = address - (vma.begin - vma.offset)
     elf_address = None
     # There is deliberately no break: when several sections cover the
     # offset, the last one in the list decides the address.
     for section in self._elf_sections:
       if section.offset <= file_offset < (section.offset + section.size):
         elf_address = section.address + file_offset - section.offset
     # Compare with None explicitly so that a translated address of 0 is
     # still looked up instead of being reported as "not found".
     if elf_address is None:
       return None

     return target.find(elf_address)

   def find_procedure_by_runtime_address(self, address, vma):
     """Returns the procedure entry for a runtime address, or None."""
     return self._find_symbol_by_runtime_address(address, vma, self._procedures)

   def find_sourcefile_by_runtime_address(self, address, vma):
     """Returns the source file entry for a runtime address, or None."""
     return self._find_symbol_by_runtime_address(address, vma, self._sourcefiles)

   def find_typeinfo_by_runtime_address(self, address, vma):
     """Returns the typeinfo entry exactly at a runtime address, or None."""
     return self._find_symbol_by_runtime_address(address, vma, self._typeinfos)

   def load_readelf_ew(self, f):
     """Loads section headers from readelf output.

     Lines up to and including 'Section Headers:' are skipped; if that
     line never appears nothing is loaded.  Reading stops at
     'Key to Flags:' or 'Program Headers:'.

     Args:
       f: Iterator over text lines, such as a file object.  It must be an
           iterator because the second loop resumes where the first stopped.
     """
     found_header = False
     for line in f:
       if line.rstrip() == 'Section Headers:':
         found_header = True
         break
     if not found_header:
       return None

     for line in f:
       line = line.rstrip()
       matched = _READELF_SECTION_HEADER_PATTER.match(line)
       if matched:
         # Only number, address, offset and size are converted to integers;
         # the remaining columns are stored as the matched strings.
         self._append_elf_section(ElfSection(
             int(matched.group(1), 10),  # number
             matched.group(2),  # name
             matched.group(3),  # stype
             int(matched.group(4), 16),  # address
             int(matched.group(5), 16),  # offset
             int(matched.group(6), 16),  # size
             matched.group(7),  # es
             matched.group(8),  # flg
             matched.group(9),  # lk
             matched.group(10),  # inf
             matched.group(11)  # al
             ))
       elif line in ('Key to Flags:', 'Program Headers:'):
         break

   def load_readelf_debug_decodedline_file(self, input_file):
     """Loads an address-to-source-file table.

     Each line holds a hexadecimal address followed by a source file name;
     anything after the second whitespace-separated field is ignored.

     Args:
       input_file: Iterable of text lines.
     """
     for line in input_file:
       splitted = line.rstrip().split(None, 2)
       if len(splitted) < 2:
         # Blank or truncated line: nothing to record.
         continue
       self._append_sourcefile(int(splitted[0], 16), splitted[1])

   @staticmethod
   def _parse_nm_bsd_line(line):
     """Splits one line of nm output into value, type and name.

     Two fixed layouts are accepted: an 8-character value column or a
     16-character value column, each followed by a space, a one-character
     type, a space and the symbol name.

     Args:
       line: One line of nm output without its trailing newline.

     Returns:
       A (sym_value, sym_type, sym_name) tuple of strings.

     Raises:
       ParsingException: line fits neither layout.
     """
     # The length guards turn short or blank lines into ParsingException
     # rather than an IndexError from the positional indexing.
     if len(line) > 9 and line[8] == ' ':
       return line[0:8], line[9], line[11:]
     if len(line) > 17 and line[16] == ' ':
       return line[0:16], line[17], line[19:]
     raise ParsingException('Invalid nm output.')

   @staticmethod
   def _get_short_function_name(function):
     """Strips argument lists, template arguments and leading types.

     Args:
       function: A function name string.

     Returns:
       The name with every '(...)' (plus a trailing 'const') and every
       '<...>' removed, and with the text before the last
       whitespace-preceded 'name::' qualifier dropped.
     """
     # Each pattern only matches an innermost pair, so substitution is
     # repeated until nested pairs have been peeled away completely.
     while True:
       function, number = _ARGUMENT_TYPE_PATTERN.subn('', function)
       if not number:
         break
     while True:
       function, number = _TEMPLATE_ARGUMENT_PATTERN.subn('', function)
       if not number:
         break
     return _LEADING_TYPE_PATTERN.sub(r'\g<1>', function)

   def load_nm_bsd(self, f, mangled=False):
     """Loads procedures and typeinfo symbols from nm output.

     Each procedure is recorded as the range from its own address to the
     next distinct address seen, so the input is presumably sorted by
     address (TODO confirm against the caller).

     Args:
       f: Iterable of nm output lines.
       mangled: If True, names are stored as-is and typeinfo symbols are
           not collected; otherwise names are shortened with
           _get_short_function_name().

     Raises:
       ParsingException: a non-blank line is not valid nm output.
     """
     last_start = 0
     routine = ''

     for line in f:
       line = line.rstrip()
       if not line:
         # Blank lines carry no symbol.
         continue
       sym_value, sym_type, sym_name = self._parse_nm_bsd_line(line)

       # A blank value column means the symbol has no address.
       if sym_value[0] == ' ':
         continue

       start_val = int(sym_value, 16)

       if (sym_type in ('r', 'R', 'D', 'U', 'd', 'V') and
           (not mangled and sym_name.startswith('typeinfo'))):
         self._append_typeinfo(start_val, sym_name)

       # It's possible for two symbols to share the same address, if
       # one is a zero-length variable (like __start_google_malloc) or
       # one symbol is a weak alias to another (like __libc_malloc).
       # In such cases, we want to ignore all values except for the
       # actual symbol, which in nm-speak has type "T".  The logic
       # below does this, though it's a bit tricky: when we have a
       # series of lines with the same address, the first one gets
       # queued up to be processed.  However, it won't *actually* be
       # processed until later, when we read a line with a different
       # address.  That means that as long as we're reading lines with
       # the same address, we have a chance to replace that item in the
       # queue, which we do whenever we see a 't' or 'T' entry.  If we
       # never see one, we'll just go ahead and process the first entry
       # (which never got touched in the queue), and ignore the others.
       if start_val == last_start:
         if sym_type in ('t', 'T'):
           # We are the 'T' symbol at this address, replace previous symbol.
           routine = sym_name
         # Otherwise we're not the 'T' symbol at this address: ignore us.
         continue

       # Tag this routine with the starting address in case the image
       # has multiple occurrences of this routine.  We use a syntax
       # that resembles template parameters, which are automatically
       # stripped out by _get_short_function_name().
       sym_name += "<%016x>" % start_val

       # Flush the queued routine now that its end address is known.
       if not mangled:
         routine = self._get_short_function_name(routine)
       self._append_procedure(
           last_start, Procedure(last_start, start_val, routine))

       last_start = start_val
       routine = sym_name

     # Flush the final queued routine; its end address is unknown, so the
     # start address is used for both ends.
     if not mangled:
       routine = self._get_short_function_name(routine)
     self._append_procedure(
         last_start, Procedure(last_start, last_start, routine))
	# Copyright (c) 2012 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	import bisect
	import re


	# Raw strings are used below so that regex escapes such as '\(' and '\s'
	# are not treated as (invalid) Python string escapes.  The '*'
	# quantifiers and the unescaped '|' alternations are required: without
	# them empty argument lists, multi-character arguments and padded
	# section rows do not match.

	# One innermost parenthesized argument list, plus an optional trailing
	# 'const' qualifier.
	_ARGUMENT_TYPE_PATTERN = re.compile(r'\([^()]*\)(\s*const)?')
	# One innermost '<...>' template argument list.
	_TEMPLATE_ARGUMENT_PATTERN = re.compile(r'<[^<>]*>')
	# Everything up to and including the last whitespace-preceded 'name::'
	# qualifier; group 1 captures that qualifier.
	_LEADING_TYPE_PATTERN = re.compile(r'^.*\s+(\w+::)')
	# One row of a section header table with eleven captured columns: number,
	# name (possibly empty), type, address, offset, size and five more fields.
	# NOTE: the identifier is spelled "PATTER"; it is kept as-is because
	# load_readelf_ew() refers to it by this name.
	_READELF_SECTION_HEADER_PATTER = re.compile(
	    r'^\s*\[\s*(Nr|\d+)\]\s+(|\S+)\s+([A-Z_]+)\s+([0-9a-f]+)\s+'
	    r'([0-9a-f]+)\s+([0-9a-f]+)\s+([0-9]+)\s+([WAXMSILGxOop]*)\s+'
	    r'([0-9]+)\s+([0-9]+)\s+([0-9]+)')


	class ParsingException(Exception):
	  """Error for input text that is not in the expected format.

	  In this file it is raised by the nm line parser.
	  """

	  def __str__(self):
	    """Returns repr() of the first argument, or '' when there is none."""
	    # Guard against ParsingException() built without arguments: indexing
	    # args[0] would raise IndexError while the error is being formatted.
	    if not self.args:
	      return ''
	    return repr(self.args[0])


	class AddressMapping(object):
	  """Maps exact addresses to entries."""

	  def __init__(self):
	    self._symbol_map = {}

	  def append(self, start, entry):
	    """Registers entry at start, replacing any entry already stored there."""
	    self._symbol_map[start] = entry

	  def find(self, address):
	    """Returns the entry registered exactly at address, or None."""
	    return self._symbol_map.get(address)


	class RangeAddressMapping(AddressMapping):
	  """Maps an address to the entry with the greatest start <= address."""

	  def __init__(self):
	    super(RangeAddressMapping, self).__init__()
	    # Start addresses in insertion order; sorted lazily by find().
	    self._sorted_start_list = []
	    self._is_sorted = True

	  def append(self, start, entry):
	    """Registers entry at start; the first entry for a given start wins.

	    Args:
	      start: Start address of the range.
	      entry: Value returned by find() for addresses in the range.
	    """
	    # Consult the map instead of only the last appended start, so that
	    # duplicates arriving out of order are also ignored and never end up
	    # twice in the start list.
	    if start in self._symbol_map:
	      return
	    if self._sorted_start_list and self._sorted_start_list[-1] > start:
	      self._is_sorted = False
	    self._sorted_start_list.append(start)
	    self._symbol_map[start] = entry

	  def find(self, address):
	    """Returns the entry whose start is the greatest one <= address.

	    Returns:
	      The matching entry, or None if nothing is registered or address is
	      lower than every registered start.
	    """
	    if not self._sorted_start_list:
	      return None
	    if not self._is_sorted:
	      self._sorted_start_list.sort()
	      self._is_sorted = True
	    # bisect_right counts the starts that are <= address, so an address
	    # equal to a start resolves to that start's own entry.
	    found_index = bisect.bisect_right(self._sorted_start_list, address)
	    if not found_index:
	      # Below the lowest start; do not wrap around to the last entry.
	      return None
	    return self._symbol_map[self._sorted_start_list[found_index - 1]]


	class Procedure(object):
	  """A class for a procedure symbol and an address range for the symbol."""

	  def __init__(self, start, end, name):
	    """Stores the start address, end address and name of the procedure."""
	    self.start = start
	    self.end = end
	    self.name = name

	  def __eq__(self, other):
	    """Returns True when start, end and name all match."""
	    # NotImplemented lets Python fall back to its default comparison for
	    # foreign types instead of failing with AttributeError.
	    if not isinstance(other, Procedure):
	      return NotImplemented
	    return ((self.start, self.end, self.name) ==
	            (other.start, other.end, other.name))

	  def __ne__(self, other):
	    """Returns the negation of __eq__, passing NotImplemented through."""
	    equal = self.__eq__(other)
	    if equal is NotImplemented:
	      return equal
	    return not equal

	  def __str__(self):
	    """Formats as '<start hex>-<end hex>: <name>'."""
	    return '%x-%x: %s' % (self.start, self.end, self.name)


	class ElfSection(object):
	  """A class for an elf section header.

	  The eleven fields are stored exactly as passed in; __str__ formats
	  address, size and offset as hexadecimal, so those must be integers.
	  """

	  def __init__(
	      self, number, name, stype, address, offset, size, es, flg, lk, inf, al):
	    self.number = number
	    self.name = name
	    self.stype = stype
	    self.address = address
	    self.offset = offset
	    self.size = size
	    self.es = es
	    self.flg = flg
	    self.lk = lk
	    self.inf = inf
	    self.al = al

	  def _key(self):
	    """Returns every field as a tuple, used for comparisons."""
	    return (self.number, self.name, self.stype, self.address, self.offset,
	            self.size, self.es, self.flg, self.lk, self.inf, self.al)

	  def __eq__(self, other):
	    """Returns True when all eleven fields match."""
	    # NotImplemented lets Python fall back to its default comparison for
	    # foreign types instead of failing with AttributeError.
	    if not isinstance(other, ElfSection):
	      return NotImplemented
	    return self._key() == other._key()

	  def __ne__(self, other):
	    """Returns the negation of __eq__, passing NotImplemented through."""
	    equal = self.__eq__(other)
	    if equal is NotImplemented:
	      return equal
	    return not equal

	  def __str__(self):
	    """Formats as '<address hex>+<size hex>(<offset hex>) <name>'."""
	    return '%x+%x(%x) %s' % (self.address, self.size, self.offset, self.name)


	class StaticSymbolsInFile(object):
	  """Represents static symbol information in a binary file.

	  Holds the ELF sections, procedures, source files and typeinfo symbols
	  loaded through the load_* methods, and resolves runtime addresses
	  against them.
	  """

	  def __init__(self, my_name):
	    """Creates an empty symbol table.

	    Args:
	      my_name: Name compared against vma.name when resolving addresses.
	    """
	    self.my_name = my_name
	    self._elf_sections = []
	    self._procedures = RangeAddressMapping()
	    self._sourcefiles = RangeAddressMapping()
	    self._typeinfos = AddressMapping()

	  def _append_elf_section(self, elf_section):
	    self._elf_sections.append(elf_section)

	  def _append_procedure(self, start, procedure):
	    self._procedures.append(start, procedure)

	  def _append_sourcefile(self, start, sourcefile):
	    self._sourcefiles.append(start, sourcefile)

	  def _append_typeinfo(self, start, typeinfo):
	    self._typeinfos.append(start, typeinfo)

	  def _find_symbol_by_runtime_address(self, address, vma, target):
	    """Translates a runtime address to an ELF address and looks it up.

	    Args:
	      address: Runtime address to resolve.
	      vma: Mapping object; its begin, end, name and offset are read.
	      target: Mapping whose find() is called with the ELF address.

	    Returns:
	      Whatever target.find() returns, or None when address is outside
	      vma, vma.name differs from my_name, or no loaded section covers
	      the computed file offset.
	    """
	    if not (vma.begin <= address < vma.end):
	      return None

	    if vma.name != self.my_name:
	      return None

	    file_offset = address - (vma.begin - vma.offset)
	    elf_address = None
	    # There is deliberately no break: when several sections cover the
	    # offset, the last one in the list decides the address.
	    for section in self._elf_sections:
	      if section.offset <= file_offset < (section.offset + section.size):
	        elf_address = section.address + file_offset - section.offset
	    # Compare with None explicitly so that a translated address of 0 is
	    # still looked up instead of being reported as "not found".
	    if elf_address is None:
	      return None

	    return target.find(elf_address)

	  def find_procedure_by_runtime_address(self, address, vma):
	    """Returns the procedure entry for a runtime address, or None."""
	    return self._find_symbol_by_runtime_address(address, vma, self._procedures)

	  def find_sourcefile_by_runtime_address(self, address, vma):
	    """Returns the source file entry for a runtime address, or None."""
	    return self._find_symbol_by_runtime_address(address, vma, self._sourcefiles)

	  def find_typeinfo_by_runtime_address(self, address, vma):
	    """Returns the typeinfo entry exactly at a runtime address, or None."""
	    return self._find_symbol_by_runtime_address(address, vma, self._typeinfos)

	  def load_readelf_ew(self, f):
	    """Loads section headers from readelf output.

	    Lines up to and including 'Section Headers:' are skipped; if that
	    line never appears nothing is loaded.  Reading stops at
	    'Key to Flags:' or 'Program Headers:'.

	    Args:
	      f: Iterator over text lines, such as a file object.  It must be an
	          iterator because the second loop resumes where the first stopped.
	    """
	    found_header = False
	    for line in f:
	      if line.rstrip() == 'Section Headers:':
	        found_header = True
	        break
	    if not found_header:
	      return None

	    for line in f:
	      line = line.rstrip()
	      matched = _READELF_SECTION_HEADER_PATTER.match(line)
	      if matched:
	        # Only number, address, offset and size are converted to integers;
	        # the remaining columns are stored as the matched strings.
	        self._append_elf_section(ElfSection(
	            int(matched.group(1), 10),  # number
	            matched.group(2),  # name
	            matched.group(3),  # stype
	            int(matched.group(4), 16),  # address
	            int(matched.group(5), 16),  # offset
	            int(matched.group(6), 16),  # size
	            matched.group(7),  # es
	            matched.group(8),  # flg
	            matched.group(9),  # lk
	            matched.group(10),  # inf
	            matched.group(11)  # al
	            ))
	      elif line in ('Key to Flags:', 'Program Headers:'):
	        break

	  def load_readelf_debug_decodedline_file(self, input_file):
	    """Loads an address-to-source-file table.

	    Each line holds a hexadecimal address followed by a source file name;
	    anything after the second whitespace-separated field is ignored.

	    Args:
	      input_file: Iterable of text lines.
	    """
	    for line in input_file:
	      splitted = line.rstrip().split(None, 2)
	      if len(splitted) < 2:
	        # Blank or truncated line: nothing to record.
	        continue
	      self._append_sourcefile(int(splitted[0], 16), splitted[1])

	  @staticmethod
	  def _parse_nm_bsd_line(line):
	    """Splits one line of nm output into value, type and name.

	    Two fixed layouts are accepted: an 8-character value column or a
	    16-character value column, each followed by a space, a one-character
	    type, a space and the symbol name.

	    Args:
	      line: One line of nm output without its trailing newline.

	    Returns:
	      A (sym_value, sym_type, sym_name) tuple of strings.

	    Raises:
	      ParsingException: line fits neither layout.
	    """
	    # The length guards turn short or blank lines into ParsingException
	    # rather than an IndexError from the positional indexing.
	    if len(line) > 9 and line[8] == ' ':
	      return line[0:8], line[9], line[11:]
	    if len(line) > 17 and line[16] == ' ':
	      return line[0:16], line[17], line[19:]
	    raise ParsingException('Invalid nm output.')

	  @staticmethod
	  def _get_short_function_name(function):
	    """Strips argument lists, template arguments and leading types.

	    Args:
	      function: A function name string.

	    Returns:
	      The name with every '(...)' (plus a trailing 'const') and every
	      '<...>' removed, and with the text before the last
	      whitespace-preceded 'name::' qualifier dropped.
	    """
	    # Each pattern only matches an innermost pair, so substitution is
	    # repeated until nested pairs have been peeled away completely.
	    while True:
	      function, number = _ARGUMENT_TYPE_PATTERN.subn('', function)
	      if not number:
	        break
	    while True:
	      function, number = _TEMPLATE_ARGUMENT_PATTERN.subn('', function)
	      if not number:
	        break
	    return _LEADING_TYPE_PATTERN.sub(r'\g<1>', function)

	  def load_nm_bsd(self, f, mangled=False):
	    """Loads procedures and typeinfo symbols from nm output.

	    Each procedure is recorded as the range from its own address to the
	    next distinct address seen, so the input is presumably sorted by
	    address (TODO confirm against the caller).

	    Args:
	      f: Iterable of nm output lines.
	      mangled: If True, names are stored as-is and typeinfo symbols are
	          not collected; otherwise names are shortened with
	          _get_short_function_name().

	    Raises:
	      ParsingException: a non-blank line is not valid nm output.
	    """
	    last_start = 0
	    routine = ''

	    for line in f:
	      line = line.rstrip()
	      if not line:
	        # Blank lines carry no symbol.
	        continue
	      sym_value, sym_type, sym_name = self._parse_nm_bsd_line(line)

	      # A blank value column means the symbol has no address.
	      if sym_value[0] == ' ':
	        continue

	      start_val = int(sym_value, 16)

	      if (sym_type in ('r', 'R', 'D', 'U', 'd', 'V') and
	          (not mangled and sym_name.startswith('typeinfo'))):
	        self._append_typeinfo(start_val, sym_name)

	      # It's possible for two symbols to share the same address, if
	      # one is a zero-length variable (like __start_google_malloc) or
	      # one symbol is a weak alias to another (like __libc_malloc).
	      # In such cases, we want to ignore all values except for the
	      # actual symbol, which in nm-speak has type "T".  The logic
	      # below does this, though it's a bit tricky: when we have a
	      # series of lines with the same address, the first one gets
	      # queued up to be processed.  However, it won't actually be
	      # processed until later, when we read a line with a different
	      # address.  That means that as long as we're reading lines with
	      # the same address, we have a chance to replace that item in the
	      # queue, which we do whenever we see a 't' or 'T' entry.  If we
	      # never see one, we'll just go ahead and process the first entry
	      # (which never got touched in the queue), and ignore the others.
	      if start_val == last_start:
	        if sym_type in ('t', 'T'):
	          # We are the 'T' symbol at this address, replace previous symbol.
	          routine = sym_name
	        # Otherwise we're not the 'T' symbol at this address: ignore us.
	        continue

	      # Tag this routine with the starting address in case the image
	      # has multiple occurrences of this routine.  We use a syntax
	      # that resembles template parameters, which are automatically
	      # stripped out by _get_short_function_name().
	      sym_name += "<%016x>" % start_val

	      # Flush the queued routine now that its end address is known.
	      if not mangled:
	        routine = self._get_short_function_name(routine)
	      self._append_procedure(
	          last_start, Procedure(last_start, start_val, routine))

	      last_start = start_val
	      routine = sym_name

	    # Flush the final queued routine; its end address is unknown, so the
	    # start address is used for both ends.
	    if not mangled:
	      routine = self._get_short_function_name(routine)
	    self._append_procedure(
	        last_start, Procedure(last_start, last_start, routine))