tools/cygprofile/patch_orderfile.py - chromium/src - Git at Google

 #!/usr/bin/python
 # Copyright 2013 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """Patch an orderfile.

 Starting with a list of symbols in a binary and an orderfile (ordered list of
 symbols), matches the symbols in the orderfile and augments each symbol with the
 symbols residing at the same address (due to having identical code).

 Note: It is possible to have:
 - Several symbols mapping to the same offset in the binary.
 - Several offsets for a given symbol (because we strip the ".clone." suffix)

 TODO(lizeb): Since the suffix ".clone." is only used with -O3 that we don't
 currently use, simplify the logic by removing the suffix handling.

 The general pipeline is:
 1. Get the symbol infos (offset, length, name) from the binary
 2. Get the symbol names from the orderfile
 3. Find the orderfile symbol names in the symbols coming from the binary
 4. For each symbol found, get all the symbols at the same address
 5. Output them to an updated orderfile, with several different prefixes
 """

 import collections
 import logging
 import subprocess
 import sys

 # Prefixes for the symbols. We strip them from the incoming symbols, and add
 # them back in the output file.
 # Order matters: _StripPrefix() stops at the first match, so the generic
 # '.text.' has to come after the longer prefixes that also start with it.
 _PREFIXES = ('.text.startup.', '.text.hot.', '.text.unlikely.', '.text.')


 # One symbol as parsed from a line of nm output. size is -1 when the nm line
 # carried no size column.
 SymbolInfo = collections.namedtuple('SymbolInfo', ['offset', 'size', 'name'])


 def _RemoveClone(name):
   """Strip the ".clone." marker and everything after it from a symbol name.

   Args:
     name: a symbol name, possibly carrying a ".clone.<suffix>" tail.

   Returns:
     The part of name preceding the first ".clone." marker, or name unchanged
     when the marker is absent.
   """
   # partition() hands back the whole string as its head when the separator is
   # missing, so no explicit "not found" branch is needed.
   stem, _, _ = name.partition('.clone.')
   return stem


 def _GetSymbolInfosFromStream(nm_lines):
   """Build the offset and name lookup tables from lines of nm output.

   Two line shapes are recognised; any other line is skipped:
     00210d59 00000002 t _ZN34BrowserPluginHostMsg_Attach_ParamsD2Ev
         offset, size, symbol type, symbol name
     0070ee8c T WebRtcSpl_ComplexBitReverse
         offset, symbol type, symbol name (no size, so -1 is stored instead)

   Args:
     nm_lines: An iterable of lines

   Returns:
     The same (offset_to_symbol_infos, name_to_symbol_infos) tuple as
     _GetSymbolInfosFromBinary.
   """
   # TODO(lizeb): Consider switching to objdump to simplify parsing.
   by_offset = collections.defaultdict(list)
   by_name = collections.defaultdict(list)
   for nm_line in nm_lines:
     fields = nm_line.split()
     field_count = len(fields)
     if field_count not in (3, 4):
       continue
     offset = int(fields[0], 16)
     # -1 is the sentinel for "nm printed no size for this symbol".
     size = int(fields[1], 16) if field_count == 4 else -1
     # The name is the last field in both shapes. The ".clone." suffix is
     # dropped, so every clone is filed under its base symbol name.
     info = SymbolInfo(offset, size, _RemoveClone(fields[-1]))
     by_offset[info.offset].append(info)
     by_name[info.name].append(info)
   return (by_offset, by_name)


 def _GetSymbolInfosFromBinary(binary_filename):
   """Runs nm to get all the symbols from a binary.

   Only the nm output lines containing " t ", " W " or " T " are parsed.

   Args:
     binary_filename: path to the binary.

   Returns:
     A tuple of collections.defaultdict:
     (offset_to_symbol_infos, name_to_symbol_infos):
     - offset_to_symbol_infos: {offset: [symbol_info1, ...]}
     - name_to_symbol_infos: {name: [symbol_info1, ...]}

   Raises:
     OSError: if nm cannot be started.
   """
   # Run nm directly with an argument list rather than through a shell
   # pipeline, so that a path containing spaces or shell metacharacters is
   # passed through untouched. The former egrep filter is applied below.
   p = subprocess.Popen(['nm', '-S', '-n', binary_filename],
                        stdout=subprocess.PIPE, universal_newlines=True)
   try:
     wanted_lines = (line for line in p.stdout
                     if ' t ' in line or ' W ' in line or ' T ' in line)
     return _GetSymbolInfosFromStream(wanted_lines)
   finally:
     # Close our end of the pipe before waiting, so that nm cannot stay
     # blocked on a full pipe if parsing bailed out early.
     p.stdout.close()
     p.wait()

 def _StripPrefix(line):
   """Drop the linker section prefix from an orderfile line.

   Args:
     line: a line from an orderfile, usually in the form:
           .text.SymbolName

   Returns:
     The symbol, SymbolName in the example above. A line carrying none of the
     known prefixes is returned with only its trailing newlines removed.
   """
   stripped = line.rstrip('\n')
   # The first matching prefix wins, in _PREFIXES order; the whole line is the
   # fallback for the unprefixed case.
   return next(
       (stripped[len(prefix):] for prefix in _PREFIXES
        if stripped.startswith(prefix)),
       stripped)


 def _GetSymbolsFromStream(lines):
   """Collect the unique symbols from orderfile lines, in first-seen order.

   Section prefixes are stripped. Entries that are empty, '*' or '.text' once
   any ".clone." suffix is ignored are wildcards or bare section names rather
   than symbols, and are dropped.

   Args:
     lines: iterable of lines from an orderfile.

   Returns:
     Same as _GetSymbolsFromOrderfile
   """
   # TODO(lizeb): Retain the prefixes later in the processing stages.
   non_symbols = ('', '*', '.text')
   ordered = []
   seen = set()
   for raw_line in lines:
     symbol = _StripPrefix(raw_line)
     # Only the filter looks at the clone-less name; the symbol itself is
     # recorded with any ".clone." suffix intact.
     if _RemoveClone(symbol) in non_symbols:
       continue
     if symbol in seen:
       continue
     ordered.append(symbol)
     seen.add(symbol)
   return ordered


 def _GetSymbolsFromOrderfile(filename):
   """Return the symbols from an orderfile.

   Args:
     filename: The name of the orderfile.

   Returns:
     A list of symbol names.
   """
   with open(filename, 'r') as f:
     # A file object is already a lazy iterator over its lines; the deprecated
     # xreadlines() adds nothing and no longer exists in Python 3.
     return _GetSymbolsFromStream(f)

 def _SymbolsWithSameOffset(profiled_symbol, name_to_symbol_info,
                            offset_to_symbol_info):
   """List the name of every symbol sharing an offset with profiled_symbol.

   Args:
     profiled_symbol: the string symbol name to be expanded.
     name_to_symbol_info: {name: [symbol_info1], ...}, as returned by
         GetSymbolInfosFromBinary
     offset_to_symbol_info: {offset: [symbol_info1, ...], ...}

   Returns:
     A list of symbol names, or an empty list if profiled_symbol was not in
     name_to_symbol_info.
   """
   # Membership test rather than a plain lookup: the mapping may be a
   # defaultdict, where indexing an unknown name would insert an empty entry.
   if profiled_symbol not in name_to_symbol_info:
     return []
   # A name can map to several offsets; collect the aliases of each in turn.
   return [alias.name
           for info in name_to_symbol_info[profiled_symbol]
           for alias in offset_to_symbol_info[info.offset]]

 def _ExpandSymbols(profiled_symbols, name_to_symbol_infos,
                    offset_to_symbol_infos):
   """Expand all of the symbols in profiled_symbols to include any symbols which
      share the same address.

   A symbol that cannot be expanded is kept in the output unchanged and is
   reported as missing through logging.

   Args:
     profiled_symbols: Symbols to match
     name_to_symbol_infos: {name: [symbol_info1], ...}, as returned by
         GetSymbolInfosFromBinary
     offset_to_symbol_infos: {offset: [symbol_info1, ...], ...}

   Returns:
     A list of the symbol names.
   """
   found_symbols = 0
   missing_symbols = []
   all_symbols = []
   for name in profiled_symbols:
     expansion = _SymbolsWithSameOffset(
         name, name_to_symbol_infos, offset_to_symbol_infos)
     if expansion:
       found_symbols += 1
       all_symbols += expansion
     else:
       all_symbols.append(name)
       missing_symbols.append(name)
   logging.info('symbols found: %d\n', found_symbols)
   # Test the list itself. The former "missing_symbols > 0" compared a list to
   # an int, which is always true on Python 2 (and a TypeError on Python 3), so
   # the warnings below were emitted even when nothing was missing.
   if missing_symbols:
     logging.warning('%d missing symbols.', len(missing_symbols))
     missing_symbols_to_show = min(100, len(missing_symbols))
     logging.warning('First %d missing symbols:\n%s',
                     missing_symbols_to_show,
                     '\n'.join(missing_symbols[:missing_symbols_to_show]))
   return all_symbols


 def _PrintSymbolsWithPrefixes(symbol_names, output_file):
   """Write every symbol to output_file once per prefix in _PREFIXES.

   Each emitted line is prefix + name followed by a newline. A line that has
   already been written is not written again.

   Args:
     symbol_names: iterable of symbol names.
     output_file: object with a write() method receiving the lines.
   """
   already_written = set()
   for symbol in symbol_names:
     for section in (prefix + symbol for prefix in _PREFIXES):
       if section in already_written:
         continue
       output_file.write(section + '\n')
       already_written.add(section)


 def main(argv):
   """Patch the orderfile named on the command line and write it to stdout.

   Args:
     argv: command line: [program, unpatched_orderfile, binary].

   Returns:
     The process exit code: 0 on success, 1 on a usage error.
   """
   if len(argv) != 3:
     # stdout carries the generated orderfile, so the usage error goes to
     # stderr to keep it out of any redirected output.
     sys.stderr.write(
         'Usage: %s <unpatched_orderfile> <libchrome.so>\n' % argv[0])
     return 1
   orderfile_filename = argv[1]
   binary_filename = argv[2]
   (offset_to_symbol_infos, name_to_symbol_infos) = _GetSymbolInfosFromBinary(
       binary_filename)
   profiled_symbols = _GetSymbolsFromOrderfile(orderfile_filename)
   expanded_symbols = _ExpandSymbols(
       profiled_symbols, name_to_symbol_infos, offset_to_symbol_infos)
   _PrintSymbolsWithPrefixes(expanded_symbols, sys.stdout)
   # The following is needed otherwise Gold only applies a partial sort.
   # '.text' gets methods not in a section, such as assembly.
   sys.stdout.write('.text\n')
   # '.text.*' gets everything else.
   sys.stdout.write('.text.*\n')
   return 0


 if __name__ == '__main__':
   # INFO level, so that the "symbols found" summary logged while expanding
   # the symbols is shown.
   logging.basicConfig(level=logging.INFO)
   exit_status = main(sys.argv)
   sys.exit(exit_status)
	#!/usr/bin/python
	# Copyright 2013 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	"""Patch an orderfile.

	Starting with a list of symbols in a binary and an orderfile (ordered list of
	symbols), matches the symbols in the orderfile and augments each symbol with the
	symbols residing at the same address (due to having identical code).

	Note: It is possible to have:
	- Several symbols mapping to the same offset in the binary.
	- Several offsets for a given symbol (because we strip the ".clone." suffix)

	TODO(lizeb): Since the suffix ".clone." is only used with -O3 that we don't
	currently use, simplify the logic by removing the suffix handling.

	The general pipeline is:
	1. Get the symbol infos (offset, length, name) from the binary
	2. Get the symbol names from the orderfile
	3. Find the orderfile symbol names in the symbols coming from the binary
	4. For each symbol found, get all the symbols at the same address
	5. Output them to an updated orderfile, with several different prefixes
	"""

	import collections
	import logging
	import subprocess
	import sys

	# Prefixes for the symbols. We strip them from the incoming symbols, and add
	# them back in the output file.
	# Order matters: _StripPrefix() stops at the first match, so the generic
	# '.text.' has to come after the longer prefixes that also start with it.
	_PREFIXES = ('.text.startup.', '.text.hot.', '.text.unlikely.', '.text.')


	# One symbol as parsed from a line of nm output. size is -1 when the nm line
	# carried no size column.
	SymbolInfo = collections.namedtuple('SymbolInfo', ['offset', 'size', 'name'])


	def _RemoveClone(name):
	  """Return name up to the ".clone." marker.

	  Args:
	    name: a symbol name.

	  Returns:
	    name truncated just before its first ".clone." marker, or name unchanged
	    when it has none.
	  """
	  clone_index = name.find('.clone.')
	  if clone_index == -1:
	    return name
	  return name[:clone_index]


	def _GetSymbolInfosFromStream(nm_lines):
	  """Parses the output of nm, and get all the symbols from a binary.

	  Args:
	    nm_lines: An iterable of lines

	  Returns:
	    The same output as _GetSymbolInfosFromBinary.
	  """
	  # TODO(lizeb): Consider switching to objdump to simplify parsing.
	  symbol_infos = []
	  for line in nm_lines:
	    # We are interested in two types of lines:
	    # This:
	    # 00210d59 00000002 t _ZN34BrowserPluginHostMsg_Attach_ParamsD2Ev
	    # offset size <symbol_type> symbol_name
	    # And that:
	    # 0070ee8c T WebRtcSpl_ComplexBitReverse
	    # In the second case we don't have a size, so use -1 as a sentinel
	    parts = line.split()
	    if len(parts) == 4:
	      symbol_infos.append(SymbolInfo(
	          offset=int(parts[0], 16), size=int(parts[1], 16), name=parts[3]))
	    elif len(parts) == 3:
	      symbol_infos.append(SymbolInfo(
	          offset=int(parts[0], 16), size=-1, name=parts[2]))
	  # Map the addresses to symbols. Names are stored without their ".clone."
	  # suffix, so one name may end up with several offsets.
	  offset_to_symbol_infos = collections.defaultdict(list)
	  name_to_symbol_infos = collections.defaultdict(list)
	  for symbol in symbol_infos:
	    symbol = symbol._replace(name=_RemoveClone(symbol.name))
	    offset_to_symbol_infos[symbol.offset].append(symbol)
	    name_to_symbol_infos[symbol.name].append(symbol)
	  return (offset_to_symbol_infos, name_to_symbol_infos)


	def _GetSymbolInfosFromBinary(binary_filename):
	  """Runs nm to get all the symbols from a binary.

	  Only the nm output lines containing " t ", " W " or " T " are parsed.

	  Args:
	    binary_filename: path to the binary.

	  Returns:
	    A tuple of collections.defaultdict:
	    (offset_to_symbol_infos, name_to_symbol_infos):
	    - offset_to_symbol_infos: {offset: [symbol_info1, ...]}
	    - name_to_symbol_infos: {name: [symbol_info1, ...]}

	  Raises:
	    OSError: if nm cannot be started.
	  """
	  # Run nm directly with an argument list rather than through a shell
	  # pipeline: the former command string carried stray backslashes before
	  # each "|" and broke on paths with spaces or shell metacharacters. The
	  # egrep filter it used is applied below instead.
	  p = subprocess.Popen(['nm', '-S', '-n', binary_filename],
	                       stdout=subprocess.PIPE, universal_newlines=True)
	  try:
	    wanted_lines = (line for line in p.stdout
	                    if ' t ' in line or ' W ' in line or ' T ' in line)
	    return _GetSymbolInfosFromStream(wanted_lines)
	  finally:
	    # Close our end of the pipe before waiting, so that nm cannot stay
	    # blocked on a full pipe if parsing bailed out early.
	    p.stdout.close()
	    p.wait()


	def _StripPrefix(line):
	  """Get the symbol from a line with a linker section name.

	  Args:
	    line: a line from an orderfile, usually in the form:
	          .text.SymbolName

	  Returns:
	    The symbol, SymbolName in the example above.
	  """
	  line = line.rstrip('\n')
	  # Prefixes are tried in _PREFIXES order and the first match is removed.
	  for prefix in _PREFIXES:
	    if line.startswith(prefix):
	      return line[len(prefix):]
	  return line  # Unprefixed case


	def _GetSymbolsFromStream(lines):
	  """Get the symbols from an iterable of lines.

	  Filters out wildcards and lines which do not correspond to symbols.
	  Duplicates are dropped; the first occurrence fixes the position.

	  Args:
	    lines: iterable of lines from an orderfile.

	  Returns:
	    Same as _GetSymbolsFromOrderfile
	  """
	  # TODO(lizeb): Retain the prefixes later in the processing stages.
	  symbols = []
	  unique_symbols = set()
	  for line in lines:
	    line = _StripPrefix(line)
	    # The clone-less name is only used for filtering; the symbol is kept
	    # with any ".clone." suffix intact.
	    name = _RemoveClone(line)
	    if name in ('', '*', '.text'):
	      continue
	    if line not in unique_symbols:
	      symbols.append(line)
	      unique_symbols.add(line)
	  return symbols


	def _GetSymbolsFromOrderfile(filename):
	  """Return the symbols from an orderfile.

	  Args:
	    filename: The name of the orderfile.

	  Returns:
	    A list of symbol names.
	  """
	  with open(filename, 'r') as f:
	    # A file object is already a lazy iterator over its lines; the
	    # deprecated xreadlines() adds nothing and is gone in Python 3.
	    return _GetSymbolsFromStream(f)

	def _SymbolsWithSameOffset(profiled_symbol, name_to_symbol_info,
	                           offset_to_symbol_info):
	  """Expand a profiled symbol to include all symbols which share an offset
	     with that symbol.

	  Args:
	    profiled_symbol: the string symbol name to be expanded.
	    name_to_symbol_info: {name: [symbol_info1], ...}, as returned by
	        GetSymbolInfosFromBinary
	    offset_to_symbol_info: {offset: [symbol_info1, ...], ...}

	  Returns:
	    A list of symbol names, or an empty list if profiled_symbol was not in
	    name_to_symbol_info.
	  """
	  if profiled_symbol not in name_to_symbol_info:
	    return []
	  expanded = []
	  # A name can map to several offsets; gather the names found at each one.
	  for symbol_info in name_to_symbol_info[profiled_symbol]:
	    expanded.extend(
	        s.name for s in offset_to_symbol_info[symbol_info.offset])
	  return expanded

	def _ExpandSymbols(profiled_symbols, name_to_symbol_infos,
	                   offset_to_symbol_infos):
	  """Expand all of the symbols in profiled_symbols to include any symbols which
	     share the same address.

	  A symbol that cannot be expanded is kept in the output unchanged and is
	  reported as missing through logging.

	  Args:
	    profiled_symbols: Symbols to match
	    name_to_symbol_infos: {name: [symbol_info1], ...}, as returned by
	        GetSymbolInfosFromBinary
	    offset_to_symbol_infos: {offset: [symbol_info1, ...], ...}

	  Returns:
	    A list of the symbol names.
	  """
	  found_symbols = 0
	  missing_symbols = []
	  all_symbols = []
	  for name in profiled_symbols:
	    expansion = _SymbolsWithSameOffset(
	        name, name_to_symbol_infos, offset_to_symbol_infos)
	    if expansion:
	      found_symbols += 1
	      all_symbols += expansion
	    else:
	      all_symbols.append(name)
	      missing_symbols.append(name)
	  logging.info('symbols found: %d\n', found_symbols)
	  # Test the list itself. The former "missing_symbols > 0" compared a list
	  # to an int, which is always true on Python 2 (and a TypeError on
	  # Python 3), so the warnings were emitted even when nothing was missing.
	  if missing_symbols:
	    logging.warning('%d missing symbols.', len(missing_symbols))
	    missing_symbols_to_show = min(100, len(missing_symbols))
	    logging.warning('First %d missing symbols:\n%s',
	                    missing_symbols_to_show,
	                    '\n'.join(missing_symbols[:missing_symbols_to_show]))
	  return all_symbols


	def _PrintSymbolsWithPrefixes(symbol_names, output_file):
	  """For each symbol, outputs it to output_file with the prefixes.

	  Args:
	    symbol_names: iterable of symbol names.
	    output_file: object with a write() method receiving one line per
	        (prefix, symbol) pair; repeated lines are written only once.
	  """
	  unique_outputs = set()
	  for name in symbol_names:
	    for prefix in _PREFIXES:
	      linker_section = prefix + name
	      if linker_section not in unique_outputs:
	        output_file.write(linker_section + '\n')
	        unique_outputs.add(linker_section)


	def main(argv):
	  """Patch the orderfile named on the command line and write it to stdout.

	  Args:
	    argv: command line: [program, unpatched_orderfile, binary].

	  Returns:
	    The process exit code: 0 on success, 1 on a usage error.
	  """
	  if len(argv) != 3:
	    # stdout carries the generated orderfile, so the usage error goes to
	    # stderr to keep it out of any redirected output.
	    sys.stderr.write(
	        'Usage: %s <unpatched_orderfile> <libchrome.so>\n' % argv[0])
	    return 1
	  orderfile_filename = argv[1]
	  binary_filename = argv[2]
	  (offset_to_symbol_infos, name_to_symbol_infos) = _GetSymbolInfosFromBinary(
	      binary_filename)
	  profiled_symbols = _GetSymbolsFromOrderfile(orderfile_filename)
	  expanded_symbols = _ExpandSymbols(
	      profiled_symbols, name_to_symbol_infos, offset_to_symbol_infos)
	  _PrintSymbolsWithPrefixes(expanded_symbols, sys.stdout)
	  # The following is needed otherwise Gold only applies a partial sort.
	  # '.text' gets methods not in a section, such as assembly.
	  sys.stdout.write('.text\n')
	  # '.text.*' gets everything else.
	  sys.stdout.write('.text.*\n')
	  return 0


	if __name__ == '__main__':
	  # INFO level, so that the "symbols found" summary logged while expanding
	  # the symbols is shown.
	  logging.basicConfig(level=logging.INFO)
	  sys.exit(main(sys.argv))