tools/binary_size/libsupersize/linker_map_parser.py - chromium/src - Git at Google

 # Copyright 2017 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 import logging

 import models

 # About linker maps:
 # * "Discarded input sections" include symbols merged with other symbols
 #   (aliases), so the information there is not actually a list of unused things.
 # * Linker maps include symbols that do not have names (with object path),
 #   whereas "nm" skips over these (they don't account for much though).
 # * The parse time for compressed linker maps is dominated by ungzipping.


 class MapFileParser(object):
   """Parses a linker map file (tested only on files from gold linker).

   State accumulates on the instance: Parse() does not reset symbols or
   section sizes collected by an earlier call.
   """
   # Map file writer for gold linker:
   # https://github.com/gittup/binutils/blob/HEAD/gold/mapfile.cc

   def __init__(self):
     # Symbols from the "Common symbol" table; appended to |_symbols| when the
     # .bss section is parsed.
     self._common_symbols = []
     # Every symbol created so far, in the order it was appended.
     self._symbols = []
     # Maps section name -> section size in bytes.
     self._section_sizes = {}
     # Iterator over the lines being parsed; set by Parse().
     self._lines = None

   def Parse(self, lines):
     """Parses a linker map file.

     Args:
       lines: Iterable of lines.

     Returns:
       A tuple of (section_sizes, symbols). If the input ends before a
       "Memory map" header is found, no sections are parsed and a warning is
       logged.
     """
     self._lines = iter(lines)
     logging.debug('Scanning for Header')

     while True:
       line = self._SkipToLineWithPrefix('Common symbol', 'Memory map')
       if line is None:
         # Hit EOF without seeing the "Memory map" header (e.g. empty or
         # truncated input). Stop cleanly instead of failing with an
         # AttributeError on None.
         logging.warning('Reached end of input without a "Memory map" header')
         break
       if line.startswith('Common symbol'):
         self._common_symbols = self._ParseCommonSymbols()
         logging.debug('.bss common entries: %d', len(self._common_symbols))
         continue
       # The only other prefix searched for is 'Memory map'.
       self._ParseSections()
       break
     return self._section_sizes, self._symbols

   def _SkipToLineWithPrefix(self, prefix, prefix2=None):
     """Consumes lines until one starts with |prefix| (or |prefix2|).

     Args:
       prefix: Prefix to look for.
       prefix2: Optional alternative prefix; ignored when falsy.

     Returns:
       The first matching line, or None if the input is exhausted first.
     """
     prefixes = (prefix, prefix2) if prefix2 else (prefix,)
     for l in self._lines:
       if l.startswith(prefixes):
         return l
     return None

   def _ParsePossiblyWrappedParts(self, line, count):
     """Splits an entry into |count| whitespace-separated fields.

     When |line| holds fewer than |count| fields, the entry is treated as
     wrapped: the next line is consumed from |self._lines| and supplies the
     remaining fields. The final field may contain interior whitespace.

     Args:
       line: First (or only) line of the entry.
       count: Number of fields expected.

     Returns:
       A list of |count| strings with trailing whitespace removed from the last
       one, or None if |line| is empty or contains only whitespace.

     Raises:
       StopIteration: If a continuation line is needed but input is exhausted.
       AssertionError: If the two lines together do not yield |count| fields.
     """
     parts = line.split(None, count - 1)
     if not parts:
       return None
     if len(parts) != count:
       line = next(self._lines)
       parts.extend(line.split(None, count - len(parts) - 1))
       assert len(parts) == count, 'parts: ' + ' '.join(parts)
     parts[-1] = parts[-1].rstrip()
     return parts

   def _ParseCommonSymbols(self):
     """Parses the table that follows a "Common symbol" header line.

     Returns:
       A list of models.Symbol objects in section '.bss', one per table entry.
     """
     # Common symbol       size              file
     #
     # ff_cos_131072       0x40000           obj/third_party/<snip>
     # ff_cos_131072_fixed
     #                     0x20000           obj/third_party/<snip>
     ret = []
     next(self._lines)  # Skip past blank line

     for l in self._lines:
       parts = self._ParsePossiblyWrappedParts(l, 3)
       if not parts:
         # A blank line terminates the table.
         break
       name, size_str, path = parts
       sym = models.Symbol('.bss', int(size_str[2:], 16), full_name=name,
                           object_path=path)
       ret.append(sym)
     return ret

   def _ParseSections(self):
     """Parses the entries that follow the "Memory map" header.

     Records the size of every section header found in |self._section_sizes|.
     For .bss, .rodata, .text and .data* sections it also appends one
     models.Symbol per entry to |self._symbols|, plus zero-sized
     "** symbol gap" symbols marking address gaps so they show up as padding.

     Any exception raised while parsing a section is logged together with the
     offending line and section name, then re-raised.
     """
     # .text           0x0028c600  0x22d3468
     #  .text.startup._GLOBAL__sub_I_bbr_sender.cc
     #                 0x0028c600       0x38 obj/net/net/bbr_sender.o
     #  .text._reset   0x00339d00       0xf0 obj/third_party/icu/icuuc/ucnv.o
     #  ** fill        0x0255fb00   0x02
     #  .text._ZN4base8AutoLockD2Ev
     #                 0x00290710        0xe obj/net/net/file_name.o
     #                 0x00290711                base::AutoLock::~AutoLock()
     #                 0x00290711                base::AutoLock::~AutoLock()
     # .text._ZNK5blink15LayoutBlockFlow31mustSeparateMarginAfterForChildERK...
     #                0xffffffffffffffff       0x46 obj/...
     #                0x006808e1                blink::LayoutBlockFlow::...
     # .bss
     #  .bss._ZGVZN11GrProcessor11initClassIDI10LightingFPEEvvE8kClassID
     #                0x02d4b294        0x4 obj/skia/skia/SkLightingShader.o
     #                0x02d4b294   guard variable for void GrProcessor::initClassID
     # .data           0x0028c600  0x22d3468
     #  .data.rel.ro._ZTVN3gvr7android19ScopedJavaGlobalRefIP12_jfloatArrayEE
     #                0x02d1e668       0x10 ../../third_party/.../libfoo.a(bar.o)
     #                0x02d1e668   vtable for gvr::android::GlobalRef<_jfloatArray*>
     #  ** merge strings
     #                 0x0255fb00   0x1f2424
     #  ** merge constants
     #                 0x0255fb00   0x8
     # ** common      0x02db5700   0x13ab48
     syms = self._symbols
     symbol_gap_count = 0
     while True:
       # Section headers start in column 0; sub-entries are indented.
       line = self._SkipToLineWithPrefix('.')
       if not line:
         break
       section_name = None
       try:
         # Parse section name and size.
         parts = self._ParsePossiblyWrappedParts(line, 3)
         if not parts:
           break
         section_name, section_address_str, section_size_str = parts
         section_address = int(section_address_str[2:], 16)
         section_size = int(section_size_str[2:], 16)
         self._section_sizes[section_name] = section_size
         if (section_name in ('.bss', '.rodata', '.text') or
             section_name.startswith('.data')):
           logging.info('Parsing %s', section_name)
           if section_name == '.bss':
             # Common symbols have no address.
             syms.extend(self._common_symbols)
           prefix_len = len(section_name) + 1  # + 1 for the trailing .
           merge_symbol_start_address = section_address
           sym_count_at_start = len(syms)
           line = next(self._lines)
           # Parse section symbols.
           while True:
             # A blank line ends the section.
             if not line or line.isspace():
               break
             if line.startswith(' **'):
               zero_index = line.find('0')
               if zero_index == -1:
                 # Line wraps.
                 name = line.strip()
                 line = next(self._lines)
               else:
                 # Line does not wrap.
                 name = line[:zero_index].strip()
                 line = line[zero_index:]
               address_str, size_str = self._ParsePossiblyWrappedParts(line, 2)
               line = next(self._lines)
               # These bytes are already accounted for.
               if name == '** common':
                 continue
               address = int(address_str[2:], 16)
               size = int(size_str[2:], 16)
               path = None
               sym = models.Symbol(section_name, size, address=address,
                                   full_name=name, object_path=path)
               syms.append(sym)
             else:
               # A normal symbol entry.
               subsection_name, address_str, size_str, path = (
                   self._ParsePossiblyWrappedParts(line, 4))
               size = int(size_str[2:], 16)
               assert subsection_name.startswith(section_name), (
                   'subsection name was: ' + subsection_name)
               mangled_name = subsection_name[prefix_len:]
               name = None
               address_str2 = None
               # Consume the lines that follow the entry. Only the first
               # "address name" line is used; |line| is left on whatever line
               # ended the entry so the outer loop can handle it.
               while True:
                 line = next(self._lines).rstrip()
                 if not line or line.startswith(' .'):
                   break
                 # clang includes ** fill, but gcc does not.
                 if line.startswith(' ** fill'):
                   # Alignment explicitly recorded in map file. Rather than
                   # record padding based on these entries, we calculate it
                   # using addresses. We do this because fill lines are not
                   # present when compiling with gcc (only for clang).
                   continue
                 elif line.startswith(' **'):
                   break
                 elif name is None:
                   address_str2, name = self._ParsePossiblyWrappedParts(line, 2)

               if address_str == '0xffffffffffffffff':
                 # The section needs special handling (e.g., a merge section)
                 # It also generally has a large offset after it, so don't
                 # penalize the subsequent symbol for this gap (e.g. a 50kb gap).
                 # There seems to be no correlation between where these gaps
                 # occur and the symbols they come in-between.
                 # TODO(agrieve): Learn more about why this happens.
                 if address_str2:
                   address = int(address_str2[2:], 16) - 1
                 elif syms and syms[-1].address > 0:
                   # Merge sym with no second line showing real address.
                   address = syms[-1].end_address
                 else:
                   logging.warning('First symbol of section had address -1')
                   address = 0

                 merge_symbol_start_address = address + size
               else:
                 address = int(address_str[2:], 16)
                 # Finish off active address gap / merge section.
                 if merge_symbol_start_address:
                   merge_size = address - merge_symbol_start_address
                   merge_symbol_start_address = 0
                   if merge_size > 0:
                     # merge_size == 0 for the initial symbol generally.
                     # |syms| can still be empty here (gap before the very
                     # first symbol), so don't index it unconditionally.
                     logging.debug('Merge symbol of size %d found at:\n  %r',
                                   merge_size, syms[-1] if syms else None)
                     # Set size=0 so that it will show up as padding.
                     sym = models.Symbol(
                         section_name, 0,
                         address=address,
                         full_name='** symbol gap %d' % symbol_gap_count)
                     symbol_gap_count += 1
                     syms.append(sym)

               sym = models.Symbol(section_name, size, address=address,
                                   full_name=name or mangled_name,
                                   object_path=path)
               syms.append(sym)
           section_end_address = section_address + section_size
           # |syms| is empty when no symbol has been parsed yet; there is then
           # no previous symbol to measure an end-of-section gap against.
           if section_name != '.bss' and syms and (
               syms[-1].end_address < section_end_address):
             # Set size=0 so that it will show up as padding.
             sym = models.Symbol(
                 section_name, 0,
                 address=section_end_address,
                 full_name=(
                     '** symbol gap %d (end of section)' % symbol_gap_count))
             syms.append(sym)
           logging.debug('Symbol count for %s: %d', section_name,
                         len(syms) - sym_count_at_start)
       except Exception:
         # Add context for debugging, then let the error propagate.
         logging.error('Problem line: %r', line)
         logging.error('In section: %r', section_name)
         raise
	# Copyright 2017 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	import logging

	import models

	# About linker maps:
	# * "Discarded input sections" include symbols merged with other symbols
	# (aliases), so the information there is not actually a list of unused things.
	# * Linker maps include symbols that do not have names (with object path),
	# whereas "nm" skips over these (they don't account for much though).
	# * The parse time for compressed linker maps is dominated by ungzipping.


	class MapFileParser(object):
	  """Parses a linker map file (tested only on files from gold linker).

	  State accumulates on the instance: Parse() does not reset symbols or
	  section sizes collected by an earlier call.
	  """
	  # Map file writer for gold linker:
	  # https://github.com/gittup/binutils/blob/HEAD/gold/mapfile.cc

	  def __init__(self):
	    # Symbols from the "Common symbol" table; appended to |_symbols| when the
	    # .bss section is parsed.
	    self._common_symbols = []
	    # Every symbol created so far, in the order it was appended.
	    self._symbols = []
	    # Maps section name -> section size in bytes.
	    self._section_sizes = {}
	    # Iterator over the lines being parsed; set by Parse().
	    self._lines = None

	  def Parse(self, lines):
	    """Parses a linker map file.

	    Args:
	      lines: Iterable of lines.

	    Returns:
	      A tuple of (section_sizes, symbols). If the input ends before a
	      "Memory map" header is found, no sections are parsed and a warning is
	      logged.
	    """
	    self._lines = iter(lines)
	    logging.debug('Scanning for Header')

	    while True:
	      line = self._SkipToLineWithPrefix('Common symbol', 'Memory map')
	      if line is None:
	        # Hit EOF without seeing the "Memory map" header (e.g. empty or
	        # truncated input). Stop cleanly instead of failing with an
	        # AttributeError on None.
	        logging.warning('Reached end of input without a "Memory map" header')
	        break
	      if line.startswith('Common symbol'):
	        self._common_symbols = self._ParseCommonSymbols()
	        logging.debug('.bss common entries: %d', len(self._common_symbols))
	        continue
	      # The only other prefix searched for is 'Memory map'.
	      self._ParseSections()
	      break
	    return self._section_sizes, self._symbols

	  def _SkipToLineWithPrefix(self, prefix, prefix2=None):
	    """Consumes lines until one starts with |prefix| (or |prefix2|).

	    Args:
	      prefix: Prefix to look for.
	      prefix2: Optional alternative prefix; ignored when falsy.

	    Returns:
	      The first matching line, or None if the input is exhausted first.
	    """
	    prefixes = (prefix, prefix2) if prefix2 else (prefix,)
	    for l in self._lines:
	      if l.startswith(prefixes):
	        return l
	    return None

	  def _ParsePossiblyWrappedParts(self, line, count):
	    """Splits an entry into |count| whitespace-separated fields.

	    When |line| holds fewer than |count| fields, the entry is treated as
	    wrapped: the next line is consumed from |self._lines| and supplies the
	    remaining fields. The final field may contain interior whitespace.

	    Args:
	      line: First (or only) line of the entry.
	      count: Number of fields expected.

	    Returns:
	      A list of |count| strings with trailing whitespace removed from the last
	      one, or None if |line| is empty or contains only whitespace.

	    Raises:
	      StopIteration: If a continuation line is needed but input is exhausted.
	      AssertionError: If the two lines together do not yield |count| fields.
	    """
	    parts = line.split(None, count - 1)
	    if not parts:
	      return None
	    if len(parts) != count:
	      line = next(self._lines)
	      parts.extend(line.split(None, count - len(parts) - 1))
	      assert len(parts) == count, 'parts: ' + ' '.join(parts)
	    parts[-1] = parts[-1].rstrip()
	    return parts

	  def _ParseCommonSymbols(self):
	    """Parses the table that follows a "Common symbol" header line.

	    Returns:
	      A list of models.Symbol objects in section '.bss', one per table entry.
	    """
	    # Common symbol       size              file
	    #
	    # ff_cos_131072       0x40000           obj/third_party/<snip>
	    # ff_cos_131072_fixed
	    #                     0x20000           obj/third_party/<snip>
	    ret = []
	    next(self._lines)  # Skip past blank line

	    for l in self._lines:
	      parts = self._ParsePossiblyWrappedParts(l, 3)
	      if not parts:
	        # A blank line terminates the table.
	        break
	      name, size_str, path = parts
	      sym = models.Symbol('.bss', int(size_str[2:], 16), full_name=name,
	                          object_path=path)
	      ret.append(sym)
	    return ret

	  def _ParseSections(self):
	    """Parses the entries that follow the "Memory map" header.

	    Records the size of every section header found in |self._section_sizes|.
	    For .bss, .rodata, .text and .data* sections it also appends one
	    models.Symbol per entry to |self._symbols|, plus zero-sized
	    "** symbol gap" symbols marking address gaps so they show up as padding.

	    Any exception raised while parsing a section is logged together with the
	    offending line and section name, then re-raised.
	    """
	    # .text           0x0028c600  0x22d3468
	    #  .text.startup._GLOBAL__sub_I_bbr_sender.cc
	    #                 0x0028c600       0x38 obj/net/net/bbr_sender.o
	    #  .text._reset   0x00339d00       0xf0 obj/third_party/icu/icuuc/ucnv.o
	    #  ** fill        0x0255fb00   0x02
	    #  .text._ZN4base8AutoLockD2Ev
	    #                 0x00290710        0xe obj/net/net/file_name.o
	    #                 0x00290711                base::AutoLock::~AutoLock()
	    #                 0x00290711                base::AutoLock::~AutoLock()
	    # .text._ZNK5blink15LayoutBlockFlow31mustSeparateMarginAfterForChildERK...
	    #                0xffffffffffffffff       0x46 obj/...
	    #                0x006808e1                blink::LayoutBlockFlow::...
	    # .bss
	    #  .bss._ZGVZN11GrProcessor11initClassIDI10LightingFPEEvvE8kClassID
	    #                0x02d4b294        0x4 obj/skia/skia/SkLightingShader.o
	    #                0x02d4b294   guard variable for void GrProcessor::initClassID
	    # .data           0x0028c600  0x22d3468
	    #  .data.rel.ro._ZTVN3gvr7android19ScopedJavaGlobalRefIP12_jfloatArrayEE
	    #                0x02d1e668       0x10 ../../third_party/.../libfoo.a(bar.o)
	    #                0x02d1e668   vtable for gvr::android::GlobalRef<_jfloatArray*>
	    #  ** merge strings
	    #                 0x0255fb00   0x1f2424
	    #  ** merge constants
	    #                 0x0255fb00   0x8
	    # ** common      0x02db5700   0x13ab48
	    syms = self._symbols
	    symbol_gap_count = 0
	    while True:
	      # Section headers start in column 0; sub-entries are indented.
	      line = self._SkipToLineWithPrefix('.')
	      if not line:
	        break
	      section_name = None
	      try:
	        # Parse section name and size.
	        parts = self._ParsePossiblyWrappedParts(line, 3)
	        if not parts:
	          break
	        section_name, section_address_str, section_size_str = parts
	        section_address = int(section_address_str[2:], 16)
	        section_size = int(section_size_str[2:], 16)
	        self._section_sizes[section_name] = section_size
	        if (section_name in ('.bss', '.rodata', '.text') or
	            section_name.startswith('.data')):
	          logging.info('Parsing %s', section_name)
	          if section_name == '.bss':
	            # Common symbols have no address.
	            syms.extend(self._common_symbols)
	          prefix_len = len(section_name) + 1  # + 1 for the trailing .
	          merge_symbol_start_address = section_address
	          sym_count_at_start = len(syms)
	          line = next(self._lines)
	          # Parse section symbols.
	          while True:
	            # A blank line ends the section.
	            if not line or line.isspace():
	              break
	            if line.startswith(' **'):
	              zero_index = line.find('0')
	              if zero_index == -1:
	                # Line wraps.
	                name = line.strip()
	                line = next(self._lines)
	              else:
	                # Line does not wrap.
	                name = line[:zero_index].strip()
	                line = line[zero_index:]
	              address_str, size_str = self._ParsePossiblyWrappedParts(line, 2)
	              line = next(self._lines)
	              # These bytes are already accounted for.
	              if name == '** common':
	                continue
	              address = int(address_str[2:], 16)
	              size = int(size_str[2:], 16)
	              path = None
	              sym = models.Symbol(section_name, size, address=address,
	                                  full_name=name, object_path=path)
	              syms.append(sym)
	            else:
	              # A normal symbol entry.
	              subsection_name, address_str, size_str, path = (
	                  self._ParsePossiblyWrappedParts(line, 4))
	              size = int(size_str[2:], 16)
	              assert subsection_name.startswith(section_name), (
	                  'subsection name was: ' + subsection_name)
	              mangled_name = subsection_name[prefix_len:]
	              name = None
	              address_str2 = None
	              # Consume the lines that follow the entry. Only the first
	              # "address name" line is used; |line| is left on whatever line
	              # ended the entry so the outer loop can handle it.
	              while True:
	                line = next(self._lines).rstrip()
	                if not line or line.startswith(' .'):
	                  break
	                # clang includes ** fill, but gcc does not.
	                if line.startswith(' ** fill'):
	                  # Alignment explicitly recorded in map file. Rather than
	                  # record padding based on these entries, we calculate it
	                  # using addresses. We do this because fill lines are not
	                  # present when compiling with gcc (only for clang).
	                  continue
	                elif line.startswith(' **'):
	                  break
	                elif name is None:
	                  address_str2, name = self._ParsePossiblyWrappedParts(line, 2)

	              if address_str == '0xffffffffffffffff':
	                # The section needs special handling (e.g., a merge section)
	                # It also generally has a large offset after it, so don't
	                # penalize the subsequent symbol for this gap (e.g. a 50kb gap).
	                # There seems to be no correlation between where these gaps
	                # occur and the symbols they come in-between.
	                # TODO(agrieve): Learn more about why this happens.
	                if address_str2:
	                  address = int(address_str2[2:], 16) - 1
	                elif syms and syms[-1].address > 0:
	                  # Merge sym with no second line showing real address.
	                  address = syms[-1].end_address
	                else:
	                  logging.warning('First symbol of section had address -1')
	                  address = 0

	                merge_symbol_start_address = address + size
	              else:
	                address = int(address_str[2:], 16)
	                # Finish off active address gap / merge section.
	                if merge_symbol_start_address:
	                  merge_size = address - merge_symbol_start_address
	                  merge_symbol_start_address = 0
	                  if merge_size > 0:
	                    # merge_size == 0 for the initial symbol generally.
	                    # |syms| can still be empty here (gap before the very
	                    # first symbol), so don't index it unconditionally.
	                    logging.debug('Merge symbol of size %d found at:\n %r',
	                                  merge_size, syms[-1] if syms else None)
	                    # Set size=0 so that it will show up as padding.
	                    sym = models.Symbol(
	                        section_name, 0,
	                        address=address,
	                        full_name='** symbol gap %d' % symbol_gap_count)
	                    symbol_gap_count += 1
	                    syms.append(sym)

	              sym = models.Symbol(section_name, size, address=address,
	                                  full_name=name or mangled_name,
	                                  object_path=path)
	              syms.append(sym)
	          section_end_address = section_address + section_size
	          # |syms| is empty when no symbol has been parsed yet; there is then
	          # no previous symbol to measure an end-of-section gap against.
	          if section_name != '.bss' and syms and (
	              syms[-1].end_address < section_end_address):
	            # Set size=0 so that it will show up as padding.
	            sym = models.Symbol(
	                section_name, 0,
	                address=section_end_address,
	                full_name=(
	                    '** symbol gap %d (end of section)' % symbol_gap_count))
	            syms.append(sym)
	          logging.debug('Symbol count for %s: %d', section_name,
	                        len(syms) - sym_count_at_start)
	      except Exception:
	        # Add context for debugging, then let the error propagate.
	        logging.error('Problem line: %r', line)
	        logging.error('In section: %r', section_name)
	        raise