# blob: 9cf6e2a37089ae92c54229bebc7fc4bfda8dc73f [file] [log] [blame]
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Logic for diffing two SizeInfo objects."""
import collections
import logging
import re
import models
# Matches a trailing run of digits and periods, e.g. the ".61" in "CSWTCH.61".
_STRIP_NUMBER_SUFFIX_PATTERN = re.compile(r'[.0-9]+$')
# Matches the trailing " <number>" and optional " (<note>)" of star symbols,
# e.g. the " 5 (end of section)" in "** symbol gap 5 (end of section)".
_NORMALIZE_STAR_SYMBOLS_PATTERN = re.compile(r'\s+\d+( \(.*\))?$')


def _ExactMatchKey(s):
  """Returns the strictest match key for a symbol.

  The key is (section, name without numeric suffix, object_path,
  size_without_padding).
  """
  # Remove numbers and periods for symbols defined by macros that use __LINE__
  # in names, or for linker symbols like ".L.ref.tmp.2".
  name = _STRIP_NUMBER_SUFFIX_PATTERN.sub('', s.full_name)
  # Use section rather than section_name since clang & gcc use
  # .data.rel.ro vs. .data.rel.ro.local.
  return s.section, name, s.object_path, s.size_without_padding


def _GoodMatchKey(symbol):
  """Returns a tuple that can be used to see if two Symbol are the same.

  Keys are not guaranteed to be unique within a SymbolGroup. When multiple
  symbols have the same key, they will be matched up in order of appearance.
  We do this because the numbering of these generated symbols is not stable.

  Examples of symbols with shared keys:
    "** merge strings"
    "** symbol gap 3", "** symbol gap 5 (end of section)"
    "foo() [clone ##]"
    "CSWTCH.61", "CSWTCH.62"
    ".L.ref.tmp.2", ".L.ref.tmp"
    "._468", "._467"
    ".L__unnamed_1193", ".L__unnamed_712"

  Returns:
    A (section, object_path, normalized name) tuple.
  """
  if symbol.IsPak():
    # full_name looks like "about_ui_resources.grdp: IDR_ABOUT_UI_CREDITS_HTML".
    # name is just "IDR_ABOUT_UI_CREDITS_HTML".
    name = symbol.name
  else:
    name = symbol.full_name
    if name.startswith('*'):
      # "symbol gap 3 (bar)" -> "symbol gaps"
      # This must run before the numeric suffix is stripped: otherwise
      # "** symbol gap 3" is reduced to "** symbol gap " (which the pattern no
      # longer matches) and fails to share a key with
      # "** symbol gap 5 (end of section)".
      name = _NORMALIZE_STAR_SYMBOLS_PATTERN.sub('s', name)
    name = _STRIP_NUMBER_SUFFIX_PATTERN.sub('', name)
    clone_idx = name.find(' [clone ')
    if clone_idx != -1:
      name = name[:clone_idx]
  return symbol.section, symbol.object_path, name
def _PoorMatchKey(symbol):
  """Returns a looser variant of the _GoodMatchKey() key.

  Only the first and last elements of the _GoodMatchKey() tuple (the section
  and the normalized name) are kept; the object path in the middle is dropped,
  so symbols that differ only by object path share a key.
  """
  good_key = _GoodMatchKey(symbol)
  return good_key[0], good_key[2]
def _MatchSymbols(before, after, key_func, padding_by_section_name):
  """Pairs up symbols from |before| and |after| that share a key.

  Symbols with the same key are matched in order of appearance.

  Args:
    before: Iterable of symbols to match against.
    after: Sized iterable of symbols to find matches for (len() is logged).
    key_func: Callable mapping a symbol to a hashable key. Its __name__ is
        used in log messages.
    padding_by_section_name: Mapping of section name -> accumulated padding
        delta. Updated in place with "+=", so it must supply a default for
        missing keys (e.g. a defaultdict(int)).

  Returns:
    A tuple of (delta_symbols, unmatched_before, unmatched_after).
  """
  logging.debug('%s: Building symbol index', key_func.__name__)
  # Deques give O(1) removal from the front; list.pop(0) is O(n), which adds up
  # for keys shared by many symbols.
  before_symbols_by_key = collections.defaultdict(collections.deque)
  for s in before:
    before_symbols_by_key[key_func(s)].append(s)

  logging.debug('%s: Creating delta symbols', key_func.__name__)
  unmatched_after = []
  delta_symbols = []
  for after_sym in after:
    # Use .get() so that misses do not insert empty entries into the index.
    candidates = before_symbols_by_key.get(key_func(after_sym))
    if not candidates:
      unmatched_after.append(after_sym)
      continue
    before_sym = candidates.popleft()
    # Padding tracked in aggregate, except for padding-only symbols.
    if before_sym.size_without_padding != 0:
      padding_by_section_name[before_sym.section_name] += (
          after_sym.padding_pss - before_sym.padding_pss)
    delta_symbols.append(models.DeltaSymbol(before_sym, after_sym))

  logging.debug('%s: Matched %d of %d symbols', key_func.__name__,
                len(delta_symbols), len(after))

  # Whatever is left in the index was never claimed by an |after| symbol.
  # .values() (rather than .itervalues()) works on both Python 2 and 3.
  unmatched_before = []
  for syms in before_symbols_by_key.values():
    unmatched_before.extend(syms)
  return delta_symbols, unmatched_before, unmatched_after
def _DiffSymbolGroups(before, after):
  """Matches up two collections of symbols and returns a DeltaSymbolGroup.

  Symbols are matched in three passes of decreasing strictness
  (_ExactMatchKey, _GoodMatchKey, _PoorMatchKey). Each pass only sees the
  symbols left unmatched by the previous one. Symbols that remain unmatched
  become DeltaSymbols with only one side set.

  Args:
    before: Symbols to diff from.
    after: Symbols to diff to.

  Returns:
    A models.DeltaSymbolGroup holding every DeltaSymbol created.
  """
  # For changed symbols, padding is zeroed out. In order to not lose the
  # information entirely, store it in aggregate.
  padding_by_section_name = collections.defaultdict(int)

  # Usually >90% of symbols are exact matches, so all of the time is spent in
  # the first pass.
  all_deltas = []
  for key_func in (_ExactMatchKey, _GoodMatchKey, _PoorMatchKey):
    delta_syms, before, after = _MatchSymbols(
        before, after, key_func, padding_by_section_name)
    all_deltas.extend(delta_syms)

  logging.debug('Creating %d unmatched symbols', len(after) + len(before))
  for after_sym in after:
    all_deltas.append(models.DeltaSymbol(None, after_sym))
  for before_sym in before:
    all_deltas.append(models.DeltaSymbol(before_sym, None))

  # Create a DeltaSymbol to represent the zero'd out padding of matched symbols.
  # .items() (rather than .iteritems()) works on both Python 2 and 3.
  for section_name, padding in padding_by_section_name.items():
    if padding != 0:
      after_sym = models.Symbol(
          section_name,
          padding,
          name="Overhead: aggregate padding of diff'ed symbols")
      after_sym.padding = padding
      all_deltas.append(models.DeltaSymbol(None, after_sym))
  return models.DeltaSymbolGroup(all_deltas)
def Diff(before, after, sort=False):
  """Diffs two SizeInfo objects. Returns a DeltaSizeInfo.

  Args:
    before: The models.SizeInfo to diff from.
    after: The models.SizeInfo to diff to.
    sort: When True, the resulting symbols are grouped by aliases and sorted
        before being stored back on the result.

  Returns:
    A models.DeltaSizeInfo whose section sizes are after-minus-before deltas.
  """
  assert isinstance(before, models.SizeInfo)
  assert isinstance(after, models.SizeInfo)
  # Sections in |before|: delta against |after| (0 if the section went away).
  # .items() (rather than .iteritems()) works on both Python 2 and 3.
  section_sizes = {k: after.section_sizes.get(k, 0) - v
                   for k, v in before.section_sizes.items()}
  # Sections that only exist in |after| count in full.
  for k, v in after.section_sizes.items():
    if k not in section_sizes:
      section_sizes[k] = v

  symbol_diff = _DiffSymbolGroups(before.raw_symbols, after.raw_symbols)
  ret = models.DeltaSizeInfo(before, after, section_sizes, symbol_diff)

  if sort:
    syms = ret.symbols  # Triggers clustering.
    logging.debug('Grouping')
    # Group path aliases so that functions defined in headers will be sorted
    # by their actual size rather than shown as many small symbols.
    syms = syms.GroupedByAliases(same_name_only=True)
    logging.debug('Sorting')
    ret.symbols = syms.Sorted()
  logging.debug('Diff complete')
  return ret