blob: c84791f79da914b3c3199d873c99a703f2c1f495 [file] [log] [blame]
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Logic for diffing two SizeInfo objects."""
import collections
import logging
import re
import models
_STRIP_NUMBER_SUFFIX_PATTERN = re.compile(r'[.0-9]+$')
_NORMALIZE_STAR_SYMBOLS_PATTERN = re.compile(r'\s+\d+( \(.*\))?$')
# Matches symbols that are unchanged (will generally be the majority).
# Matching by size is important for string literals, which all have the same
# name, but which one addition would shift their order.
def _Key1(s):
# Remove numbers and periods for symbols defined by macros that use __line__
# in names, or for linker symbols like ".L.ref.tmp.2".
# TODO(agrieve): Should this strip numbers only when periods are present?
name = _STRIP_NUMBER_SUFFIX_PATTERN.sub('', s.full_name)
# Prefer source_path over object_path since object_path for native files have
# the target_name in it (which can get renamed).
path = s.source_path or s.object_path
# Use section rather than section_name since clang & gcc use
# .data.rel.ro vs. .data.rel.ro.local.
return s.section, name, path, s.size_without_padding
# Same as _Key1, but size can change.
def _Key2(s):
return _Key1(s)[:3]
# Same as _Key2, but allow signature changes (uses name rather than full_name).
def _Key3(s):
path = s.source_path or s.object_path
name = _STRIP_NUMBER_SUFFIX_PATTERN.sub('', s.name)
clone_idx = name.find(' [clone ')
if clone_idx != -1:
name = name[:clone_idx]
if name.startswith('*'):
# "* symbol gap 3 (bar)" -> "* symbol gaps"
name = _NORMALIZE_STAR_SYMBOLS_PATTERN.sub('s', name)
return s.section, name, path
# Match on full name, but without path (to account for file moves).
def _Key4(s):
if not s.IsNameUnique():
return None
return s.section, s.full_name
def _MatchSymbols(before, after, key_func, padding_by_section_name):
logging.debug('%s: Building symbol index', key_func.__name__)
before_symbols_by_key = collections.defaultdict(list)
for s in before:
before_symbols_by_key[key_func(s)].append(s)
logging.debug('%s: Creating delta symbols', key_func.__name__)
unmatched_after = []
delta_symbols = []
for after_sym in after:
key = key_func(after_sym)
before_sym = key and before_symbols_by_key.get(key)
if before_sym:
before_sym = before_sym.pop(0)
# Padding tracked in aggregate, except for padding-only symbols.
if before_sym.size_without_padding != 0:
padding_by_section_name[before_sym.section_name] += (
after_sym.padding_pss - before_sym.padding_pss)
delta_symbols.append(models.DeltaSymbol(before_sym, after_sym))
else:
unmatched_after.append(after_sym)
logging.debug('%s: Matched %d of %d symbols', key_func.__name__,
len(delta_symbols), len(after))
unmatched_before = []
for syms in before_symbols_by_key.itervalues():
unmatched_before.extend(syms)
return delta_symbols, unmatched_before, unmatched_after
def _DiffSymbolGroups(before, after):
# For changed symbols, padding is zeroed out. In order to not lose the
# information entirely, store it in aggregate.
padding_by_section_name = collections.defaultdict(int)
# Usually >90% of symbols are exact matches, so all of the time is spent in
# this first pass.
all_deltas, before, after = _MatchSymbols(before, after, _Key1,
padding_by_section_name)
for key_func in (_Key2, _Key3, _Key4):
delta_syms, before, after = _MatchSymbols(
before, after, key_func, padding_by_section_name)
all_deltas.extend(delta_syms)
logging.debug('Creating %d unmatched symbols', len(after) + len(before))
for after_sym in after:
all_deltas.append(models.DeltaSymbol(None, after_sym))
for before_sym in before:
all_deltas.append(models.DeltaSymbol(before_sym, None))
# Create a DeltaSymbol to represent the zero'd out padding of matched symbols.
for section_name, padding in padding_by_section_name.iteritems():
if padding != 0:
after_sym = models.Symbol(
section_name,
padding,
name="Overhead: aggregate padding of diff'ed symbols")
after_sym.padding = padding
all_deltas.append(models.DeltaSymbol(None, after_sym))
return models.DeltaSymbolGroup(all_deltas)
def Diff(before, after, sort=False):
"""Diffs two SizeInfo objects. Returns a DeltaSizeInfo."""
assert isinstance(before, models.SizeInfo)
assert isinstance(after, models.SizeInfo)
section_sizes = {k: after.section_sizes.get(k, 0) - v
for k, v in before.section_sizes.iteritems()}
for k, v in after.section_sizes.iteritems():
if k not in section_sizes:
section_sizes[k] = v
symbol_diff = _DiffSymbolGroups(before.raw_symbols, after.raw_symbols)
ret = models.DeltaSizeInfo(before, after, section_sizes, symbol_diff)
if sort:
syms = ret.symbols # Triggers clustering.
logging.debug('Grouping')
# Group path aliases so that functions defined in headers will be sorted
# by their actual size rather than shown as many small symbols.
# Grouping these is nice since adding or remove just one path alias changes
# the PSS of all symbols (a lot of noise).
syms = syms.GroupedByAliases(same_name_only=True)
logging.debug('Sorting')
ret.symbols = syms.Sorted()
logging.debug('Diff complete')
return ret