# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Creates an html report that allows you to view binary size by component."""
import codecs
import collections
import itertools
import json
import logging
import os
import archive
import diff
import models
import path_util

# These must match |_KEYS| in shared.js.
_COMPACT_FILE_COMPONENT_INDEX_KEY = 'c'
_COMPACT_FILE_PATH_KEY = 'p'
_COMPACT_FILE_SYMBOLS_KEY = 's'
_COMPACT_SYMBOL_BYTE_SIZE_KEY = 'b'
_COMPACT_SYMBOL_COUNT_KEY = 'u'
_COMPACT_SYMBOL_FLAGS_KEY = 'f'
_COMPACT_SYMBOL_NAME_KEY = 'n'
_COMPACT_SYMBOL_NUM_ALIASES_KEY = 'a'
_COMPACT_SYMBOL_TYPE_KEY = 't'

# Always emit this many distinct symbols (if present), even when small.
# (No need to optimize file size at this point.)
_MIN_SYMBOL_COUNT = 1000

# Small symbols grouped into "other" symbols may not comprise more than this
# fraction of the total size.
_MAX_OTHER_SYMBOL_COVERAGE = 0.05

# Don't insert "other" symbols smaller than this (just noise at this point).
_MIN_OTHER_PSS = 1


def _GetOrAddFileNode(path, component, file_nodes, components):
  file_node = file_nodes.get(path)
  if file_node is None:
    component_index = components.GetOrAdd(component)
    file_node = {
        _COMPACT_FILE_PATH_KEY: path,
        _COMPACT_FILE_COMPONENT_INDEX_KEY: component_index,
        _COMPACT_FILE_SYMBOLS_KEY: [],
    }
    file_nodes[path] = file_node
  return file_node


class IndexedSet(object):
  """Set-like object where values are unique and indexed.

  Values must be immutable.
  """

  def __init__(self):
    self._index_dict = {}  # Value -> Index dict.
    self.value_list = []  # List containing all the set items.

  def GetOrAdd(self, value):
    """Get the index of the value in the list. Append it if not yet present."""
    index = self._index_dict.get(value)
    if index is None:
      self.value_list.append(value)
      index = len(self.value_list) - 1
      self._index_dict[value] = index
    return index
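
# Illustrative use (hypothetical component names): GetOrAdd() hands out a
# stable index per distinct value, so repeated components share one entry:
#   components = IndexedSet()
#   components.GetOrAdd('Blink>Layout')  # -> 0
#   components.GetOrAdd('Internals')     # -> 1
#   components.GetOrAdd('Blink>Layout')  # -> 0
#   components.value_list  # ['Blink>Layout', 'Internals']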


def _PartitionSymbols(symbols):
  """Splits |symbols| into (main_symbols, extra_symbols).

  Always-interesting symbols (dex methods, vtable entries) plus the largest
  remaining symbols go into main_symbols; the long tail of small symbols goes
  into extra_symbols, to be aggregated into "other" symbols later.
  """
  # Dex methods are whitelisted for the method_count mode in the UI.
  # Vtable entries are interesting to query for, so whitelist them as well.
  interesting_symbols = symbols.Filter(
      lambda s: s.IsDex() or '[vtable]' in s.name)
  ordered_symbols = interesting_symbols.Inverted().Sorted()
  abs_pss_target = (1 - _MAX_OTHER_SYMBOL_COVERAGE) * sum(
      abs(s.pss) for s in ordered_symbols)

  running_abs_pss = 0
  ordered_count = 0
  for ordered_count, s in enumerate(ordered_symbols):
    running_abs_pss += abs(s.pss)
    if running_abs_pss > abs_pss_target and ordered_count >= _MIN_SYMBOL_COUNT:
      break

  main_symbols = itertools.chain(interesting_symbols,
                                 ordered_symbols[:ordered_count])
  extra_symbols = ordered_symbols[ordered_count:]
  logging.info('Found %d large symbols, %d small symbols',
               len(interesting_symbols) + ordered_count, len(extra_symbols))
  return main_symbols, extra_symbols
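
# For example, if the non-interesting symbols total 1 MiB of |pss|, the loop
# above emits individual symbols until the running total exceeds
# (1 - 0.05) * 1 MiB ~= 0.95 MiB, and never fewer than _MIN_SYMBOL_COUNT of
# them (when that many exist); the remaining ~5% of bytes are folded into
# per-path "other" symbols by _MakeTreeViewList() below.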


def _MakeTreeViewList(symbols, include_all_symbols):
  """Builds JSON data of the symbols for the tree view HTML report.

  As the tree is built on the client-side, this function creates a flat list
  of files, where each file object contains symbols that have the same path.

  Args:
    symbols: A SymbolGroup containing all symbols.
    include_all_symbols: If True, include all symbols in the data file.
  """
  file_nodes = {}
  components = IndexedSet()
  # Dict of (path, component) -> section_name -> accumulated pss.
  small_symbol_pss = collections.defaultdict(
      lambda: collections.defaultdict(float))
  # For delta symbols, most counts should be 0, so use that as the default.
  # Otherwise use 1.
  default_symbol_count = 0 if symbols.IsDelta() else 1

  if include_all_symbols:
    main_symbols, extra_symbols = symbols, []
  else:
    logging.info('Partitioning symbols...')
    main_symbols, extra_symbols = _PartitionSymbols(symbols)

  # Bundle symbols by the file they belong to.
  # Add all the file buckets into file_nodes.
  for symbol in main_symbols:
    symbol_size = round(symbol.pss, 2)
    if symbol_size.is_integer():
      symbol_size = int(symbol_size)
    symbol_count = default_symbol_count
    if symbol.IsDelta():
      symbol_count = models.DIFF_COUNT_DELTA[symbol.diff_status]

    path = symbol.source_path or symbol.object_path
    file_node = _GetOrAddFileNode(
        path, symbol.component, file_nodes, components)

    name = symbol.full_name if symbol.IsDex() else symbol.template_name
    symbol_entry = {
        _COMPACT_SYMBOL_BYTE_SIZE_KEY: symbol_size,
        _COMPACT_SYMBOL_NAME_KEY: name,
        _COMPACT_SYMBOL_TYPE_KEY: symbol.section,
    }
    if symbol.num_aliases != 1:
      symbol_entry[_COMPACT_SYMBOL_NUM_ALIASES_KEY] = symbol.num_aliases
    # We use symbol count for the method count mode in the diff mode report.
    # Negative values are used to indicate a symbol was removed, so it should
    # count as -1 rather than the default, 1.
    # We don't care about accurate counts for other symbol types currently,
    # so this data is only included for methods.
    is_dex_method = symbol.section_name == models.SECTION_DEX_METHOD
    if is_dex_method and symbol_count != default_symbol_count:
      symbol_entry[_COMPACT_SYMBOL_COUNT_KEY] = symbol_count
    if symbol.flags:
      symbol_entry[_COMPACT_SYMBOL_FLAGS_KEY] = symbol.flags
    file_node[_COMPACT_FILE_SYMBOLS_KEY].append(symbol_entry)

  # Collect small symbols into a per-path dict.
  for symbol in extra_symbols:
    path = symbol.source_path or symbol.object_path
    tup = (path, symbol.component)
    small_symbol_pss[tup][symbol.section_name] += symbol.pss

  # Insert "other" symbols for the accumulated small symbols.
  inserted_smalls_count = 0
  inserted_smalls_abs_pss = 0
  skipped_smalls_count = 0
  skipped_smalls_abs_pss = 0
  for tup, type_to_pss in small_symbol_pss.items():
    path, component = tup
    for section_name, pss in type_to_pss.items():
      if abs(pss) < _MIN_OTHER_PSS:
        skipped_smalls_count += 1
        skipped_smalls_abs_pss += abs(pss)
      else:
        inserted_smalls_count += 1
        inserted_smalls_abs_pss += abs(pss)
        file_node = _GetOrAddFileNode(path, component, file_nodes, components)
        file_node[_COMPACT_FILE_SYMBOLS_KEY].append({
            _COMPACT_SYMBOL_NAME_KEY: 'Other ' + section_name,
            _COMPACT_SYMBOL_TYPE_KEY:
                models.SECTION_NAME_TO_SECTION[section_name],
            _COMPACT_SYMBOL_BYTE_SIZE_KEY: pss,
        })
  logging.debug(
      'Created %d "other" symbols with PSS=%.1f. Omitted %d with PSS=%.1f',
      inserted_smalls_count, inserted_smalls_abs_pss, skipped_smalls_count,
      skipped_smalls_abs_pss)

  meta = {
      'components': components.value_list,
      'total': symbols.pss,
  }
  return meta, list(file_nodes.values())
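
# For illustration, a single file node in the compact format above might look
# like the following (hypothetical values; the one-letter keys are the
# _COMPACT_* constants defined at the top of this file):
#   {"p": "base/logging.cc", "c": 0,
#    "s": [{"n": "LogMessage::~LogMessage()", "b": 248, "t": "t"}]}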


def BuildReportFromSizeInfo(out_path, size_info, all_symbols=False):
  """Builds a .ndjson report for a .size file.

  Args:
    out_path: Path to save the .ndjson report to.
    size_info: A SizeInfo or DeltaSizeInfo to use for the report.
    all_symbols: If True, all symbols will be included in the report rather
        than truncated.
  """
  logging.info('Reading .size file')
  symbols = size_info.raw_symbols
  is_diff = symbols.IsDelta()
  if is_diff:
    symbols = symbols.WhereDiffStatusIs(models.DIFF_STATUS_UNCHANGED).Inverted()

  meta, tree_nodes = _MakeTreeViewList(symbols, all_symbols)
  logging.info('Created %d tree nodes', len(tree_nodes))
  meta.update({
      'diff_mode': is_diff,
      'section_sizes': size_info.section_sizes,
  })
  if is_diff:
    meta.update({
        'before_metadata': size_info.before.metadata,
        'after_metadata': size_info.after.metadata,
    })
  else:
    meta['metadata'] = size_info.metadata

  # Write the newline-delimited JSON file: the metadata object on the first
  # line, followed by one file node per line.
  logging.info('Serializing JSON')
  # Use separators without whitespace to get a smaller file.
  json_dump_args = {
      'separators': (',', ':'),
      'ensure_ascii': True,
      'check_circular': False,
  }
  with codecs.open(out_path, 'w', encoding='ascii') as out_file:
    json.dump(meta, out_file, **json_dump_args)
    out_file.write('\n')

    for tree_node in tree_nodes:
      json.dump(tree_node, out_file, **json_dump_args)
      out_file.write('\n')
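
# Minimal usage sketch (hypothetical paths), assuming a .size file produced by
# `supersize archive`:
#   size_info = archive.LoadAndPostProcessSizeInfo('chrome.size')
#   BuildReportFromSizeInfo('report.ndjson', size_info)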


def _MakeDirIfDoesNotExist(rel_path):
  """Ensures a directory exists."""
  abs_path = os.path.abspath(rel_path)
  try:
    os.makedirs(abs_path)
  except OSError:
    if not os.path.isdir(abs_path):
      raise


def AddArguments(parser):
  parser.add_argument('input_size_file',
                      help='Path to input .size file.')
  parser.add_argument('output_report_file',
                      help='Write generated data to the specified '
                           '.ndjson file.')
  parser.add_argument('--all-symbols', action='store_true',
                      help='Include all symbols. Will cause the data file to '
                           'take longer to load.')
  parser.add_argument('--diff-with',
                      help='Diffs the input_file against an older .size file.')


def Run(args, parser):
  if not args.input_size_file.endswith('.size'):
    parser.error('Input must end with ".size"')
  if args.diff_with and not args.diff_with.endswith('.size'):
    parser.error('Diff input must end with ".size"')
  if not args.output_report_file.endswith('.ndjson'):
    parser.error('Output must end with ".ndjson"')

  size_info = archive.LoadAndPostProcessSizeInfo(args.input_size_file)
  if args.diff_with:
    before_size_info = archive.LoadAndPostProcessSizeInfo(args.diff_with)
    size_info = diff.Diff(before_size_info, size_info)

  BuildReportFromSizeInfo(
      args.output_report_file, size_info, all_symbols=args.all_symbols)

  msg = [
      'Done!',
      'View using a local server via: ',
      ' %s start_server %s',
      'or upload to the hosted version here:',
      ' https://storage.googleapis.com/chrome-supersize/viewer.html',
  ]
  supersize_path = os.path.relpath(os.path.join(
      path_util.SRC_ROOT, 'tools', 'binary_size', 'supersize'))
  logging.warning('\n'.join(msg), supersize_path, args.output_report_file)
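
# A rough sketch of how this module gets wired up (hypothetical; the actual
# entry point is the supersize launcher script, which dispatches AddArguments()
# and Run() per subcommand):
#   import argparse
#   parser = argparse.ArgumentParser()
#   AddArguments(parser)
#   args = parser.parse_args()
#   Run(args, parser)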