blob: db534e014c36e834701fd0948545c8e3f4322b96 [file] [log] [blame]
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Deals with loading & saving .size files.
The .size file is written in the following format. There are no section
delimiters; instead, the end of a section is usually determined by a row count
on the first line of a section, followed by that number of rows. In other
cases, the sections have a known size.
Header
------
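Three single-line fields followed by the metadata string.
Line 0 of the file is a header comment.
Line 1 is the serialization version of the file.
Line 2 is the number of characters in the metadata string.
Line 3 onwards is the metadata string, a stringified JSON object. It is
pretty-printed, so it may span several lines; its length is given by Line 2
and it is followed by a single newline.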
Path list
---------
A list of paths. The first line is the size of the list,
and the next N lines that follow are items in the list. Each item is a tuple
of (object_path, source_path) where the two parts are tab separated.
Component list
--------------
A list of components. The first line is the size of the list,
and the next N lines that follow are items in the list. Each item is a unique
COMPONENT which is referenced later.
This section is only present if 'has_components' is True in the metadata.
Symbol counts
-------------
2 lines long.
The first line is a tab separated list of section names.
The second line is a tab separated list of symbol group lengths, in the same
order as the previous line.
Numeric values
--------------
In each section, the number of rows is the same as the number of section names
in Symbol counts. The values on a row are space separated, in the order of the
symbols in each group.
Addresses
~~~~~~~~~
Symbol start addresses which are delta-encoded.
Sizes
~~~~~
The number of bytes this symbol takes up.
Path indices
~~~~~~~~~~~~
Indices that reference paths in the prior Path list section. Delta-encoded.
Component indices
~~~~~~~~~~~~~~~~~
Indices that reference components in the prior Component list section.
Delta-encoded.
Symbols
-------
The final section contains detailed info on each symbol. Each line represents
a single symbol. Values are tab separated and follow this format:
symbol.full_name, symbol.num_aliases, symbol.flags
|num_aliases| will be omitted if the aliases of the symbol are the same as the
previous line. |flags| will be omitted if there are no flags.
"""
import cStringIO
import contextlib
import gzip
import itertools
import json
import logging
import os
import shutil
import models
# File format version for .size files. Written as the second line of every
# file when saving and compared for an exact match when loading.
_SERIALIZATION_VERSION = 'Size File Format v1'
def _LogSize(file_obj, desc):
if not hasattr(file_obj, 'fileno'):
return
file_obj.flush()
size = os.fstat(file_obj.fileno()).st_size
logging.debug('File size with %s: %d' % (desc, size))
def _SaveSizeInfoToFile(size_info, file_obj):
  """Saves size info to a .size file.

  Sections are written in this order: header (comment line, version line,
  JSON length, JSON), path list, component list, symbol counts, the four
  numeric sections (addresses, sizes, path indices, component indices) and
  finally one line per symbol. See the module docstring for the details of
  each section.

  Args:
    size_info: Data to write to the file
    file_obj: File opened for writing
  """
  # Created by supersize header
  file_obj.write('# Created by //tools/binary_size\n')
  file_obj.write('%s\n' % _SERIALIZATION_VERSION)
  # JSON metadata
  headers = {
      'metadata': size_info.metadata,
      'section_sizes': size_info.section_sizes,
      'has_components': True,
  }
  # Only |headers| is passed positionally: a second positional argument to
  # json.dumps() is interpreted as |skipkeys| on Python 2 and is rejected on
  # Python 3.
  metadata_str = json.dumps(headers, indent=2, sort_keys=True)
  # The length is stored so the loader can read the (multi-line) JSON by size.
  file_obj.write('%d\n' % len(metadata_str))
  file_obj.write(metadata_str)
  file_obj.write('\n')
  _LogSize(file_obj, 'header')  # For libchrome: 570 bytes.

  # Store a single copy of all paths and have them referenced by index.
  unique_path_tuples = sorted(set(
      (s.object_path, s.source_path) for s in size_info.raw_symbols))
  path_tuples = {tup: i for i, tup in enumerate(unique_path_tuples)}
  file_obj.write('%d\n' % len(unique_path_tuples))
  file_obj.writelines('%s\t%s\n' % pair for pair in unique_path_tuples)
  _LogSize(file_obj, 'paths')  # For libchrome, adds 200kb.

  # Store a single copy of all components and have them referenced by index.
  unique_components = sorted(set(s.component for s in size_info.raw_symbols))
  components = {comp: i for i, comp in enumerate(unique_components)}
  file_obj.write('%d\n' % len(unique_components))
  file_obj.writelines('%s\n' % comp for comp in unique_components)
  _LogSize(file_obj, 'components')

  # Symbol counts by section.
  by_section = size_info.raw_symbols.GroupedBySectionName()
  file_obj.write('%s\n' % '\t'.join(g.name for g in by_section))
  file_obj.write('%s\n' % '\t'.join(str(len(g)) for g in by_section))

  # Addresses, sizes, path indices, component indices
  def write_numeric(func, delta=False):
    """Write the result of func(symbol) for each symbol in each symbol group.

    Each line written represents one symbol group in |by_section|.
    The values in each line are space separated and are the result of calling
    |func| with the Nth symbol in the group.
    If |delta| is True, the differences in values are written instead.
    """
    for group in by_section:
      values = [func(symbol) for symbol in group]
      if delta:
        # Pair every value with its predecessor (0 for the first one) and
        # keep only the difference.
        values = [cur - prev for prev, cur in zip([0] + values[:-1], values)]
      # join() places separators by position, so the output does not depend
      # on symbol identity.
      file_obj.write(' '.join(str(value) for value in values))
      file_obj.write('\n')

  write_numeric(lambda s: s.address, delta=True)
  _LogSize(file_obj, 'addresses')  # For libchrome, adds 300kb.
  # Do not write padding except for overhead symbols, it will be recalculated
  # from addresses on load.
  write_numeric(lambda s: s.size if s.IsOverhead() else s.size_without_padding)
  _LogSize(file_obj, 'sizes')  # For libchrome, adds 300kb
  write_numeric(lambda s: path_tuples[(s.object_path, s.source_path)],
                delta=True)
  _LogSize(file_obj, 'path indices')  # For libchrome: adds 125kb.
  write_numeric(lambda s: components[s.component], delta=True)
  _LogSize(file_obj, 'component indices')

  # One line per symbol: full_name [<TAB>0<num_aliases hex>] [<TAB><flags hex>]
  prev_aliases = None
  for group in by_section:
    for symbol in group:
      file_obj.write(symbol.full_name)
      # The alias count is written only for the first symbol that refers to a
      # given alias list. The leading '0' is what lets the loader tell this
      # field apart from the flags field.
      if symbol.aliases and symbol.aliases is not prev_aliases:
        file_obj.write('\t0%x' % symbol.num_aliases)
      prev_aliases = symbol.aliases
      if symbol.flags:
        file_obj.write('\t%x' % symbol.flags)
      file_obj.write('\n')
  _LogSize(file_obj, 'names (final)')  # For libchrome: adds 3.5mb.
def _ReadLine(file_iter):
"""Read a line from a file object iterator and remove the newline character.
Args:
file_iter: File object iterator
Returns:
String
"""
# str[:-1] removes the last character from a string, specifically the newline
return next(file_iter)[:-1]
def _ReadValuesFromLine(file_iter, split):
  """Consumes one line from |file_iter| and breaks it into fields.

  Args:
    file_iter: File object iterator
    split: Separator string the line is split on

  Returns:
    List of string values
  """
  line = _ReadLine(file_iter)
  fields = line.split(split)
  return fields
def _LoadSizeInfoFromFile(file_obj, size_path):
  """Loads a size_info from the given file.

  See _SaveSizeInfoToFile for details on the .size file format.

  Args:
    file_obj: File to read, should be a GzipFile
    size_path: Forwarded unchanged to models.SizeInfo as |size_path|.

  Returns:
    A models.SizeInfo built from the section sizes, symbols and metadata
    stored in the file.

  Raises:
    AssertionError: If the version line differs from _SERIALIZATION_VERSION.
  """
  lines = iter(file_obj)
  _ReadLine(lines) # Line 0: Created by supersize header
  actual_version = _ReadLine(lines)
  assert actual_version == _SERIALIZATION_VERSION, (
      'Version mismatch. Need to write some upgrade code.')
  # JSON metadata
  # The JSON is read by character count rather than line by line.
  json_len = int(_ReadLine(lines))
  json_str = file_obj.read(json_len)
  headers = json.loads(json_str)
  section_sizes = headers['section_sizes']
  metadata = headers.get('metadata')
  has_components = headers.get('has_components', False)
  # Restart line iteration after the direct read() and discard the rest of
  # the current line (the writer emits a newline right after the JSON).
  lines = iter(file_obj)
  _ReadLine(lines)
  # Path list
  num_path_tuples = int(_ReadLine(lines)) # Line 4 - number of paths in list
  # Read the path list values and store for later
  path_tuples = [_ReadValuesFromLine(lines, split='\t')
                 for _ in xrange(num_path_tuples)]
  # Component list
  if has_components:
    num_components = int(_ReadLine(lines)) # number of components in list
    components = [_ReadLine(lines) for _ in xrange(num_components)]
  # Symbol counts by section.
  section_names = _ReadValuesFromLine(lines, split='\t')
  section_counts = [int(c) for c in _ReadValuesFromLine(lines, split='\t')]
  # Addresses, sizes, path indices, component indices
  def read_numeric(delta=False):
    """Read numeric values, where each line corresponds to a symbol group.

    The values in each line are space separated.
    If |delta| is True, the numbers are read as a value to add to the sum of the
    prior values in the line, or as the amount to change by.

    Returns:
      A list with one entry per section; each entry is the list of decoded
      integers for that section's line.
    """
    ret = []
    # 1 keeps the running sum (delta decoding); 0 discards it so each field is
    # taken as-is.
    delta_multiplier = int(delta)
    for _ in section_counts:
      value = 0
      fields = []
      for f in _ReadValuesFromLine(lines, split=' '):
        value = value * delta_multiplier + int(f)
        fields.append(value)
      ret.append(fields)
    return ret
  # The call order below must match the order the sections appear in the file.
  addresses = read_numeric(delta=True)
  sizes = read_numeric(delta=False)
  path_indices = read_numeric(delta=True)
  if has_components:
    component_indices = read_numeric(delta=True)
  else:
    # Placeholder so the izip() below still yields one tuple per section.
    component_indices = [None] * len(section_names)
  raw_symbols = [None] * sum(section_counts)
  symbol_idx = 0
  for (cur_section_name, cur_section_count, cur_addresses, cur_sizes,
       cur_path_indicies, cur_component_indices) in itertools.izip(
           section_names, section_counts, addresses, sizes, path_indices,
           component_indices):
    # Number of upcoming symbols that still belong to the current alias group.
    alias_counter = 0
    for i in xrange(cur_section_count):
      parts = _ReadValuesFromLine(lines, split='\t')
      full_name = parts[0]
      flags_part = None
      aliases_part = None
      # aliases_part or flags_part may have been omitted.
      if len(parts) == 3:
        # full_name aliases_part flags_part
        aliases_part = parts[1]
        flags_part = parts[2]
      elif len(parts) == 2:
        # The writer prefixes the alias count with '0' and writes flags as the
        # hex of a non-zero value, so a leading '0' identifies an alias count.
        if parts[1][0] == '0':
          # full_name aliases_part
          aliases_part = parts[1]
        else:
          # full_name flags_part
          flags_part = parts[1]
      # Use a bit less RAM by using the same instance for this common string.
      if full_name == models.STRING_LITERAL_NAME:
        full_name = models.STRING_LITERAL_NAME
      # Both fields are stored as hexadecimal text.
      flags = int(flags_part, 16) if flags_part else 0
      num_aliases = int(aliases_part, 16) if aliases_part else 0
      # Skip the constructor to avoid default value checks
      new_sym = models.Symbol.__new__(models.Symbol)
      new_sym.section_name = cur_section_name
      new_sym.full_name = full_name
      new_sym.address = cur_addresses[i]
      new_sym.size = cur_sizes[i]
      paths = path_tuples[cur_path_indicies[i]]
      new_sym.object_path, new_sym.source_path = paths
      component = components[cur_component_indices[i]] if has_components else ''
      new_sym.component = component
      new_sym.flags = flags
      # Derived
      # NOTE(review): these are only given placeholder values here; presumably
      # they are recomputed by a later step - confirm.
      new_sym.padding = 0
      new_sym.template_name = ''
      new_sym.name = ''
      if num_aliases:
        # First symbol of an alias group: start a new shared list and expect
        # |num_aliases| - 1 more members on the following lines.
        assert alias_counter == 0
        new_sym.aliases = [new_sym]
        alias_counter = num_aliases - 1
      elif alias_counter > 0:
        # Remaining member of the current alias group: share (and extend) the
        # previous symbol's list.
        new_sym.aliases = raw_symbols[symbol_idx - 1].aliases
        new_sym.aliases.append(new_sym)
        alias_counter -= 1
      else:
        new_sym.aliases = None
      raw_symbols[symbol_idx] = new_sym
      symbol_idx += 1
  return models.SizeInfo(section_sizes, raw_symbols, metadata=metadata,
                         size_path=size_path)
@contextlib.contextmanager
def _OpenGzipForWrite(path, file_obj=None):
# Open in a way that doesn't set any gzip header fields.
if file_obj:
with gzip.GzipFile(filename='', mode='wb', fileobj=file_obj, mtime=0) as fz:
yield fz
else:
with open(path, 'wb') as f:
with gzip.GzipFile(filename='', mode='wb', fileobj=f, mtime=0) as fz:
yield fz
def SaveSizeInfo(size_info, path, file_obj=None):
  """Saves |size_info| as a gzipped .size file.

  Args:
    size_info: Data to serialize.
    path: Destination path, handed to _OpenGzipForWrite.
    file_obj: Optional file object, handed to _OpenGzipForWrite.
  """
  measure_gzip = os.environ.get('SUPERSIZE_MEASURE_GZIP') == '1'
  if not measure_gzip:
    # It is seconds faster to do gzip in a separate step. 6s -> 3.5s.
    buf = cStringIO.StringIO()
    _SaveSizeInfoToFile(size_info, buf)
    logging.debug('Serialization complete. Gzipping...')
    buf.seek(0)
    with _OpenGzipForWrite(path, file_obj=file_obj) as gz_file:
      shutil.copyfileobj(buf, gz_file)
    return
  # Serialize straight into the gzip stream.
  with _OpenGzipForWrite(path, file_obj=file_obj) as gz_file:
    _SaveSizeInfoToFile(size_info, gz_file)
def LoadSizeInfo(filename, file_obj=None):
  """Returns a SizeInfo loaded from |filename|.

  Args:
    filename: Path of the gzipped .size file; also passed on to
        _LoadSizeInfoFromFile as the size path.
    file_obj: Optional file object handed to gzip.GzipFile as |fileobj|.
  """
  gz_file = gzip.GzipFile(filename=filename, fileobj=file_obj)
  with gz_file:
    size_info = _LoadSizeInfoFromFile(gz_file, filename)
  return size_info