# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Deals with loading & saving .size and .sizediff files.
The .size file is written in the following format. There are no section
delimiters; instead, the end of a section is usually determined by a row count
on the first line of the section, followed by that number of rows. In other
cases, the sections have a known size.
Header
------
4 lines long.
Line 0 of the file is a header comment.
Line 1 is the serialization version of the file.
Line 2 is the number of characters in the header fields string.
Line 3 is the header fields string, a stringified JSON object.
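
For example (illustrative values), the first four lines of a single-container
.size file are:

  # Created by //tools/binary_size
  Size File Format v1
  <number of characters in the JSON below>
  <stringified JSON header fields>

Note that the JSON is pretty-printed and so spans several physical lines; the
character count on the preceding line is what delimits it.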
Path list
---------
A list of paths. The first line is the size of the list, and the N lines that
follow are the items in the list. Each item is a tuple of
(object_path, source_path), with the two parts tab separated.
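
For example (illustrative, with tabs shown as <TAB>):

  2
  obj/foo/foo.o<TAB>../../foo/foo.cc
  obj/third_party/bar.o<TAB>../../third_party/bar.cc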
Component list
--------------
A list of components. The first line is the size of the list, and the N lines
that follow are the items in the list. Each item is a unique COMPONENT which
is referenced later.
This section is only present if 'has_components' is True in header fields.
Symbol counts
-------------
2 lines long.
The first line is a tab separated list of section names.
The second line is a tab separated list of symbol group lengths, in the same
order as the previous line.
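
For example (illustrative, single container, tabs shown as <TAB>):

  .text<TAB>.rodata<TAB>.data
  4000<TAB>100<TAB>350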
Numeric values
--------------
Each subsection below contains one row per symbol group listed in Symbol
counts. The values on a row are space separated and correspond to the symbols
in the group, in order.
Addresses
~~~~~~~~~~
Symbol start addresses which are delta-encoded.
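For example, the addresses [100, 132, 164] are stored as "100 32 32".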
Sizes
~~~~~
The number of bytes this symbol takes up.
Padding
~~~~~~~
The number of padding bytes this symbol has.
This section is only present if 'has_padding' is True in header fields.
Path indices
~~~~~~~~~~~~~
Indices that reference paths in the prior Path list section. Delta-encoded.
Component indices
~~~~~~~~~~~~~~~~~~
Indices that reference components in the prior Component list section.
Delta-encoded.
This section is only present if 'has_components' is True in header fields.
Symbols
-------
The final section contains detailed info on each symbol. Each line represents
a single symbol. Values are tab separated and follow this format:
symbol.full_name, symbol.num_aliases, symbol.flags
|num_aliases| will be omitted if the aliases of the symbol are the same as the
previous line. |flags| will be omitted if there are no flags.
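
For example (illustrative, with tabs shown as <TAB>), the line:

  foo(int)<TAB>02<TAB>4

describes a symbol named "foo(int)" that starts an alias group of 2 and has
flags value 0x4. |num_aliases| is written in hex with a leading "0" so that it
can be distinguished from |flags|, whose hex form never starts with "0".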
The .sizediff file stores a sparse representation of a difference between .size
files. Each .sizediff file stores two sparse .size files, before and after,
containing only symbols that differed between "before" and "after". They can
be rendered via the Tiger viewer. .sizediff files use the following format:
Header
------
4 lines long.
Line 0 of the file is a header comment.
Line 1 identifies the file as a .sizediff file.
Line 2 is the number of characters in the header fields string.
Line 3 is the header fields string, a stringified JSON object. This currently
contains two fields, 'before_length' (the length in bytes of the 'before'
section) and 'version', which is always 1.
Before
------
The next |header.before_length| bytes are a valid gzipped sparse .size file
containing the "before" snapshot.
After
-----
All remaining bytes are a valid gzipped sparse .size file containing the
"after" snapshot.
"""
import contextlib
import gzip
import io
import itertools
import json
import logging
import os
import sys
import models
import parallel
_COMMON_HEADER = b'# Created by //tools/binary_size\n'
# File format version for .size files.
_SIZE_HEADER_SINGLE_CONTAINER = b'Size File Format v1\n'
_SIZE_HEADER_MULTI_CONTAINER = b'Size File Format v1.1\n'
# Header for .sizediff files
_SIZEDIFF_HEADER = b'DIFF\n'
_SIZEDIFF_VERSION = 1
# Native sections are sorted by address.
_SECTION_SORT_ORDER = {
models.SECTION_DATA: 0,
models.SECTION_DATA_REL_RO_LOCAL: 0,
models.SECTION_DATA_REL_RO: 0,
models.SECTION_RODATA: 0,
models.SECTION_TEXT: 0,
models.SECTION_BSS: 1,
models.SECTION_BSS_REL_RO: 1,
models.SECTION_PART_END: 1,
models.SECTION_DEX: 2,
models.SECTION_DEX_METHOD: 3,
models.SECTION_PAK_NONTRANSLATED: 4,
models.SECTION_PAK_TRANSLATIONS: 5,
models.SECTION_OTHER: 6,
}
# Ensure each |models.SECTION_*| (except |SECTION_MULTIPLE|) has an entry.
assert len(_SECTION_SORT_ORDER) + 1 == len(models.SECTION_NAME_TO_SECTION)
class _Writer:
"""Helper to format and write data to a file object."""
def __init__(self, file_obj):
self.file_obj_ = file_obj
def WriteBytes(self, b):
# Direct write of raw bytes.
self.file_obj_.write(b)
def WriteString(self, s):
self.file_obj_.write(s.encode('ascii'))
def WriteLine(self, s):
self.file_obj_.write(s.encode('ascii'))
self.file_obj_.write(b'\n')
def WriteNumberList(self, gen):
"""Writes numbers from |gen| separated by space, in one line."""
sep = b''
for num in gen:
self.WriteBytes(sep)
self.WriteString(str(num))
sep = b' '
self.WriteBytes(b'\n')
def LogSize(self, desc):
self.file_obj_.flush()
size = self.file_obj_.tell()
    logging.debug('File size with %s: %d', desc, size)
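
# Illustrative _Writer usage (not part of the file format):
#   w = _Writer(file_obj)
#   w.WriteLine('2')           # Writes b'2\n'.
#   w.WriteNumberList([3, 4])  # Writes b'3 4\n'.
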
def SortSymbols(raw_symbols, check_already_mostly_sorted=True):
"""Sorts the given symbols in the order that they should be archived in.
The sort order is chosen such that:
* Padding can be discarded.
* Ordering is deterministic (total ordering).
Also sorts |aliases| such that they match the order within |raw_symbols|.
Args:
raw_symbols: List of symbols to sort.
    check_already_mostly_sorted: Whether to assert that there are only a few
        out-of-order elements in raw_symbols. Older .size files are not
        properly sorted, so this check makes sense only for
        "supersize archive".
"""
def sort_key(s):
# size_without_padding so that "** symbol gap" sorts before other symbols
# with same address (necessary for correctness within CalculatePadding()).
return (
_SECTION_SORT_ORDER[s.section_name],
s.IsOverhead(),
s.address,
# Only use size_without_padding for native symbols (that have
# addresses) since padding-only symbols must come first for
# correctness.
# DEX also has 0-size symbols (for nested classes, not sure why)
# and we don't want to sort them differently since they don't have
# any padding either.
s.address and s.size_without_padding > 0,
s.full_name.startswith('**'),
s.full_name,
s.object_path)
def describe(syms):
return ''.join('%r: %r\n' % (s, sort_key(s)) for s in syms)
logging.debug('Sorting %d symbols', len(raw_symbols))
# Sort aliases first to make raw_symbols quicker to sort.
# Although sorting is done when aliases are first created, aliases that differ
# only by path can later become out-of-order due to path normalization.
i = 0
count = len(raw_symbols)
while i < count:
s = raw_symbols[i]
num_aliases = s.num_aliases
if s.aliases:
expected = raw_symbols[i:i + num_aliases]
assert s.aliases == expected, 'Aliases out of order:\n{}\n{}'.format(
describe(s.aliases), describe(expected))
s.aliases.sort(key=sort_key)
raw_symbols[i:i + num_aliases] = s.aliases
i += num_aliases
else:
i += 1
if check_already_mostly_sorted:
count = sum(
int(sort_key(raw_symbols[i]) > sort_key(raw_symbols[i + 1]))
for i in range(len(raw_symbols) - 1))
logging.debug('Number of out-of-order symbols: %d', count)
if count > 20:
logging.error('Number of out-of-order symbols: %d', count)
logging.error('Showing first 10')
num_reported = 0
for i in range(len(raw_symbols) - 1):
if sort_key(raw_symbols[i]) > sort_key(raw_symbols[i + 1]):
num_reported += 1
logging.error('\n%s', describe(raw_symbols[i:i + 2]))
if num_reported == 10:
break
# Python's sort() is faster when the input list is already mostly sorted.
raw_symbols.sort(key=sort_key)
def CalculatePadding(raw_symbols):
"""Populates the |padding| field based on symbol addresses. """
logging.info('Calculating padding')
seen_container_and_sections = set()
for i, symbol in enumerate(raw_symbols[1:]):
prev_symbol = raw_symbols[i]
if symbol.IsOverhead():
# Overhead symbols are not actionable so should be padding-only.
symbol.padding = symbol.size
if (prev_symbol.container.name != symbol.container.name
or prev_symbol.section_name != symbol.section_name):
container_and_section = (symbol.container.name, symbol.section_name)
assert container_and_section not in seen_container_and_sections, """\
Input symbols must be sorted by container, section, then address.
Found: {}
Then: {}
""".format(prev_symbol, symbol)
seen_container_and_sections.add(container_and_section)
continue
if (symbol.address <= 0 or prev_symbol.address <= 0
or not symbol.IsNative() or not prev_symbol.IsNative()):
continue
if symbol.address == prev_symbol.address:
if symbol.aliases and symbol.aliases is prev_symbol.aliases:
symbol.padding = prev_symbol.padding
symbol.size = prev_symbol.size
continue
# Padding-only symbols happen for ** symbol gaps.
assert prev_symbol.size_without_padding == 0, (
'Found duplicate symbols:\n%r\n%r' % (prev_symbol, symbol))
padding = symbol.address - prev_symbol.end_address
symbol.padding = padding
symbol.size += padding
assert symbol.size >= 0, (
      'Symbol has negative size (likely not sorted properly): '
'%r\nprev symbol: %r' % (symbol, prev_symbol))
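
# Illustrative example of the padding computation above: a native symbol
# ending at address 0x108 followed by one starting at 0x110 gives the second
# symbol padding = 8, and its |size| grows to include that padding.
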
def _ExpandSparseSymbols(sparse_symbols):
"""Expands a symbol list with all aliases of all symbols in the list.
Args:
sparse_symbols: A list or SymbolGroup to expand.
"""
representative_symbols = set()
raw_symbols = []
logging.debug('Expanding sparse_symbols with aliases of included symbols')
for sym in sparse_symbols:
if sym.aliases:
num_syms = len(representative_symbols)
representative_symbols.add(sym.aliases[0])
if num_syms < len(representative_symbols):
raw_symbols.extend(sym.aliases)
else:
raw_symbols.append(sym)
logging.debug('Done expanding sparse_symbols')
return models.SymbolGroup(raw_symbols)
def _SaveSizeInfoToFile(size_info,
file_obj,
include_padding=False,
sparse_symbols=None):
"""Saves size info to a .size file.
Args:
size_info: Data to write to the file
file_obj: File opened for writing.
include_padding: Whether to save padding data, useful if adding a subset of
symbols.
sparse_symbols: If present, only save these symbols to the file.
"""
if sparse_symbols is not None:
# Any aliases of sparse symbols must also be included, or else file
# parsing will attribute symbols that happen to follow an incomplete alias
# group to that alias group.
raw_symbols = _ExpandSparseSymbols(sparse_symbols)
else:
raw_symbols = size_info.raw_symbols
num_containers = len(size_info.containers)
has_multi_containers = (num_containers > 1)
file_obj.write(_COMMON_HEADER)
if has_multi_containers:
file_obj.write(_SIZE_HEADER_MULTI_CONTAINER)
else:
file_obj.write(_SIZE_HEADER_SINGLE_CONTAINER)
# JSON header fields
fields = {
'has_components': True,
'has_padding': include_padding,
}
if has_multi_containers:
# Write using new format.
assert len(set(c.name for c in size_info.containers)) == num_containers, (
'Container names must be distinct.')
fields['build_config'] = size_info.build_config
fields['containers'] = [{
'name': c.name,
'metadata': c.metadata,
'section_sizes': c.section_sizes,
} for c in size_info.containers]
else:
# Write using old format.
fields['metadata'] = size_info.metadata_legacy
fields['section_sizes'] = size_info.containers[0].section_sizes
fields_str = json.dumps(fields, indent=2, sort_keys=True)
w = _Writer(file_obj)
w.WriteLine(str(len(fields_str)))
w.WriteLine(fields_str)
w.LogSize('header') # For libchrome: 570 bytes.
# Store a single copy of all paths and reference them by index.
unique_path_tuples = sorted(
set((s.object_path, s.source_path) for s in raw_symbols))
path_tuples = {tup: i for i, tup in enumerate(unique_path_tuples)}
w.WriteLine(str(len(unique_path_tuples)))
for pair in unique_path_tuples:
w.WriteLine('%s\t%s' % pair)
w.LogSize('paths') # For libchrome, adds 200kb.
# Store a single copy of all components and have them referenced by index.
unique_components = sorted(set(s.component for s in raw_symbols))
components = {comp: i for i, comp in enumerate(unique_components)}
w.WriteLine(str(len(unique_components)))
for comp in unique_components:
w.WriteLine(comp)
w.LogSize('components')
# Symbol counts by "segments", defined as (container, section) tuples.
symbol_group_by_segment = raw_symbols.GroupedByContainerAndSectionName()
if has_multi_containers:
container_name_to_index = {
c.name: i
for i, c in enumerate(size_info.containers)
}
w.WriteLine('\t'.join('<%d>%s' %
(container_name_to_index[g.name[0]], g.name[1])
for g in symbol_group_by_segment))
else:
w.WriteLine('\t'.join(g.name[1] for g in symbol_group_by_segment))
w.WriteLine('\t'.join(str(len(g)) for g in symbol_group_by_segment))
def gen_delta(gen, prev_value=0):
"""Adapts a generator of numbers to deltas."""
for value in gen:
yield value - prev_value
prev_value = value
def write_groups(func, delta=False):
"""Write func(symbol) for each symbol in each symbol group.
Each line written represents one symbol group in |symbol_group_by_segment|.
The values in each line are space separated and are the result of calling
|func| with the Nth symbol in the group.
If |delta| is True, the differences in values are written instead."""
for group in symbol_group_by_segment:
gen = map(func, group)
w.WriteNumberList(gen_delta(gen) if delta else gen)
write_groups(lambda s: s.address, delta=True)
w.LogSize('addresses') # For libchrome, adds 300kb.
write_groups(lambda s: s.size if s.IsOverhead() else s.size_without_padding)
w.LogSize('sizes') # For libchrome, adds 300kb
# Padding for non-padding-only symbols is recalculated from addresses on
# load, so we only need to write it if we're writing a subset of symbols.
if include_padding:
write_groups(lambda s: s.padding)
w.LogSize('paddings') # For libchrome, adds 300kb
write_groups(
lambda s: path_tuples[(s.object_path, s.source_path)], delta=True)
w.LogSize('path indices') # For libchrome: adds 125kb.
write_groups(lambda s: components[s.component], delta=True)
w.LogSize('component indices')
prev_aliases = None
for group in symbol_group_by_segment:
for symbol in group:
w.WriteString(symbol.full_name)
if symbol.aliases and symbol.aliases is not prev_aliases:
w.WriteString('\t0%x' % symbol.num_aliases)
prev_aliases = symbol.aliases
if symbol.flags:
w.WriteString('\t%x' % symbol.flags)
w.WriteBytes(b'\n')
w.LogSize('names (final)') # For libchrome: adds 3.5mb.
def _ReadLine(file_iter):
"""Read a line from a file object iterator and remove the newline character.
Args:
file_iter: File object iterator
Returns:
String
"""
# str[:-1] removes the last character from a string, specifically the newline
return next(file_iter)[:-1]
def _ReadValuesFromLine(file_iter, split):
"""Read a list of values from a line in a file object iterator.
Args:
file_iter: File object iterator
split: Splits the line with the given string
Returns:
List of string values
"""
return _ReadLine(file_iter).split(split)
def _LoadSizeInfoFromFile(file_obj, size_path):
"""Loads a size_info from the given file.
See _SaveSizeInfoToFile() for details on the .size file format.
Args:
    file_obj: File to read, should be a GzipFile.
    size_path: Path to the file being loaded, stored in the returned SizeInfo.
  """
# Split lines on '\n', since '\r' can appear in some lines!
lines = io.TextIOWrapper(file_obj, newline='\n')
header_line = _ReadLine(lines).encode('ascii')
assert header_line == _COMMON_HEADER[:-1], 'was ' + str(header_line)
header_line = _ReadLine(lines).encode('ascii')
if header_line == _SIZE_HEADER_SINGLE_CONTAINER[:-1]:
has_multi_containers = False
elif header_line == _SIZE_HEADER_MULTI_CONTAINER[:-1]:
has_multi_containers = True
else:
raise ValueError('Version mismatch. Need to write some upgrade code.')
# JSON header fields
json_len = int(_ReadLine(lines))
json_str = lines.read(json_len)
fields = json.loads(json_str)
assert ('containers' in fields) == has_multi_containers
assert ('build_config' in fields) == has_multi_containers
assert ('metadata' not in fields) == has_multi_containers
assert ('section_sizes' not in fields) == has_multi_containers
containers = []
if has_multi_containers: # New format.
build_config = fields['build_config']
for cfield in fields['containers']:
c = models.Container(name=cfield['name'],
metadata=cfield['metadata'],
section_sizes=cfield['section_sizes'])
containers.append(c)
else: # Old format.
build_config = {}
metadata = fields.get('metadata')
if metadata:
for key in models.BUILD_CONFIG_KEYS:
if key in metadata:
build_config[key] = metadata[key]
del metadata[key]
section_sizes = fields['section_sizes']
containers.append(
models.Container(name='',
metadata=metadata,
section_sizes=section_sizes))
models.BaseContainer.AssignShortNames(containers)
has_components = fields.get('has_components', False)
has_padding = fields.get('has_padding', False)
  # Consume the newline that terminates the JSON header fields.
_ReadLine(lines)
# Path list.
num_path_tuples = int(_ReadLine(lines)) # Number of paths in list.
# Read the path list values and store for later.
path_tuples = [
_ReadValuesFromLine(lines, split='\t') for _ in range(num_path_tuples)
]
if num_path_tuples == 0:
logging.warning('File contains no symbols: %s', size_path)
return models.SizeInfo(build_config, containers, [], size_path=size_path)
# Component list.
if has_components:
num_components = int(_ReadLine(lines)) # Number of components in list.
components = [_ReadLine(lines) for _ in range(num_components)]
# Symbol counts by "segments", defined as (container, section) tuples.
segment_names = _ReadValuesFromLine(lines, split='\t')
symbol_counts = [int(c) for c in _ReadValuesFromLine(lines, split='\t')]
# Addresses, sizes, paddings, path indices, component indices.
def read_numeric(delta=False):
"""Read numeric values, where each line corresponds to a symbol group.
The values in each line are space separated.
    If |delta| is True, each value on a line is stored as a delta from the
    previous value, so the actual value is the running sum.
"""
ret = []
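    # int(delta) is 1 or 0: with 0, |value| resets each iteration (absolute
    # values); with 1, |value| accumulates (delta decoding).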
delta_multiplier = int(delta)
for _ in symbol_counts:
value = 0
fields = []
for f in _ReadValuesFromLine(lines, split=' '):
value = value * delta_multiplier + int(f)
fields.append(value)
ret.append(fields)
return ret
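  # E.g. with delta=True, the line "100 32 32" decodes to [100, 132, 164],
  # inverting gen_delta() used by the writer.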
addresses = read_numeric(delta=True)
sizes = read_numeric(delta=False)
if has_padding:
paddings = read_numeric(delta=False)
else:
paddings = [None] * len(segment_names)
path_indices = read_numeric(delta=True)
if has_components:
component_indices = read_numeric(delta=True)
else:
component_indices = [None] * len(segment_names)
raw_symbols = [None] * sum(symbol_counts)
symbol_idx = 0
for (cur_segment_name, cur_symbol_count, cur_addresses, cur_sizes,
cur_paddings, cur_path_indices,
cur_component_indices) in zip(segment_names, symbol_counts, addresses,
sizes, paddings, path_indices,
component_indices):
if has_multi_containers:
# Extract '<cur_container_idx_str>cur_section_name'.
assert cur_segment_name.startswith('<')
cur_container_idx_str, cur_section_name = (cur_segment_name[1:].split(
'>', 1))
cur_container = containers[int(cur_container_idx_str)]
else:
cur_section_name = cur_segment_name
cur_container = containers[0]
alias_counter = 0
for i in range(cur_symbol_count):
parts = _ReadValuesFromLine(lines, split='\t')
full_name = parts[0]
flags_part = None
aliases_part = None
# aliases_part or flags_part may have been omitted.
if len(parts) == 3:
# full_name aliases_part flags_part
aliases_part = parts[1]
flags_part = parts[2]
elif len(parts) == 2:
if parts[1][0] == '0':
# full_name aliases_part
aliases_part = parts[1]
else:
# full_name flags_part
flags_part = parts[1]
# Use a bit less RAM by using the same instance for this common string.
if full_name == models.STRING_LITERAL_NAME:
full_name = models.STRING_LITERAL_NAME
flags = int(flags_part, 16) if flags_part else 0
num_aliases = int(aliases_part, 16) if aliases_part else 0
# Skip the constructor to avoid default value checks.
new_sym = models.Symbol.__new__(models.Symbol)
new_sym.container = cur_container
new_sym.section_name = cur_section_name
new_sym.full_name = full_name
new_sym.address = cur_addresses[i]
new_sym.size = cur_sizes[i]
paths = path_tuples[cur_path_indices[i]]
new_sym.object_path, new_sym.source_path = paths
component = components[cur_component_indices[i]] if has_components else ''
new_sym.component = component
new_sym.flags = flags
# Derived.
if cur_paddings:
new_sym.padding = cur_paddings[i]
if not new_sym.IsOverhead():
new_sym.size += new_sym.padding
else:
new_sym.padding = 0 # Computed below.
new_sym.template_name = ''
new_sym.name = ''
if num_aliases:
assert alias_counter == 0
new_sym.aliases = [new_sym]
alias_counter = num_aliases - 1
elif alias_counter > 0:
new_sym.aliases = raw_symbols[symbol_idx - 1].aliases
new_sym.aliases.append(new_sym)
alias_counter -= 1
else:
new_sym.aliases = None
raw_symbols[symbol_idx] = new_sym
symbol_idx += 1
if not has_padding:
CalculatePadding(raw_symbols)
return models.SizeInfo(build_config,
containers,
raw_symbols,
size_path=size_path)
@contextlib.contextmanager
def _OpenGzipForWrite(path, file_obj=None):
  # Open in a way that doesn't embed a filename or mtime in the gzip header,
  # so that output is deterministic.
if file_obj:
with gzip.GzipFile(filename='', mode='wb', fileobj=file_obj, mtime=0) as fz:
yield fz
else:
with open(path, 'wb') as f:
with gzip.GzipFile(filename='', mode='wb', fileobj=f, mtime=0) as fz:
yield fz
def SaveSizeInfo(size_info,
path,
file_obj=None,
include_padding=False,
sparse_symbols=None):
"""Saves |size_info| to |path|."""
if os.environ.get('SUPERSIZE_MEASURE_GZIP') == '1':
# Doing serialization and Gzip together.
with _OpenGzipForWrite(path, file_obj=file_obj) as f:
_SaveSizeInfoToFile(
size_info,
f,
include_padding=include_padding,
sparse_symbols=sparse_symbols)
else:
    # Doing serialization and Gzip separately.
# This turns out to be faster. On Python 3: 40s -> 14s.
bytesio = io.BytesIO()
_SaveSizeInfoToFile(
size_info,
bytesio,
include_padding=include_padding,
sparse_symbols=sparse_symbols)
logging.debug('Serialization complete. Gzipping...')
with _OpenGzipForWrite(path, file_obj=file_obj) as f:
f.write(bytesio.getvalue())
def LoadSizeInfo(filename, file_obj=None):
"""Returns a SizeInfo loaded from |filename|."""
with gzip.GzipFile(filename=filename, fileobj=file_obj) as f:
return _LoadSizeInfoFromFile(f, filename)
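
# Illustrative round trip (assumes |size_info| is a models.SizeInfo):
#   SaveSizeInfo(size_info, '/tmp/app.size')
#   size_info = LoadSizeInfo('/tmp/app.size')
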
def SaveDeltaSizeInfo(delta_size_info, path, file_obj=None):
"""Saves |delta_size_info| to |path|."""
if not file_obj:
with open(path, 'wb') as f:
return SaveDeltaSizeInfo(delta_size_info, path, f)
changed_symbols = delta_size_info.raw_symbols \
.WhereDiffStatusIs(models.DIFF_STATUS_UNCHANGED).Inverted()
before_symbols = models.SymbolGroup(
[sym.before_symbol for sym in changed_symbols if sym.before_symbol])
after_symbols = models.SymbolGroup(
[sym.after_symbol for sym in changed_symbols if sym.after_symbol])
before_size_file = io.BytesIO()
after_size_file = io.BytesIO()
after_promise = parallel.CallOnThread(
SaveSizeInfo,
delta_size_info.after,
'',
file_obj=after_size_file,
include_padding=True,
sparse_symbols=after_symbols)
SaveSizeInfo(
delta_size_info.before,
'',
file_obj=before_size_file,
include_padding=True,
sparse_symbols=before_symbols)
w = _Writer(file_obj)
w.WriteBytes(_COMMON_HEADER + _SIZEDIFF_HEADER)
# JSON header fields
fields = {
'version': _SIZEDIFF_VERSION,
'before_length': before_size_file.tell(),
}
fields_str = json.dumps(fields, indent=2, sort_keys=True)
w.WriteLine(str(len(fields_str)))
w.WriteLine(fields_str)
w.WriteBytes(before_size_file.getvalue())
after_promise.get()
w.WriteBytes(after_size_file.getvalue())
return None
def LoadDeltaSizeInfo(path, file_obj=None):
"""Returns a tuple of size infos (before, after).
To reconstruct the DeltaSizeInfo, diff the two size infos.
"""
if not file_obj:
with open(path, 'rb') as f:
return LoadDeltaSizeInfo(path, f)
combined_header = _COMMON_HEADER + _SIZEDIFF_HEADER
actual_header = file_obj.read(len(combined_header))
if actual_header != combined_header:
raise Exception('Bad file header.')
json_len = int(file_obj.readline())
json_str = file_obj.read(json_len + 1) # + 1 for \n
fields = json.loads(json_str)
assert fields['version'] == _SIZEDIFF_VERSION
after_pos = file_obj.tell() + fields['before_length']
before_size_info = LoadSizeInfo(path, file_obj)
file_obj.seek(after_pos)
after_size_info = LoadSizeInfo(path, file_obj)
return before_size_info, after_size_info