| # Copyright 2017 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Main Python API for analyzing binary size.""" |
| |
| import argparse |
| import calendar |
| import collections |
| import datetime |
| import gzip |
| import itertools |
| import logging |
| import os |
| import posixpath |
| import re |
| import subprocess |
| import sys |
| import tempfile |
| import zipfile |
| import zlib |
| |
| import apkanalyzer |
| import ar |
| import concurrent |
| import demangle |
| import describe |
| import file_format |
| import function_signature |
| import linker_map_parser |
| import models |
| import ninja_parser |
| import nm |
| import obj_analyzer |
| import path_util |
| |
| sys.path.insert(1, os.path.join(path_util.SRC_ROOT, 'tools', 'grit')) |
| from grit.format import data_pack |
| |
# Base name of the per-directory file that _FindComponentRoot() looks up.
_OWNERS_FILENAME = 'OWNERS'
# Matches a "# COMPONENT: <name>" line. Group 1 is the run of non-whitespace
# characters that follows the colon.
_COMPONENT_REGEX = re.compile(r'\s*#\s*COMPONENT\s*:\s*(\S+)')
# Matches a "file://<path>" line. Group 1 is the referenced path.
_FILE_PATH_REGEX = re.compile(r'\s*file://(\S+)')
# NOTE(review): not referenced in the visible part of this file. Presumably a
# compressed/uncompressed size ratio above which an entry is treated as stored
# uncompressed -- confirm against its users.
_UNCOMPRESSED_COMPRESSION_RATIO_THRESHOLD = 0.9
| |
# Holds computation state that is live only when an output directory exists.
_OutputDirectoryContext = collections.namedtuple('_OutputDirectoryContext', [
    # Object paths that _ParseElfInfo() hands to the bulk object-file analyzer.
    'elf_object_paths',  # Only when elf_path is also provided.
    # Collection of already-known input paths; _DiscoverMissedObjectPaths()
    # reports symbol object paths that are absent from it.
    'known_inputs',  # Only when elf_path is also provided.
    # Build output directory.
    'output_directory',
    # NOTE(review): presumably the object exposing FindSourceForPath() that
    # _ExtractSourcePathsAndNormalizeObjectPaths() expects -- confirm at the
    # construction site.
    'source_mapper',
    # Archive paths tested for membership by _ResolveThinArchivePaths().
    'thin_archives',
])
| |
| |
class SectionSizeKnobs(object):
  """Tunable "knobs" for CreateSectionSizesAndSymbols().

  Attributes:
    max_same_name_alias_count: Largest number of aliases a symbol may have
      before the aliases are compacted into a single shared symbol.
    pak_compression_ratio: Estimated compression ratio of pak translations.
    apk_other_files: Dict of APK file name -> source file path.
    apk_expected_other_files: Set of APK file names that are expected to have
      no entry in |apk_other_files|.
    src_root: Root of the source checkout (path_util.SRC_ROOT).
  """

  def __init__(self):
    # Upper bound on how many symbols may share an address before they are
    # collapsed into shared symbols. Raising it stores more data in .size
    # files, and is also more expensive.
    # Effect of max_same_name_alias_count (as of Oct 2017, with min_pss = max):
    # 1: shared .text syms = 1772874 bytes, file size = 9.43MiB (645476 syms).
    # 2: shared .text syms = 1065654 bytes, file size = 9.58MiB (669952 syms).
    # 6: shared .text syms = 464058 bytes, file size = 10.11MiB (782693 syms).
    # 10: shared .text syms = 365648 bytes, file size = 10.24MiB (813758 syms).
    # 20: shared .text syms = 86202 bytes, file size = 10.38MiB (854548 syms).
    # 40: shared .text syms = 48424 bytes, file size = 10.50MiB (890396 syms).
    # 50: shared .text syms = 41860 bytes, file size = 10.54MiB (902304 syms).
    # max: shared .text syms = 0 bytes, file size = 11.10MiB (1235449 syms).
    self.max_same_name_alias_count = 40  # 50kb is basically negligible.

    # Fixed estimate of the pak translation compression ratio. Using a
    # constant keeps comparisons between .size files reasonable; the real
    # ratio could otherwise differ on every pak change.
    self.pak_compression_ratio = 0.33

    # Maps file name within the APK -> source file.
    self.apk_other_files = {
        'assets/icudtl.dat':
            '../../third_party/icu/android/icudtl.dat',
        'assets/snapshot_blob_32.bin':
            '../../v8/snapshot_blob_32.bin',
        'assets/snapshot_blob_64.bin':
            '../../v8/snapshot_blob_64.bin',
        'assets/natives_blob.bin':
            '../../v8/natives_blob.bin',
        'assets/unwind_cfi_32':
            '../../base/trace_event/cfi_backtrace_android.cc',
        'assets/webapk_dex_version.txt':
            '../../chrome/android/webapk/libs/runtime_library_version.gni',
        'lib/armeabi-v7a/libarcore_sdk_c_minimal.so':
            '../../third_party/arcore-android-sdk',
    }

    # Files seen in Monochrome.apk that have no source mapping above.
    self.apk_expected_other_files = {
        'AndroidManifest.xml',
        'resources.arsc',
        'assets/AndroidManifest.xml',
        'assets/metaresources.arsc',
        'META-INF/CHROMIUM.SF',
        'META-INF/CHROMIUM.RSA',
        'META-INF/MANIFEST.MF',
    }

    self.src_root = path_util.SRC_ROOT
| |
| |
| def _OpenMaybeGz(path): |
| """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`.""" |
| if path.endswith('.gz'): |
| return gzip.open(path, 'rb') |
| return open(path, 'rb') |
| |
| |
def _StripLinkerAddedSymbolPrefixes(raw_symbols):
  """Strips prefixes that are sometimes added to symbol names during link.

  With the prefix gone, names match those found in .o files. Each stripped
  prefix is recorded as a bit in |symbol.flags|. Names starting with ".L.str"
  are replaced by models.STRING_LITERAL_NAME and get no flag.

  Args:
    raw_symbols: Iterable of symbols; |full_name| and |flags| are updated in
      place.
  """
  # Order matters: 'rel.local.' has to be tried before the shorter 'rel.'.
  flag_by_prefix = (
      ('startup.', models.FLAG_STARTUP),
      ('unlikely.', models.FLAG_UNLIKELY),
      ('rel.local.', models.FLAG_REL_LOCAL),
      ('rel.', models.FLAG_REL),
      ('hot.', models.FLAG_HOT),
  )
  for symbol in raw_symbols:
    name = symbol.full_name
    for prefix, flag in flag_by_prefix:
      if name.startswith(prefix):
        symbol.flags |= flag
        symbol.full_name = name[len(prefix):]
        break
    else:
      # No linker prefix matched; the only other rewrite is string literals.
      if name.startswith('.L.str'):
        symbol.full_name = models.STRING_LITERAL_NAME
| |
| |
def _NormalizeNames(raw_symbols):
  """Ensures that all names are formatted in a useful way.

  This includes:
    - Deriving |name| and |template_name| from |full_name|.
    - Stripping of return types (for functions).
    - Moving "vtable for" and the like to be suffixes rather than prefixes.

  Symbols are updated in place. A symbol that matches none of the branches
  below is left untouched apart from the final InternSameNames() call.

  Args:
    raw_symbols: Iterable of symbols to normalize.
  """
  # Collected only so that the prefixes can be reported in the debug log below.
  found_prefixes = set()
  for symbol in raw_symbols:
    full_name = symbol.full_name

    # See comment in _CalculatePadding() about when this can happen. Don't
    # process names for non-native sections.
    if symbol.IsPak():
      # full_name: "about_ui_resources.grdp: IDR_ABOUT_UI_CREDITS_HTML".
      # Only the text after the last space is kept as the name. rindex()
      # raises ValueError when |full_name| contains no space.
      space_idx = full_name.rindex(' ')
      name = full_name[space_idx + 1:]
      symbol.template_name = name
      symbol.name = name
    elif (full_name.startswith('*') or
          symbol.IsOverhead() or
          symbol.IsOther()):
      # These names are used verbatim.
      symbol.template_name = full_name
      symbol.name = full_name
    elif symbol.IsDex():
      symbol.full_name, symbol.template_name, symbol.name = (
          function_signature.ParseJava(full_name))
    elif symbol.IsNative():
      # Remove [clone] suffix, and set flag accordingly.
      # Search from left-to-right, as multiple [clone]s can exist.
      # Example name suffixes:
      #     [clone .part.322]  # GCC
      #     [clone .isra.322]  # GCC
      #     [clone .constprop.1064]  # GCC
      #     [clone .11064]  # clang
      # http://unix.stackexchange.com/questions/223013/function-symbol-gets-part-suffix-after-compilation
      idx = full_name.find(' [clone ')
      if idx != -1:
        full_name = full_name[:idx]
        symbol.flags |= models.FLAG_CLONE

      # Clones for C symbols.
      if symbol.section == 't':
        idx = full_name.rfind('.')
        # Only an all-digit suffix after the last '.' counts as a clone marker.
        if idx != -1 and full_name[idx + 1:].isdigit():
          new_name = full_name[:idx]
          # Generated symbols that end with .123 but are not clones.
          # Find these via:
          # size_info.symbols.WhereInSection('t').WhereIsGroup().SortedByCount()
          if new_name not in ('__tcf_0', 'startup'):
            full_name = new_name
            symbol.flags |= models.FLAG_CLONE
            # Remove .part / .isra / .constprop.
            # The search is limited to the text before the numeric suffix that
            # was just removed.
            idx = full_name.rfind('.', 0, idx)
            if idx != -1:
              full_name = full_name[:idx]

      # E.g.: vtable for FOO
      # Only the first 30 characters are searched for the separator.
      idx = full_name.find(' for ', 0, 30)
      if idx != -1:
        found_prefixes.add(full_name[:idx + 4])
        full_name = '{} [{}]'.format(full_name[idx + 5:], full_name[:idx])

      # E.g.: virtual thunk to FOO
      idx = full_name.find(' to ', 0, 30)
      if idx != -1:
        found_prefixes.add(full_name[:idx + 3])
        full_name = '{} [{}]'.format(full_name[idx + 4:], full_name[:idx])

      # Strip out return type, and split out name, template_name.
      # Function parsing also applies to non-text symbols.
      # E.g. Function statics.
      symbol.full_name, symbol.template_name, symbol.name = (
          function_signature.Parse(full_name))

      # Remove anonymous namespaces (they just harm clustering).
      symbol.template_name = symbol.template_name.replace(
          '(anonymous namespace)::', '')
      symbol.full_name = symbol.full_name.replace(
          '(anonymous namespace)::', '')
      non_anonymous_name = symbol.name.replace('(anonymous namespace)::', '')
      # The flag is set only when |name| itself contained the namespace.
      if symbol.name != non_anonymous_name:
        symbol.flags |= models.FLAG_ANONYMOUS
        symbol.name = non_anonymous_name

    # Allow using "is" to compare names (and should help with RAM). This applies
    # to all symbols.
    function_signature.InternSameNames(symbol)

  logging.debug('Found name prefixes of: %r', found_prefixes)
| |
| |
| def _NormalizeObjectPath(path): |
| """Normalizes object paths. |
| |
| Prefixes are removed: obj/, ../../ |
| Archive names made more pathy: foo/bar.a(baz.o) -> foo/bar.a/baz.o |
| """ |
| if path.startswith('obj/'): |
| # Convert obj/third_party/... -> third_party/... |
| path = path[4:] |
| elif path.startswith('../../'): |
| # Convert ../../third_party/... -> third_party/... |
| path = path[6:] |
| if path.endswith(')'): |
| # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o so that hierarchical |
| # breakdowns consider the .o part to be a separate node. |
| start_idx = path.rindex('(') |
| path = os.path.join(path[:start_idx], path[start_idx + 1:-1]) |
| return path |
| |
| |
| def _NormalizeSourcePath(path): |
| """Returns (is_generated, normalized_path)""" |
| if path.startswith('gen/'): |
| # Convert gen/third_party/... -> third_party/... |
| return True, path[4:] |
| if path.startswith('../../'): |
| # Convert ../../third_party/... -> third_party/... |
| return False, path[6:] |
| return True, path |
| |
| |
| def _ExtractSourcePathsAndNormalizeObjectPaths(raw_symbols, source_mapper): |
| """Fills in the |source_path| attribute and normalizes |object_path|.""" |
| if source_mapper: |
| logging.info('Looking up source paths from ninja files') |
| for symbol in raw_symbols: |
| object_path = symbol.object_path |
| if symbol.IsDex() or symbol.IsOther(): |
| if symbol.source_path: |
| symbol.generated_source, symbol.source_path = _NormalizeSourcePath( |
| symbol.source_path) |
| elif object_path: |
| # We don't have source info for prebuilt .a files. |
| if not os.path.isabs(object_path) and not object_path.startswith('..'): |
| source_path = source_mapper.FindSourceForPath(object_path) |
| if source_path: |
| symbol.generated_source, symbol.source_path = ( |
| _NormalizeSourcePath(source_path)) |
| symbol.object_path = _NormalizeObjectPath(object_path) |
| assert source_mapper.unmatched_paths_count == 0, ( |
| 'One or more source file paths could not be found. Likely caused by ' |
| '.ninja files being generated at a different time than the .map file.') |
| else: |
| logging.info('Normalizing object paths') |
| for symbol in raw_symbols: |
| if symbol.object_path: |
| symbol.object_path = _NormalizeObjectPath(symbol.object_path) |
| |
| |
| def _ComputeAncestorPath(path_list, symbol_count): |
| """Returns the common ancestor of the given paths.""" |
| if not path_list: |
| return '' |
| |
| prefix = os.path.commonprefix(path_list) |
| # Check if all paths were the same. |
| if prefix == path_list[0]: |
| return prefix |
| |
| # Put in buckets to cut down on the number of unique paths. |
| if symbol_count >= 100: |
| symbol_count_str = '100+' |
| elif symbol_count >= 50: |
| symbol_count_str = '50-99' |
| elif symbol_count >= 20: |
| symbol_count_str = '20-49' |
| elif symbol_count >= 10: |
| symbol_count_str = '10-19' |
| else: |
| symbol_count_str = str(symbol_count) |
| |
| # Put the path count as a subdirectory so that grouping by path will show |
| # "{shared}" as a bucket, and the symbol counts as leafs. |
| if not prefix: |
| return os.path.join('{shared}', symbol_count_str) |
| return os.path.join(os.path.dirname(prefix), '{shared}', symbol_count_str) |
| |
| |
| def _CompactLargeAliasesIntoSharedSymbols(raw_symbols, knobs): |
| """Converts symbols with large number of aliases into single symbols. |
| |
| The merged symbol's path fields are changed to common-ancestor paths in |
| the form: common/dir/{shared}/$SYMBOL_COUNT |
| |
| Assumes aliases differ only by path (not by name). |
| """ |
| num_raw_symbols = len(raw_symbols) |
| num_shared_symbols = 0 |
| src_cursor = 0 |
| dst_cursor = 0 |
| while src_cursor < num_raw_symbols: |
| symbol = raw_symbols[src_cursor] |
| raw_symbols[dst_cursor] = symbol |
| dst_cursor += 1 |
| aliases = symbol.aliases |
| if aliases and len(aliases) > knobs.max_same_name_alias_count: |
| symbol.source_path = _ComputeAncestorPath( |
| [s.source_path for s in aliases if s.source_path], len(aliases)) |
| symbol.object_path = _ComputeAncestorPath( |
| [s.object_path for s in aliases if s.object_path], len(aliases)) |
| symbol.generated_source = all(s.generated_source for s in aliases) |
| symbol.aliases = None |
| num_shared_symbols += 1 |
| src_cursor += len(aliases) |
| else: |
| src_cursor += 1 |
| raw_symbols[dst_cursor:] = [] |
| num_removed = src_cursor - dst_cursor |
| logging.debug('Converted %d aliases into %d shared-path symbols', |
| num_removed, num_shared_symbols) |
| |
| |
| def _ConnectNmAliases(raw_symbols): |
| """Ensures |aliases| is set correctly for all symbols.""" |
| prev_sym = raw_symbols[0] |
| for sym in raw_symbols[1:]: |
| # Don't merge bss symbols. |
| if sym.address > 0 and prev_sym.address == sym.address: |
| # Don't merge padding-only symbols (** symbol gaps). |
| if prev_sym.size > 0: |
| # Don't merge if already merged. |
| if prev_sym.aliases is None or prev_sym.aliases is not sym.aliases: |
| if prev_sym.aliases: |
| prev_sym.aliases.append(sym) |
| else: |
| prev_sym.aliases = [prev_sym, sym] |
| sym.aliases = prev_sym.aliases |
| prev_sym = sym |
| |
| |
def _AssignNmAliasPathsAndCreatePathAliases(raw_symbols, object_paths_by_name):
  """Assigns |object_path| from nm data and adds one alias per extra path.

  Args:
    raw_symbols: Iterable of symbols. |object_path| and |aliases| of these
      symbols may be updated in place.
    object_paths_by_name: Dict of full_name -> list of object paths. A list in
      this dict is appended to and re-sorted in place when the symbol's
      existing |object_path| is missing from it.

  Returns:
    A new list containing every input symbol, each one followed directly by
    the alias symbols that were created for it.
  """
  num_found_paths = 0
  num_unknown_names = 0
  num_path_mismatches = 0
  num_aliases_created = 0
  ret = []
  for symbol in raw_symbols:
    # Every input symbol is kept, including the ones skipped below.
    ret.append(symbol)
    full_name = symbol.full_name
    # Don't skip if symbol.IsBss(). This is needed for LLD-LTO to work, since
    # .bss object_path data are unavailable for linker_map_parser, and need to
    # be extracted here. For regular LLD flow, incorrect aliased symbols can
    # arise. But that's a lesser evil compared to having LLD-LTO .bss missing
    # object_path and source_path.
    # TODO(huangs): Fix aliased symbols for the LLD case.
    if (symbol.IsStringLiteral() or
        not full_name or
        full_name[0] in '*.' or  # e.g. ** merge symbols, .Lswitch.table
        full_name == 'startup'):
      continue

    object_paths = object_paths_by_name.get(full_name)
    if object_paths:
      num_found_paths += 1
    else:
      # Only the first 10 occurrences are logged; all of them are counted.
      if num_unknown_names < 10:
        logging.warning('Symbol not found in any .o files: %r', symbol)
      num_unknown_names += 1
      continue

    if symbol.object_path and symbol.object_path not in object_paths:
      # Only the first 10 mismatches are logged; all of them are counted.
      if num_path_mismatches < 10:
        logging.warning('Symbol path reported by .map not found by nm.')
        logging.warning('sym=%r', symbol)
        logging.warning('paths=%r', object_paths)
      # This mutates the list held by |object_paths_by_name|.
      object_paths.append(symbol.object_path)
      object_paths.sort()
      num_path_mismatches += 1

    symbol.object_path = object_paths[0]

    if len(object_paths) > 1:
      # Create one symbol for each object_path.
      # The original symbol and all new ones share a single |aliases| list.
      aliases = symbol.aliases or [symbol]
      symbol.aliases = aliases
      num_aliases_created += len(object_paths) - 1
      for object_path in object_paths[1:]:
        new_sym = models.Symbol(
            symbol.section_name, symbol.size, address=symbol.address,
            full_name=full_name, object_path=object_path, aliases=aliases)
        aliases.append(new_sym)
        ret.append(new_sym)

  logging.debug('Cross-referenced %d symbols with nm output. '
                'num_unknown_names=%d num_path_mismatches=%d '
                'num_aliases_created=%d',
                num_found_paths, num_unknown_names, num_path_mismatches,
                num_aliases_created)
  return ret
| |
| |
| def _DiscoverMissedObjectPaths(raw_symbols, known_inputs): |
| # Missing object paths are caused by .a files added by -l flags, which are not |
| # listed as explicit inputs within .ninja rules. |
| missed_inputs = set() |
| for symbol in raw_symbols: |
| path = symbol.object_path |
| if path.endswith(')'): |
| # Convert foo/bar.a(baz.o) -> foo/bar.a |
| path = path[:path.rindex('(')] |
| if path and path not in known_inputs: |
| missed_inputs.add(path) |
| return missed_inputs |
| |
| |
def _CreateMergeStringsReplacements(merge_string_syms,
                                    list_of_positions_by_object_path):
  """Creates replacement symbols for |merge_string_syms|.

  Args:
    merge_string_syms: List of symbols; only their |address| is read.
    list_of_positions_by_object_path: List parallel to |merge_string_syms|.
      Each entry is a dict of object_path -> iterable of (offset, size)
      pairs, with offsets relative to the matching symbol's address.

  Returns:
    A list parallel to |merge_string_syms|. Each entry is the list of string
    literal symbols created for that symbol, sorted by (address, object_path),
    with fully-contained strings dropped, overlapping strings trimmed and
    identical ranges connected as aliases.
  """
  ret = []
  STRING_LITERAL_NAME = models.STRING_LITERAL_NAME
  assert len(merge_string_syms) == len(list_of_positions_by_object_path)
  # zip() and .items() are used instead of itertools.izip() and .iteritems()
  # because the latter two exist only on Python 2.
  tups = zip(merge_string_syms, list_of_positions_by_object_path)
  for merge_sym, positions_by_object_path in tups:
    merge_sym_address = merge_sym.address
    new_symbols = []
    ret.append(new_symbols)
    for object_path, positions in positions_by_object_path.items():
      for offset, size in positions:
        address = merge_sym_address + offset
        symbol = models.Symbol(
            models.SECTION_RODATA, size, address, STRING_LITERAL_NAME,
            object_path=object_path)
        new_symbols.append(symbol)

  logging.debug('Created %d string literal symbols', sum(len(x) for x in ret))
  logging.debug('Sorting string literals')
  for symbols in ret:
    # In order to achieve a total ordering in the presence of aliases, need to
    # include both |address| and |object_path|.
    # In order to achieve consistent deduping, need to include |size|.
    # Larger strings sort first so that substrings follow their container.
    symbols.sort(key=lambda x: (x.address, -x.size, x.object_path))

  logging.debug('Deduping string literals')
  num_removed = 0
  size_removed = 0
  num_aliases = 0
  for i, symbols in enumerate(ret):
    if not symbols:
      continue
    prev_symbol = symbols[0]
    new_symbols = [prev_symbol]
    for symbol in symbols[1:]:
      # Negative when |symbol| starts before |prev_symbol| ends.
      padding = symbol.address - prev_symbol.end_address
      if (prev_symbol.address == symbol.address and
          prev_symbol.size == symbol.size):
        # String is an alias.
        num_aliases += 1
        aliases = prev_symbol.aliases
        if aliases:
          aliases.append(symbol)
          symbol.aliases = aliases
        else:
          aliases = [prev_symbol, symbol]
          prev_symbol.aliases = aliases
          symbol.aliases = aliases
      elif padding + symbol.size <= 0:
        # String is a substring of prior one.
        num_removed += 1
        size_removed += symbol.size
        continue
      elif padding < 0:
        # String overlaps previous one. Adjust to not overlap.
        symbol.address -= padding
        symbol.size += padding
      new_symbols.append(symbol)
      prev_symbol = symbol
    ret[i] = new_symbols
    # Aliases come out in random order, so sort to be deterministic.
    ret[i].sort(key=lambda s: (s.address, s.object_path))

  logging.debug(
      'Removed %d overlapping string literals (%d bytes) & created %d aliases',
      num_removed, size_removed, num_aliases)
  return ret
| |
| |
def _CalculatePadding(raw_symbols):
  """Populates the |padding| field based on symbol addresses.

  Symbols must already be sorted by |address|. As the assertion below spells
  out, they must in fact be sorted by section first, then by address.

  For each symbol that gets padding, the same amount is also added to its
  |size|. The first symbol in the list is never assigned padding.

  Args:
    raw_symbols: List of symbols, updated in place.
  """
  seen_sections = set()
  # Iteration starts at the second symbol, so |i| is the index of the symbol
  # that precedes |symbol|.
  for i, symbol in enumerate(raw_symbols[1:]):
    prev_symbol = raw_symbols[i]
    if symbol.IsOverhead():
      # Overhead symbols are not actionable so should be padding-only.
      symbol.padding = symbol.size
    if prev_symbol.section_name != symbol.section_name:
      # First symbol of a section: there is no predecessor to measure from.
      assert symbol.section_name not in seen_sections, (
          'Input symbols must be sorted by section, then address.')
      seen_sections.add(symbol.section_name)
      continue
    if (symbol.address <= 0 or prev_symbol.address <= 0 or
        not symbol.IsNative() or not prev_symbol.IsNative()):
      continue

    if symbol.address == prev_symbol.address:
      # Symbols sharing one alias list take the padding and size of the
      # symbol before them.
      if symbol.aliases and symbol.aliases is prev_symbol.aliases:
        symbol.padding = prev_symbol.padding
        symbol.size = prev_symbol.size
        continue
      # Padding-only symbols happen for ** symbol gaps.
      assert prev_symbol.size_without_padding == 0, (
          'Found duplicate symbols:\n%r\n%r' % (prev_symbol, symbol))

    padding = symbol.address - prev_symbol.end_address
    # These thresholds were found by experimenting with arm32 Chrome.
    # E.g.: Set them to 0 and see what warnings get logged, then take max value.
    # TODO(agrieve): See if these thresholds make sense for architectures
    #     other than arm32.
    # Note that |section in 'rd'| is a substring test on the section code.
    if (not symbol.full_name.startswith('*') and
        not symbol.IsStringLiteral() and (
            symbol.section in 'rd' and padding >= 256 or
            symbol.section in 't' and padding >= 64)):
      # Should not happen.
      logging.warning('Large padding of %d between:\n A) %r\n B) %r' % (
          padding, prev_symbol, symbol))
    symbol.padding = padding
    symbol.size += padding
    # A negative |padding| larger than the symbol itself ends up here.
    assert symbol.size >= 0, (
        'Symbol has negative size (likely not sorted propertly): '
        '%r\nprev symbol: %r' % (symbol, prev_symbol))
| |
| |
def _ParseComponentFromOwners(filename):
  """Searches an OWNERS file for lines that start with `# COMPONENT:`.

  If an OWNERS file has no COMPONENT but references exactly one other OWNERS
  file via a `file://` line, the reference is followed and that file is
  checked instead. A chain of references that leads back to a file that was
  already visited is treated as "no component found".

  Args:
    filename: Path to the file to parse.
  Returns:
    The text that follows the `# COMPONENT:` prefix, such as 'component>name'.
    Empty string if no component found or the file didn't exist.
  """
  # Paths that were already opened, to stop on cyclic file:// references.
  visited = set()
  while filename not in visited:
    visited.add(filename)
    reference_paths = []
    try:
      with open(filename) as f:
        for line in f:
          component_match = _COMPONENT_REGEX.match(line)
          if component_match:
            return component_match.group(1)
          path_match = _FILE_PATH_REGEX.match(line)
          if path_match:
            reference_paths.append(path_match.group(1))
    except IOError:
      return ''

    # Only an unambiguous (single) reference is followed.
    if len(reference_paths) != 1:
      return ''
    filename = os.path.join(path_util.SRC_ROOT, reference_paths[0])
  return ''
| |
| |
| def _FindComponentRoot(start_path, cache, knobs): |
| """Searches all parent directories for COMPONENT in OWNERS files. |
| |
| Args: |
| start_path: Path of directory to start searching from. Must be relative to |
| SRC_ROOT. |
| cache: Dict of OWNERS paths. Used instead of filesystem if paths are present |
| in the dict. |
| knobs: Instance of SectionSizeKnobs with tunable knobs and options. |
| |
| Returns: |
| COMPONENT belonging to |start_path|, or empty string if not found. |
| """ |
| prev_dir = None |
| test_dir = start_path |
| # This loop will traverse the directory structure upwards until reaching |
| # SRC_ROOT, where test_dir and prev_dir will both equal an empty string. |
| while test_dir != prev_dir: |
| cached_component = cache.get(test_dir) |
| if cached_component: |
| return cached_component |
| elif cached_component is None: |
| owners_path = os.path.join(knobs.src_root, test_dir, _OWNERS_FILENAME) |
| component = _ParseComponentFromOwners(owners_path) |
| cache[test_dir] = component |
| if component: |
| return component |
| prev_dir = test_dir |
| test_dir = os.path.dirname(test_dir) |
| return '' |
| |
| |
| def _PopulateComponents(raw_symbols, knobs): |
| """Populates the |component| field based on |source_path|. |
| |
| Symbols without a |source_path| are skipped. |
| |
| Args: |
| raw_symbols: list of Symbol objects. |
| knobs: Instance of SectionSizeKnobs. Tunable knobs and options. |
| """ |
| seen_paths = {} |
| for symbol in raw_symbols: |
| if symbol.source_path: |
| folder_path = os.path.dirname(symbol.source_path) |
| symbol.component = _FindComponentRoot(folder_path, seen_paths, knobs) |
| |
| |
| def _UpdateSymbolNamesFromNm(raw_symbols, names_by_address): |
| """Updates raw_symbols names with extra information from nm.""" |
| logging.debug('Update symbol names') |
| # linker_map_parser extracts '** outlined function' without knowing how many |
| # such symbols exist at each address. nm has this information, and stores the |
| # value as, e.g., '** outlined function * 5'. Copy the information over. |
| for s in raw_symbols: |
| if s.full_name.startswith('** outlined function'): |
| name_list = names_by_address.get(s.address) |
| if name_list: |
| for name in name_list: |
| if name.startswith('** outlined function'): |
| s.full_name = name |
| break |
| |
| |
| def _AddNmAliases(raw_symbols, names_by_address): |
| """Adds symbols that were removed by identical code folding.""" |
| # Step 1: Create list of (index_of_symbol, name_list). |
| logging.debug('Creating alias list') |
| replacements = [] |
| num_new_symbols = 0 |
| missing_names = collections.defaultdict(list) |
| for i, s in enumerate(raw_symbols): |
| # Don't alias padding-only symbols (e.g. ** symbol gap) |
| if s.size_without_padding == 0: |
| continue |
| name_list = names_by_address.get(s.address) |
| if name_list: |
| if s.full_name not in name_list: |
| missing_names[s.full_name].append(s.address) |
| logging.warning('Name missing from aliases: %s %s', s.full_name, |
| name_list) |
| continue |
| replacements.append((i, name_list)) |
| num_new_symbols += len(name_list) - 1 |
| |
| if missing_names and logging.getLogger().isEnabledFor(logging.INFO): |
| for address, names in names_by_address.iteritems(): |
| for name in names: |
| if name in missing_names: |
| logging.info('Missing name %s is at address %x instead of [%s]' % |
| (name, address, ','.join('%x' % a for a in missing_names[name]))) |
| |
| if float(num_new_symbols) / len(raw_symbols) < .05: |
| logging.warning('Number of aliases is oddly low (%.0f%%). It should ' |
| 'usually be around 25%%. Ensure --tool-prefix is correct. ', |
| float(num_new_symbols) / len(raw_symbols) * 100) |
| |
| # Step 2: Create new symbols as siblings to each existing one. |
| logging.debug('Creating %d new symbols from nm output', num_new_symbols) |
| expected_num_symbols = len(raw_symbols) + num_new_symbols |
| ret = [] |
| prev_src = 0 |
| for cur_src, name_list in replacements: |
| ret += raw_symbols[prev_src:cur_src] |
| prev_src = cur_src + 1 |
| sym = raw_symbols[cur_src] |
| # Create symbols (|sym| gets recreated and discarded). |
| new_syms = [] |
| for full_name in name_list: |
| # Do not set |aliases| in order to avoid being pruned by |
| # _CompactLargeAliasesIntoSharedSymbols(), which assumes aliases differ |
| # only by path. The field will be set afterwards by _ConnectNmAliases(). |
| new_syms.append(models.Symbol( |
| sym.section_name, sym.size, address=sym.address, full_name=full_name)) |
| ret += new_syms |
| ret += raw_symbols[prev_src:] |
| assert expected_num_symbols == len(ret) |
| return ret |
| |
| |
def LoadAndPostProcessSizeInfo(path, file_obj=None):
  """Returns a SizeInfo for the given |path|.

  Args:
    path: Path of the file to load; forwarded to file_format.LoadSizeInfo().
    file_obj: Optional object forwarded as-is to file_format.LoadSizeInfo().
      NOTE(review): presumably an already-open file to read from instead of
      opening |path| -- confirm in file_format.

  Returns:
    The loaded size info, after _NormalizeNames() and _CalculatePadding() have
    been applied in place to its |raw_symbols|.
  """
  logging.debug('Loading results from: %s', path)
  size_info = file_format.LoadSizeInfo(path, file_obj=file_obj)
  # Names are normalized before padding is calculated.
  logging.info('Normalizing symbol names')
  _NormalizeNames(size_info.raw_symbols)
  logging.info('Calculating padding')
  _CalculatePadding(size_info.raw_symbols)
  logging.info('Loaded %d symbols', len(size_info.raw_symbols))
  return size_info
| |
| |
def CreateMetadata(map_path, elf_path, apk_path, tool_prefix, output_directory,
                   linker_name):
  """Builds the metadata dict.

  Args:
    map_path: Path to the linker .map(.gz) file to parse.
    elf_path: Path to the corresponding unstripped ELF file. Used to find symbol
        aliases and inlined functions. Can be None.
    apk_path: Path to the .apk file to measure.
    tool_prefix: Prefix for c++filt & nm.
    output_directory: Build output directory.
    linker_name: A coded linker name (see linker_map_parser.py).

  Returns:
    None if |elf_path| is not supplied. Otherwise returns dict mapping string
    constants to values.
    If |elf_path| is supplied, git revision and elf info are included.
    If |output_directory| is also supplied, then filenames will be included.
    APK entries are only added when both |output_directory| and |apk_path| are
    supplied.
  """
  if not elf_path:
    return None

  logging.debug('Constructing metadata')
  git_rev = _DetectGitRevision(os.path.dirname(elf_path))
  architecture = _ArchFromElf(elf_path, tool_prefix)
  build_id = BuildIdFromElf(elf_path, tool_prefix)
  # ELF mtime as whole seconds since the epoch (UTC).
  elf_mtime_utc = datetime.datetime.utcfromtimestamp(
      os.path.getmtime(elf_path))
  timestamp = calendar.timegm(elf_mtime_utc.timetuple())
  relative_tool_prefix = path_util.ToSrcRootRelative(tool_prefix)

  metadata = {
      models.METADATA_GIT_REVISION: git_rev,
      models.METADATA_ELF_ARCHITECTURE: architecture,
      models.METADATA_ELF_MTIME: timestamp,
      models.METADATA_ELF_BUILD_ID: build_id,
      models.METADATA_LINKER_NAME: linker_name,
      models.METADATA_TOOL_PREFIX: relative_tool_prefix,
  }
  if not output_directory:
    return metadata

  def _RelativeToOut(path):
    return os.path.relpath(path, output_directory)

  gn_args = _ParseGnArgs(os.path.join(output_directory, 'args.gn'))
  metadata[models.METADATA_MAP_FILENAME] = _RelativeToOut(map_path)
  metadata[models.METADATA_ELF_FILENAME] = _RelativeToOut(elf_path)
  metadata[models.METADATA_GN_ARGS] = gn_args

  if apk_path:
    metadata[models.METADATA_APK_FILENAME] = _RelativeToOut(apk_path)
    metadata[models.METADATA_APK_SIZE] = os.path.getsize(apk_path)
  return metadata
| |
| |
| def _ResolveThinArchivePaths(raw_symbols, thin_archives): |
| """Converts object_paths for thin archives to external .o paths.""" |
| for symbol in raw_symbols: |
| object_path = symbol.object_path |
| if object_path.endswith(')'): |
| start_idx = object_path.rindex('(') |
| archive_path = object_path[:start_idx] |
| if archive_path in thin_archives: |
| subpath = object_path[start_idx + 1:-1] |
| symbol.object_path = ar.CreateThinObjectPath(archive_path, subpath) |
| |
| |
| def _ParseElfInfo(map_path, elf_path, tool_prefix, track_string_literals, |
| outdir_context=None, linker_name=None): |
| """Adds ELF section sizes and symbols.""" |
| if elf_path: |
| # Run nm on the elf file to retrieve the list of symbol names per-address. |
| # This list is required because the .map file contains only a single name |
| # for each address, yet multiple symbols are often coalesced when they are |
| # identical. This coalescing happens mainly for small symbols and for C++ |
| # templates. Such symbols make up ~500kb of libchrome.so on Android. |
| elf_nm_result = nm.CollectAliasesByAddressAsync(elf_path, tool_prefix) |
| |
| # Run nm on all .o/.a files to retrieve the symbol names within them. |
| # The list is used to detect when mutiple .o files contain the same symbol |
| # (e.g. inline functions), and to update the object_path / source_path |
| # fields accordingly. |
| # Looking in object files is required because the .map file choses a |
| # single path for these symbols. |
| # Rather than record all paths for each symbol, set the paths to be the |
| # common ancestor of all paths. |
| if outdir_context: |
| bulk_analyzer = obj_analyzer.BulkObjectFileAnalyzer( |
| tool_prefix, outdir_context.output_directory, |
| track_string_literals=track_string_literals) |
| bulk_analyzer.AnalyzePaths(outdir_context.elf_object_paths) |
| |
| logging.info('Parsing Linker Map') |
| with _OpenMaybeGz(map_path) as map_file: |
| section_sizes, raw_symbols = ( |
| linker_map_parser.MapFileParser().Parse(linker_name, map_file)) |
| |
| if outdir_context and outdir_context.thin_archives: |
| _ResolveThinArchivePaths(raw_symbols, outdir_context.thin_archives) |
| |
| if elf_path: |
| logging.debug('Validating section sizes') |
| elf_section_sizes = _SectionSizesFromElf(elf_path, tool_prefix) |
| for k, v in elf_section_sizes.iteritems(): |
| if v != section_sizes.get(k): |
| logging.error('ELF file and .map file do not agree on section sizes.') |
| logging.error('.map file: %r', section_sizes) |
| logging.error('readelf: %r', elf_section_sizes) |
| sys.exit(1) |
| |
| if elf_path and outdir_context: |
| missed_object_paths = _DiscoverMissedObjectPaths( |
| raw_symbols, outdir_context.known_inputs) |
| missed_object_paths = ar.ExpandThinArchives( |
| missed_object_paths, outdir_context.output_directory)[0] |
| bulk_analyzer.AnalyzePaths(missed_object_paths) |
| bulk_analyzer.SortPaths() |
| if track_string_literals: |
| merge_string_syms = [s for s in raw_symbols if |
| s.full_name == '** merge strings' or |
| s.full_name == '** lld merge strings'] |
| # More likely for there to be a bug in supersize than an ELF to not have a |
| # single string literal. |
| assert merge_string_syms |
| string_ranges = [(s.address, s.size) for s in merge_string_syms] |
| bulk_analyzer.AnalyzeStringLiterals(elf_path, string_ranges) |
| |
| logging.info('Stripping linker prefixes from symbol names') |
| _StripLinkerAddedSymbolPrefixes(raw_symbols) |
| # Map file for some reason doesn't demangle all names. |
| # Demangle prints its own log statement. |
| demangle.DemangleRemainingSymbols(raw_symbols, tool_prefix) |
| |
| object_paths_by_name = {} |
| if elf_path: |
| logging.info( |
| 'Adding symbols removed by identical code folding (as reported by nm)') |
| # This normally does not block (it's finished by this time). |
| names_by_address = elf_nm_result.get() |
| _UpdateSymbolNamesFromNm(raw_symbols, names_by_address) |
| |
| raw_symbols = _AddNmAliases(raw_symbols, names_by_address) |
| |
| if outdir_context: |
| object_paths_by_name = bulk_analyzer.GetSymbolNames() |
| logging.debug( |
| 'Fetched path information for %d symbols from %d files', |
| len(object_paths_by_name), |
| len(outdir_context.elf_object_paths) + len(missed_object_paths)) |
| # For aliases, this provides path information where there wasn't any. |
| logging.info('Creating aliases for symbols shared by multiple paths') |
| raw_symbols = _AssignNmAliasPathsAndCreatePathAliases( |
| raw_symbols, object_paths_by_name) |
| |
| if track_string_literals: |
| logging.info('Waiting for string literal extraction to complete.') |
| list_of_positions_by_object_path = bulk_analyzer.GetStringPositions() |
| bulk_analyzer.Close() |
| |
| if track_string_literals: |
| logging.info('Deconstructing ** merge strings into literals') |
| replacements = _CreateMergeStringsReplacements(merge_string_syms, |
| list_of_positions_by_object_path) |
| for merge_sym, literal_syms in itertools.izip( |
| merge_string_syms, replacements): |
| # Don't replace if no literals were found. |
| if literal_syms: |
| # Re-find the symbols since aliases cause their indices to change. |
| idx = raw_symbols.index(merge_sym) |
| # This assignment is a bit slow (causes array to be shifted), but |
| # is fast enough since len(merge_string_syms) < 10. |
| raw_symbols[idx:idx + 1] = literal_syms |
| |
| return section_sizes, raw_symbols, object_paths_by_name |
| |
| |
def _ComputePakFileSymbols(
    file_name, contents, res_info, symbols_by_id, compression_ratio=1):
  """Adds the resources of a single .pak file to |symbols_by_id|.

  Args:
    file_name: Path of the .pak file. The length of its basename selects the
        section, and the name is embedded in the per-file overhead symbol.
    contents: Parsed pak data. Only its |resources| mapping (resource id ->
        payload) is read.
    res_info: Dict of resource id -> (name, source_path).
    symbols_by_id: Dict that is updated in place. Keys are resource ids, plus
        hash(file_name) for the per-file overhead symbol.
    compression_ratio: Factor applied to every size recorded by this call.
  """
  resources = contents.resources
  # Entries whose payload is the very same object are aliases of each other.
  # Visiting the ids in descending order leaves the smallest id as the
  # canonical one for each payload.
  canonical_id_by_object = {
      id(data): res_id
      for res_id, data in sorted(resources.items(), reverse=True)}
  alias_map = {}
  for res_id, data in resources.items():
    canonical_id = canonical_id_by_object[id(data)]
    if canonical_id != res_id:
      alias_map[res_id] = canonical_id

  # Longest locale pak is es-419.pak
  if len(os.path.basename(file_name)) <= 9:
    section_name = models.SECTION_PAK_TRANSLATIONS
  else:
    section_name = models.SECTION_PAK_NONTRANSLATED
  overhead = (12 + 6) * compression_ratio  # Header size plus extra offset
  symbols_by_id[hash(file_name)] = models.Symbol(
      section_name, overhead, full_name='Overhead: {}'.format(file_name))

  # Ascending order guarantees that a canonical entry is seen before any of
  # its aliases.
  for resource_id in sorted(resources):
    if resource_id in alias_map:
      # 4 extra bytes of metadata (2 16-bit ints)
      size = 4
      resource_id = alias_map[resource_id]
    else:
      resource_data = resources[resource_id]
      # 6 extra bytes of metadata (1 32-bit int, 1 16-bit int)
      size = len(resource_data) + 6
    name, source_path = res_info[resource_id]
    if resource_id not in symbols_by_id:
      full_name = '{}: {}'.format(source_path, name)
      new_symbol = models.Symbol(
          section_name, 0, address=resource_id, full_name=full_name)
      if (section_name == models.SECTION_PAK_NONTRANSLATED and
          _IsPakContentUncompressed(resource_data)):
        new_symbol.flags |= models.FLAG_UNCOMPRESSED
      symbols_by_id[resource_id] = new_symbol

    # The same id may already have been recorded (e.g. by a previous call that
    # shared |symbols_by_id|), so sizes accumulate rather than overwrite.
    size *= compression_ratio
    symbols_by_id[resource_id].size += size
| |
| |
| def _IsPakContentUncompressed(content): |
| raw_size = len(content) |
| # Assume anything less than 100 bytes cannot be compressed. |
| if raw_size < 100: |
| return False |
| |
| compressed_size = len(zlib.compress(content, 1)) |
| compression_ratio = compressed_size / float(raw_size) |
| return compression_ratio < _UNCOMPRESSED_COMPRESSION_RATIO_THRESHOLD |
| |
| |
| class _ResourceSourceMapper(object): |
| def __init__(self, apk_path, output_directory, knobs): |
| self._knobs = knobs |
| self._res_info = self._LoadResInfo(apk_path, output_directory) |
| self._pattern_dollar_underscore = re.compile(r'\$(.*?)__\d+') |
| self._pattern_version_suffix = re.compile(r'-v\d+/') |
| |
| @staticmethod |
| def _ParseResInfoFile(res_info_path): |
| with open(res_info_path, 'r') as info_file: |
| res_info = {} |
| renames = {} |
| for line in info_file.readlines(): |
| dest, source = line.strip().split(',') |
| # Allow indirection due to renames. |
| if dest.startswith('Rename:'): |
| dest = dest.split(':', 1)[1] |
| renames[dest] = source |
| else: |
| res_info[dest] = source |
| for dest, renamed_dest in renames.iteritems(): |
| # Allow one more level of indirection due to renaming renamed files |
| renamed_dest = renames.get(renamed_dest, renamed_dest) |
| actual_source = res_info.get(renamed_dest) |
| if actual_source: |
| res_info[dest] = actual_source |
| return res_info |
| |
| def _LoadResInfo(self, apk_path, output_directory): |
| apk_name = os.path.basename(apk_path) |
| apk_res_info_name = apk_name + '.res.info' |
| apk_res_info_path = os.path.join( |
| output_directory, 'size-info', apk_res_info_name) |
| res_info_without_root = self._ParseResInfoFile(apk_res_info_path) |
| # We package resources in the res/ folder only in the apk. |
| res_info = { |
| os.path.join('res', dest): source |
| for dest, source in res_info_without_root.iteritems() |
| } |
| res_info.update(self._knobs.apk_other_files) |
| return res_info |
| |
| def FindSourceForPath(self, path): |
| original_path = path |
| # Sometimes android adds $ in front and __# before extension. |
| path = self._pattern_dollar_underscore.sub(r'\1', path) |
| ret = self._res_info.get(path) |
| if ret: |
| return ret |
| # Android build tools may append extra -v flags for the root dir. |
| path = self._pattern_version_suffix.sub('/', path) |
| ret = self._res_info.get(path) |
| if ret: |
| return ret |
| if original_path not in self._knobs.apk_expected_other_files: |
| logging.warning('Unexpected file in apk: %s', original_path) |
| return None |
| |
| |
| def _ParsePakInfoFile(pak_info_path): |
| with open(pak_info_path, 'r') as info_file: |
| res_info = {} |
| for line in info_file.readlines(): |
| name, res_id, path = line.split(',') |
| res_info[int(res_id)] = (name, path.strip()) |
| return res_info |
| |
| |
def _ParsePakSymbols(
    section_sizes, symbols_by_id, object_paths_by_pak_id):
  """Turns per-resource pak symbols into a sorted list of raw symbols.

  Args:
    section_sizes: Dict of section name -> size. Updated in place with the
        sizes of all returned symbols.
    symbols_by_id: Dict of key -> Symbol, as filled in by
        _ComputePakFileSymbols(). Symbol sizes are truncated to ints in place.
    object_paths_by_pak_id: Dict of resource id -> container of object paths.
        NOTE: one path is pop()'ed from each container that is used, so the
        caller's containers are modified.

  Returns:
    List of symbols sorted by (section_name, address, object_path), followed
    by one symbol that holds the size lost by truncation.
  """
  raw_symbols = []
  for resource_id, symbol in symbols_by_id.items():
    raw_symbols.append(symbol)
    paths = object_paths_by_pak_id.get(resource_id)
    if not paths:
      continue
    symbol.object_path = paths.pop()
    if not paths:
      continue
    # The resource is referenced from several object files: create one alias
    # per remaining path, all sharing a single aliases list.
    aliases = symbol.aliases or [symbol]
    symbol.aliases = aliases
    for path in paths:
      new_sym = models.Symbol(
          symbol.section_name, symbol.size, address=symbol.address,
          full_name=symbol.full_name, object_path=path, aliases=aliases)
      aliases.append(new_sym)
      raw_symbols.append(new_sym)
  raw_symbols.sort(key=lambda s: (s.section_name, s.address, s.object_path))
  raw_total = 0.0
  int_total = 0
  for symbol in raw_symbols:
    raw_total += symbol.size
    # We truncate rather than round to ensure that we do not over attribute. It
    # is easier to add another symbol to make up the difference.
    symbol.size = int(symbol.size)
    int_total += symbol.size
  # Attribute excess to translations since only those are compressed.
  raw_symbols.append(models.Symbol(
      models.SECTION_PAK_TRANSLATIONS, int(round(raw_total - int_total)),
      full_name='Overhead: Pak compression artifacts'))

  for symbol in raw_symbols:
    prev = section_sizes.setdefault(symbol.section_name, 0)
    section_sizes[symbol.section_name] = prev + symbol.size
  return raw_symbols
| |
| |
| def _ParseApkElfSectionSize(section_sizes, metadata, apk_elf_result): |
| if metadata: |
| logging.debug('Extracting section sizes from .so within .apk') |
| apk_build_id, apk_section_sizes, elf_overhead_size = apk_elf_result.get() |
| assert apk_build_id == metadata[models.METADATA_ELF_BUILD_ID], ( |
| 'BuildID from apk_elf_result did not match') |
| |
| packed_section_name = None |
| architecture = metadata[models.METADATA_ELF_ARCHITECTURE] |
| # Packing occurs enabled only arm32 & arm64. |
| if architecture == 'arm': |
| packed_section_name = '.rel.dyn' |
| elif architecture == 'arm64': |
| packed_section_name = '.rela.dyn' |
| |
| if packed_section_name: |
| logging.debug('Recording size of unpacked relocations') |
| if packed_section_name not in section_sizes: |
| logging.warning('Packed section not present: %s', packed_section_name) |
| else: |
| apk_section_sizes['%s (unpacked)' % packed_section_name] = ( |
| section_sizes.get(packed_section_name)) |
| return apk_section_sizes, elf_overhead_size |
| return section_sizes, 0 |
| |
| |
def _ParseDexSymbols(section_sizes, apk_path, output_directory):
  """Creates the dex symbols of |apk_path| and records their total size.

  The summed symbol sizes are added to section_sizes[models.SECTION_DEX].
  """
  dex_symbols = apkanalyzer.CreateDexSymbols(apk_path, output_directory)
  section_sizes.setdefault(models.SECTION_DEX, 0)
  section_sizes[models.SECTION_DEX] += sum(sym.size for sym in dex_symbols)
  return dex_symbols
| |
| |
def _ParseApkOtherSymbols(section_sizes, apk_path, apk_so_path,
                          output_directory, knobs):
  """Creates symbols for the remaining files of an .apk.

  One symbol is created per zip entry, except for |apk_so_path|, .dex and
  .pak entries. A final symbol holds the difference between the size of the
  .apk on disk and the compressed size of all of its entries. The total is
  added to section_sizes[models.SECTION_OTHER].
  """
  mapper = _ResourceSourceMapper(apk_path, output_directory, knobs)
  other_symbols = []
  compressed_total = 0
  with zipfile.ZipFile(apk_path) as apk_zip:
    for entry in apk_zip.infolist():
      compressed_total += entry.compress_size
      entry_name = entry.filename
      # Skip main shared library, pak, and dex files as they are accounted for.
      if entry_name == apk_so_path or entry_name.endswith(('.dex', '.pak')):
        continue
      source_path = mapper.FindSourceForPath(entry_name)
      if source_path is None:
        source_path = os.path.join(models.APK_PREFIX_PATH, entry_name)
      # Full name must disambiguate.
      other_symbols.append(models.Symbol(
          models.SECTION_OTHER, entry.compress_size,
          source_path=source_path, full_name=entry_name))
  overhead_size = os.path.getsize(apk_path) - compressed_total
  assert overhead_size >= 0, 'Apk overhead must be non-negative'
  other_symbols.append(models.Symbol(
      models.SECTION_OTHER, overhead_size, full_name='Overhead: APK file'))
  section_sizes.setdefault(models.SECTION_OTHER, 0)
  section_sizes[models.SECTION_OTHER] += sum(s.size for s in other_symbols)
  return other_symbols
| |
| |
| def _CreatePakObjectMap(object_paths_by_name): |
| # IDS_ macro usages result in templated function calls that contain the |
| # resource ID in them. These names are collected along with all other symbols |
| # by running "nm" on them. We just need to extract the values from them. |
| object_paths_by_pak_id = {} |
| PREFIX = 'void ui::WhitelistedResource<' |
| id_start_idx = len(PREFIX) |
| id_end_idx = -len('>()') |
| for name in object_paths_by_name: |
| if name.startswith(PREFIX): |
| pak_id = int(name[id_start_idx:id_end_idx]) |
| object_paths_by_pak_id[pak_id] = object_paths_by_name[name] |
| return object_paths_by_pak_id |
| |
| |
def _FindPakSymbolsFromApk(apk_path, output_directory, knobs):
  """Returns symbols_by_id for every .pak entry stored in |apk_path|.

  Resource names come from size-info/<apk name>.pak.info under
  |output_directory|. Entries that are stored compressed have their sizes
  scaled by |knobs.pak_compression_ratio|, and the actual overall ratio is
  logged for comparison.
  """
  symbols_by_id = {}
  compressed_total = 0
  uncompressed_total = 0
  with zipfile.ZipFile(apk_path) as apk_zip:
    pak_entries = [
        entry for entry in apk_zip.infolist()
        if entry.filename.endswith('.pak')]
    pak_info_path = os.path.join(
        output_directory, 'size-info', os.path.basename(apk_path) + '.pak.info')
    res_info = _ParsePakInfoFile(pak_info_path)
    for entry in pak_entries:
      contents = data_pack.ReadDataPackFromString(apk_zip.read(entry))
      if entry.compress_size < entry.file_size:
        compressed_total += entry.compress_size
        uncompressed_total += entry.file_size
        ratio = knobs.pak_compression_ratio
      else:
        ratio = 1.0
      _ComputePakFileSymbols(
          entry.filename, contents, res_info, symbols_by_id,
          compression_ratio=ratio)
    if uncompressed_total > 0:
      actual_ratio = float(compressed_total) / uncompressed_total
      logging.info('Pak Compression Ratio: %f Actual: %f Diff: %.0f',
                   knobs.pak_compression_ratio, actual_ratio,
                   (knobs.pak_compression_ratio - actual_ratio) *
                   uncompressed_total)
  return symbols_by_id
| |
| |
def _FindPakSymbolsFromFiles(pak_files, pak_info_path, output_directory):
  """Uses files from args to find and add pak symbols.

  Args:
    pak_files: Paths of the .pak files to read.
    pak_info_path: Path of the .pak.info file that names their resources.
    output_directory: Pak file names are recorded relative to this directory.

  Returns:
    The symbols_by_id dict filled in by _ComputePakFileSymbols().
  """
  res_info = _ParsePakInfoFile(pak_info_path)
  symbols_by_id = {}
  for pak_file_path in pak_files:
    # .pak files are binary, so they must not go through text-mode newline
    # translation or decoding.
    with open(pak_file_path, 'rb') as f:
      contents = data_pack.ReadDataPackFromString(f.read())
    _ComputePakFileSymbols(
        os.path.relpath(pak_file_path, output_directory), contents, res_info,
        symbols_by_id)
  return symbols_by_id
| |
| |
| def _CalculateElfOverhead(section_sizes, elf_path): |
| if elf_path: |
| section_sizes_total_without_bss = sum( |
| s for k, s in section_sizes.iteritems() if k != models.SECTION_BSS) |
| elf_overhead_size = ( |
| os.path.getsize(elf_path) - section_sizes_total_without_bss) |
| assert elf_overhead_size >= 0, ( |
| 'Negative ELF overhead {}'.format(elf_overhead_size)) |
| return elf_overhead_size |
| return 0 |
| |
| |
def CreateSectionSizesAndSymbols(
    map_path=None, tool_prefix=None, output_directory=None, elf_path=None,
    apk_path=None, track_string_literals=True, metadata=None,
    apk_so_path=None, pak_files=None, pak_info_file=None, linker_name=None,
    knobs=None):
  """Creates sections sizes and symbols for a SizeInfo.

  Args:
    map_path: Path to the linker .map(.gz) file to parse.
    tool_prefix: Prefix for c++filt & nm (required).
    output_directory: Build output directory. If None, source_paths and symbol
        alias information will not be recorded.
    elf_path: Path to the corresponding unstripped ELF file. Used to find symbol
        aliases and inlined functions. Can be None.
    apk_path: Path to the .apk file to measure.
    track_string_literals: Whether to break down "** merge string" sections into
        smaller symbols (requires output_directory).
    metadata: Metadata dict from CreateMetadata().
    apk_so_path: Path to an .so file within an APK file.
    pak_files: List of paths to .pak files.
    pak_info_file: Path to a .pak.info file.
    linker_name: A coded linker name (see linker_map_parser.py).
    knobs: Instance of SectionSizeKnobs with tunable knobs and options. When
        None, a fresh SectionSizeKnobs() is created for this call.

  Returns:
    A tuple of (section_sizes, raw_symbols).
    section_sizes is a dict mapping section names to their size
    raw_symbols is a list of Symbol objects
  """
  # Create the default per call so that no knobs instance is shared (and
  # possibly mutated) across unrelated calls.
  if knobs is None:
    knobs = SectionSizeKnobs()

  if apk_path and elf_path:
    # Extraction takes around 1 second, so do it in parallel.
    apk_elf_result = concurrent.ForkAndCall(
        _ElfInfoFromApk, (apk_path, apk_so_path, tool_prefix))

  outdir_context = None
  source_mapper = None
  if output_directory:
    # Start by finding the elf_object_paths, so that nm can run on them while
    # the linker .map is being parsed.
    logging.info('Parsing ninja files.')
    source_mapper, ninja_elf_object_paths = (
        ninja_parser.Parse(output_directory, elf_path))
    logging.debug('Parsed %d .ninja files.', source_mapper.parsed_file_count)
    assert not elf_path or ninja_elf_object_paths, (
        'Failed to find link command in ninja files for ' +
        os.path.relpath(elf_path, output_directory))

    if ninja_elf_object_paths:
      elf_object_paths, thin_archives = ar.ExpandThinArchives(
          ninja_elf_object_paths, output_directory)
      known_inputs = set(elf_object_paths)
      known_inputs.update(ninja_elf_object_paths)
    else:
      elf_object_paths = None
      known_inputs = None
      # When we don't know which elf file is used, just search all paths.
      thin_archives = set(
          p for p in source_mapper.IterAllPaths()
          if p.endswith('.a') and ar.IsThinArchive(
              os.path.join(output_directory, p)))

    outdir_context = _OutputDirectoryContext(
        elf_object_paths=elf_object_paths,
        known_inputs=known_inputs,
        output_directory=output_directory,
        source_mapper=source_mapper,
        thin_archives=thin_archives)

  section_sizes, raw_symbols, object_paths_by_name = _ParseElfInfo(
      map_path, elf_path, tool_prefix, track_string_literals,
      outdir_context=outdir_context, linker_name=linker_name)
  elf_overhead_size = _CalculateElfOverhead(section_sizes, elf_path)

  pak_symbols_by_id = None
  if apk_path:
    pak_symbols_by_id = _FindPakSymbolsFromApk(
        apk_path, output_directory, knobs)
    if elf_path:
      # |apk_elf_result| exists here because it is started above under the
      # same apk_path-and-elf_path condition.
      section_sizes, elf_overhead_size = _ParseApkElfSectionSize(
          section_sizes, metadata, apk_elf_result)
    raw_symbols.extend(
        _ParseDexSymbols(section_sizes, apk_path, output_directory))
    raw_symbols.extend(
        _ParseApkOtherSymbols(section_sizes, apk_path, apk_so_path,
                              output_directory, knobs))
  elif pak_files and pak_info_file:
    pak_symbols_by_id = _FindPakSymbolsFromFiles(
        pak_files, pak_info_file, output_directory)

  if elf_path:
    elf_overhead_symbol = models.Symbol(
        models.SECTION_OTHER, elf_overhead_size, full_name='Overhead: ELF file')
    prev = section_sizes.setdefault(models.SECTION_OTHER, 0)
    section_sizes[models.SECTION_OTHER] = prev + elf_overhead_size
    raw_symbols.append(elf_overhead_symbol)

  if pak_symbols_by_id:
    logging.debug('Extracting pak IDs from symbol names, and creating symbols')
    object_paths_by_pak_id = _CreatePakObjectMap(object_paths_by_name)
    pak_raw_symbols = _ParsePakSymbols(
        section_sizes, pak_symbols_by_id, object_paths_by_pak_id)
    raw_symbols.extend(pak_raw_symbols)

  _ExtractSourcePathsAndNormalizeObjectPaths(raw_symbols, source_mapper)
  _PopulateComponents(raw_symbols, knobs)
  logging.info('Converting excessive aliases into shared-path symbols')
  _CompactLargeAliasesIntoSharedSymbols(raw_symbols, knobs)
  logging.debug('Connecting nm aliases')
  _ConnectNmAliases(raw_symbols)
  return section_sizes, raw_symbols
| |
| |
def CreateSizeInfo(
    section_sizes, raw_symbols, metadata=None, normalize_names=True):
  """Performs operations on all symbols and creates a SizeInfo object.

  |raw_symbols| is sorted in place before padding is calculated. Names are
  normalized only when |normalize_names| is set.
  """
  def sort_key(sym):
    return (sym.IsPak(), sym.IsBss(), sym.section_name, sym.address)

  symbol_count = len(raw_symbols)
  logging.debug('Sorting %d symbols', symbol_count)
  # TODO(agrieve): Either change this sort so that it's only sorting by section
  #     (and not using .sort()), or have it specify a total ordering (which
  #     must also include putting padding-only symbols before others of the
  #     same address). Note: The sort as-is takes ~1.5 seconds.
  raw_symbols.sort(key=sort_key)
  logging.info('Processed %d symbols', symbol_count)

  # Padding not really required, but it is useful to check for large padding and
  # log a warning.
  logging.info('Calculating padding')
  _CalculatePadding(raw_symbols)

  # Do not call _NormalizeNames() during archive since that method tends to need
  # tweaks over time. Calling it only when loading .size files allows for more
  # flexibility.
  if normalize_names:
    _NormalizeNames(raw_symbols)

  return models.SizeInfo(section_sizes, raw_symbols, metadata=metadata)
| |
| |
| def _DetectGitRevision(directory): |
| """Runs git rev-parse to get the SHA1 hash of the current revision. |
| |
| Args: |
| directory: Path to directory where rev-parse command will be run. |
| |
| Returns: |
| A string with the SHA1 hash, or None if an error occured. |
| """ |
| try: |
| git_rev = subprocess.check_output( |
| ['git', '-C', directory, 'rev-parse', 'HEAD']) |
| return git_rev.rstrip() |
| except Exception: |
| logging.warning('Failed to detect git revision for file metadata.') |
| return None |
| |
| |
def BuildIdFromElf(elf_path, tool_prefix):
  """Returns the Build ID that "readelf -n" prints for |elf_path|."""
  readelf_cmd = [path_util.GetReadElfPath(tool_prefix), '-n', elf_path]
  notes_output = subprocess.check_output(readelf_cmd)
  build_id_match = re.search(r'Build ID: (\w+)', notes_output)
  assert build_id_match, (
      'Build ID not found from running: ' + ' '.join(readelf_cmd))
  return build_id_match.group(1)
| |
| |
def _SectionSizesFromElf(elf_path, tool_prefix):
  """Returns a dict of section name -> size parsed from "readelf -S --wide"."""
  readelf_cmd = [
      path_util.GetReadElfPath(tool_prefix), '-S', '--wide', elf_path]
  headers_output = subprocess.check_output(readelf_cmd)
  # Matches  [ 2] .hash HASH 00000000006681f0 0001f0 003154 04 A 3 0 8
  section_rows = re.finditer(
      r'\[[\s\d]+\] (\..*)$', headers_output, re.MULTILINE)
  columns_per_row = (row.group(1).split() for row in section_rows)
  # Column 0 is the section name, column 4 its size in hex.
  return {columns[0]: int(columns[4], 16) for columns in columns_per_row}
| |
| |
def _ArchFromElf(elf_path, tool_prefix):
  """Returns a short architecture name for |elf_path|.

  The "Machine:" field of "readelf -h" is mapped to x86, x64, arm or arm64.
  Any other value is returned as reported by readelf.
  """
  args = [path_util.GetReadElfPath(tool_prefix), '-h', elf_path]
  stdout = subprocess.check_output(args)
  match = re.search(r'Machine:\s*(.+)', stdout)
  assert match, 'Machine not found from running: ' + ' '.join(args)
  machine = match.group(1)
  arch_by_machine = {
      'Intel 80386': 'x86',
      'Advanced Micro Devices X86-64': 'x64',
      'ARM': 'arm',
      'AArch64': 'arm64',
  }
  return arch_by_machine.get(machine, machine)
| |
| |
| def _ParseGnArgs(args_path): |
| """Returns a list of normalized "key=value" strings.""" |
| args = {} |
| with open(args_path) as f: |
| for l in f: |
| # Strips #s even if within string literal. Not a problem in practice. |
| parts = l.split('#')[0].split('=') |
| if len(parts) != 2: |
| continue |
| args[parts[0].strip()] = parts[1].strip() |
| return ["%s=%s" % x for x in sorted(args.iteritems())] |
| |
| |
def _DetectLinkerName(map_path):
  """Returns the linker name detected from the .map(.gz) file at |map_path|."""
  with _OpenMaybeGz(map_path) as map_file:
    linker_name = linker_map_parser.DetectLinkerNameFromMapFile(map_file)
  return linker_name
| |
| |
def _ElfInfoFromApk(apk_path, apk_so_path, tool_prefix):
  """Returns a tuple of (build_id, section_sizes, elf_overhead_size).

  The .so at |apk_so_path| is extracted from the .apk into a temporary file
  so that readelf can be run on it.
  """
  with zipfile.ZipFile(apk_path) as apk:
    with tempfile.NamedTemporaryFile() as extracted_so:
      extracted_so.write(apk.read(apk_so_path))
      # Make sure the bytes are on disk before readelf opens the file by name.
      extracted_so.flush()
      so_path = extracted_so.name
      build_id = BuildIdFromElf(so_path, tool_prefix)
      section_sizes = _SectionSizesFromElf(so_path, tool_prefix)
      elf_overhead_size = _CalculateElfOverhead(section_sizes, so_path)
      return build_id, section_sizes, elf_overhead_size
| |
| |
def _AutoIdentifyInputFile(args):
  """Sets args.apk_file, args.elf_file or args.map_file from args.f.

  The type is taken from the output of the "file" command.

  Returns:
    True when the type was identified (and |args| updated), False otherwise.
  """
  file_output = subprocess.check_output(['file', args.f])
  format_text = file_output[file_output.find(': ') + 2:]
  # File-not-found -> 'cannot ...' and directory -> 'directory', which don't
  # match anything here, so they are handled by the final 'return False'.
  known_formats = (
      (('Java archive data', 'Zip archive data'), 'apk_file',
       'Auto-identified --apk-file.'),
      (('ELF ',), 'elf_file', 'Auto-identified --elf-file.'),
      (('ASCII text',), 'map_file', 'Auto-identified --map-file.'),
  )
  for prefixes, attr_name, message in known_formats:
    if format_text.startswith(prefixes):
      logging.info(message)
      setattr(args, attr_name, args.f)
      return True
  return False
| |
| |
def AddMainPathsArguments(parser):
  """Add arguments for DeduceMainPaths()."""
  # (flag, keyword arguments) pairs, registered in this order.
  argument_specs = (
      ('-f', dict(
          metavar='FILE',
          help='Auto-identify input file type.')),
      ('--apk-file', dict(
          help='.apk file to measure. When set, --elf-file will be derived '
               '(if unset). Providing the .apk allows for the size of packed '
               'relocations to be recorded')),
      ('--elf-file', dict(
          help='Path to input ELF file. Currently used for capturing '
               'metadata.')),
      ('--map-file', dict(
          help='Path to input .map(.gz) file. Defaults to '
               '{{elf_file}}.map(.gz)?. If given without --elf-file, no size '
               'metadata will be recorded.')),
      ('--no-source-paths', dict(
          action='store_true',
          help='Do not use .ninja files to map object_path -> source_path')),
      ('--output-directory', dict(
          help='Path to the root build directory.')),
      ('--tool-prefix', dict(
          help='Path prefix for c++filt, nm, readelf.')),
  )
  for flag, options in argument_specs:
    parser.add_argument(flag, **options)
| |
| |
def AddArguments(parser):
  """Adds the arguments of the archive command, including the main paths."""
  # (name, keyword arguments) pairs, registered in this order.
  argument_specs = (
      ('size_file', dict(
          help='Path to output .size file.')),
      ('--pak-file', dict(
          action='append',
          help='Paths to pak files.')),
      ('--pak-info-file', dict(
          help='This file should contain all ids found in the pak files that '
               'have been passed in.')),
      ('--no-string-literals', dict(
          dest='track_string_literals', default=True, action='store_false',
          help='Disable breaking down "** merge strings" into more granular '
               'symbols.')),
      ('--source-directory', dict(
          help='Custom path to the root source directory.')),
  )
  for name, options in argument_specs:
    parser.add_argument(name, **options)
  AddMainPathsArguments(parser)
| |
| |
def DeduceMainPaths(args, parser):
  """Computes main paths based on input, and deduces them if needed.

  Args:
    args: Parsed arguments, as registered by AddMainPathsArguments().
    parser: The argument parser; its error() method reports invalid input.

  Returns:
    A tuple of (output_directory, tool_prefix, apk_path, apk_so_path,
    elf_path, map_path, linker_name). output_directory is None when
    --no-source-paths was passed.
  """
  if args.f is not None:
    if not _AutoIdentifyInputFile(args):
      parser.error('Cannot find or identify file %s' % args.f)

  apk_path = args.apk_file
  elf_path = args.elf_file
  map_path = args.map_file
  any_input = apk_path or elf_path or map_path
  if not any_input:
    parser.error('Must pass at least one of --apk-file, --elf-file, --map-file')
  output_directory_finder = path_util.OutputDirectoryFinder(
      value=args.output_directory,
      any_path_within_output_directory=any_input)

  apk_so_path = None
  if apk_path:
    with zipfile.ZipFile(apk_path) as z:
      lib_infos = [f for f in z.infolist()
                   if f.filename.endswith('.so') and f.file_size > 0]
    assert lib_infos, 'APK has no .so files.'
    # TODO(agrieve): Add support for multiple .so files, and take into account
    #     secondary architectures.
    apk_so_path = max(lib_infos, key=lambda x: x.file_size).filename
    logging.debug('Sub-apk path=%s', apk_so_path)
    if not elf_path and output_directory_finder.Tentative():
      elf_path = os.path.join(
          output_directory_finder.Tentative(), 'lib.unstripped',
          os.path.basename(apk_so_path.replace('crazy.', '')))
      logging.debug('Detected --elf-file=%s', elf_path)

  if map_path:
    if not map_path.endswith(('.map', '.map.gz')):
      parser.error('Expected --map-file to end with .map or .map.gz')
  else:
    # The .map path is derived from the ELF path, which is still unknown when
    # only an .apk was given and no output directory could be guessed.
    if not elf_path:
      parser.error('Could not deduce the .map file because the ELF file is '
                   'unknown. Pass --elf-file or --map-file.')
    map_path = elf_path + '.map'
    if not os.path.exists(map_path):
      map_path += '.gz'
    if not os.path.exists(map_path):
      parser.error('Could not find .map(.gz)? file. Ensure you have built with '
                   'is_official_build=true and generate_linker_map=true, or '
                   'use --map-file to point me a linker map file.')

  linker_name = _DetectLinkerName(map_path)
  logging.info('Linker name: %s', linker_name)
  tool_prefix_finder = path_util.ToolPrefixFinder(
      value=args.tool_prefix,
      output_directory_finder=output_directory_finder,
      linker_name=linker_name)
  tool_prefix = tool_prefix_finder.Finalized()
  output_directory = None
  if not args.no_source_paths:
    output_directory = output_directory_finder.Finalized()
  return (output_directory, tool_prefix, apk_path, apk_so_path, elf_path,
          map_path, linker_name)
| |
| |
def Run(args, parser):
  """Entry point of the archive command: creates and saves a .size file.

  Args:
    args: Parsed arguments, as registered by AddArguments().
    parser: The argument parser; its error() method reports invalid input.
  """
  size_path = args.size_file
  if not size_path.endswith('.size'):
    parser.error('size_file must end with .size')

  main_paths = DeduceMainPaths(args, parser)
  (output_directory, tool_prefix, apk_path, apk_so_path, elf_path, map_path,
   linker_name) = main_paths

  metadata = CreateMetadata(map_path, elf_path, apk_path, tool_prefix,
                            output_directory, linker_name)

  knobs = SectionSizeKnobs()
  if args.source_directory:
    knobs.src_root = args.source_directory

  section_sizes, raw_symbols = CreateSectionSizesAndSymbols(
      map_path=map_path,
      tool_prefix=tool_prefix,
      elf_path=elf_path,
      apk_path=apk_path,
      output_directory=output_directory,
      track_string_literals=args.track_string_literals,
      metadata=metadata,
      apk_so_path=apk_so_path,
      pak_files=args.pak_file,
      pak_info_file=args.pak_info_file,
      linker_name=linker_name,
      knobs=knobs)
  size_info = CreateSizeInfo(
      section_sizes, raw_symbols, metadata=metadata, normalize_names=False)

  # The coverage description is only computed when it would be logged.
  if logging.getLogger().isEnabledFor(logging.INFO):
    for line in describe.DescribeSizeInfoCoverage(size_info):
      logging.info(line)
  logging.info('Recorded info for %d symbols', len(size_info.raw_symbols))
  metadata_lines = describe.DescribeMetadata(size_info.metadata)
  logging.info('Recording metadata: \n %s', '\n '.join(metadata_lines))
  logging.info('Saving result to %s', size_path)
  file_format.SaveSizeInfo(size_info, size_path)
  size_in_mb = os.path.getsize(size_path) / 1024.0 / 1024.0
  logging.info('Done. File size is %.2fMiB.', size_in_mb)