| # Copyright 2017 The Chromium Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Main Python API for analyzing binary size.""" |
| |
| import argparse |
| import collections |
| import dataclasses |
| import functools |
| import logging |
| import os |
| import posixpath |
| import re |
| import shlex |
| import subprocess |
| import time |
| import zipfile |
| |
| import apk |
| import apkanalyzer |
| import archive_util |
| import data_quality |
| import describe |
| import dex_deobfuscate |
| import dir_metadata |
| import file_format |
| import function_signature |
| import json_config_parser |
| import models |
| import native |
| import pakfile |
| import parallel |
| import path_util |
| import readelf |
| import zip_util |
| |
| |
@dataclasses.dataclass
class NativeSpec:
  """Inputs for analyzing one native library (or raw ELF / map file)."""
  # One (or more) of apk_so_path, map_path, elf_path must be non-None.
  # Path of the .so file within the .apk. Non-None only when apk_spec is.
  apk_so_path: str = None
  # Path to the linker map file, if one exists.
  map_path: str = None
  # Path to the unstripped ELF file, if one exists.
  elf_path: str = None
  # Path to the unstripped ELF file before it was split into partitions, if
  # one exists.
  combined_elf_path: str = None
  # Whether to create a symbol for each string literal.
  track_string_literals: bool = True
  # Component to assign to every symbol.
  component: str = None
  # Regular expression that matches generated files.
  gen_dir_regex: str = None
  # Prefix applied to every symbol's source_path.
  source_path_prefix: str = None

  @property
  def algorithm(self):
    """Returns the analysis approach that the available inputs allow."""
    if self.map_path:
      return 'linker_map'
    return 'dwarf' if self.elf_path else 'sections'
| |
| |
@dataclasses.dataclass
class PakSpec:
  """Inputs describing which .pak files to analyze."""
  # One of pak_paths or apk_pak_paths must be non-None.
  # Paths to stand-alone .pak files on disk.
  pak_paths: list = None
  # Paths of .pak files within the .apk.
  apk_pak_paths: list = None
  # Path to the .pak.info file listing the ids found in the pak files
  # (see --pak-info-file).
  pak_info_path: str = None
| |
| |
@dataclasses.dataclass
class ApkSpec:
  """Inputs describing a single .apk (or bundle split) to analyze."""
  # Path to the .apk file. Never None.
  # This is a temp file when .apks is being analyzed.
  apk_path: str
  # Path to .minimal.apks (when analyzing bundles).
  minimal_apks_path: str = None
  # Proguard mapping path, used to deobfuscate dex symbols.
  mapping_path: str = None
  # Path to the .pathmap.txt file for the apk. Used to deobfuscate res/ files.
  resources_pathmap_path: str = None
  # Name of the apk split when .apks is being analyzed.
  split_name: str = None
  # Path such as: out/Release/size-info/BaseName
  size_info_prefix: str = None
  # Whether to break down classes.dex.
  analyze_dex: bool = True
  # Whether to create symbols for each string literal.
  track_string_literals: bool = True
  # Dict of apk_path -> source_path, provided by json config.
  path_defaults: dict = None
  # Component to use for symbols when not specified by DIR_METADATA, provided
  # by json config.
  default_component: str = ''
  # Paths to not create .other symbols for.
  ignore_apk_paths: set = dataclasses.field(default_factory=set)
| |
| |
@dataclasses.dataclass
class ContainerSpec:
  """Aggregates all specs needed to analyze one container."""
  # Name for the container (split name / library name is appended elsewhere).
  container_name: str
  # Non-None when an .apk / .minimal.apks is being analyzed.
  apk_spec: ApkSpec
  # Non-None when .pak files are being analyzed.
  pak_spec: PakSpec
  # Non-None when native code is being analyzed.
  native_spec: NativeSpec
  # Path to the root source directory.
  source_directory: str
  # Path to the root build (output) directory.
  output_directory: str
| |
| |
def _NormalizeNames(raw_symbols):
  """Ensures that all names are formatted in a useful way.

  This includes:
    - Deriving |name| and |template_name| from |full_name|.
    - Stripping of return types (for functions).
    - Moving "vtable for" and the like to be suffixes rather than prefixes.

  Args:
    raw_symbols: List of symbols to mutate in-place.
  """
  found_prefixes = set()
  for symbol in raw_symbols:
    full_name = symbol.full_name

    # See comment in _CalculatePadding() about when this can happen. Don't
    # process names for non-native sections.
    if symbol.IsPak():
      # full_name: "about_ui_resources.grdp: IDR_ABOUT_UI_CREDITS_HTML".
      space_idx = full_name.rindex(' ')
      name = full_name[space_idx + 1:]
      symbol.template_name = name
      symbol.name = name
    elif (full_name.startswith('**') or symbol.IsOverhead()
          or symbol.IsOther()):
      # Synthetic names (e.g. "** ..."), overhead, and .other symbols are kept
      # verbatim.
      symbol.template_name = full_name
      symbol.name = full_name
    elif symbol.IsStringLiteral():  # Handles native and DEX strings.
      symbol.full_name = full_name
      symbol.template_name = full_name
      symbol.name = full_name
    elif symbol.IsDex():
      symbol.full_name, symbol.template_name, symbol.name = (
          function_signature.ParseJava(full_name))
    elif symbol.IsNative():
      # Remove [clone] suffix, and set flag accordingly.
      # Search from left-to-right, as multiple [clone]s can exist.
      # Example name suffixes:
      #     [clone .part.322]  # GCC
      #     [clone .isra.322]  # GCC
      #     [clone .constprop.1064]  # GCC
      #     [clone .11064]  # clang
      # http://unix.stackexchange.com/questions/223013/function-symbol-gets-part-suffix-after-compilation
      idx = full_name.find(' [clone ')
      if idx != -1:
        full_name = full_name[:idx]
        symbol.flags |= models.FLAG_CLONE

      # Clones for C symbols.
      if symbol.section == 't':
        idx = full_name.rfind('.')
        if idx != -1 and full_name[idx + 1:].isdigit():
          new_name = full_name[:idx]
          # Generated symbols that end with .123 but are not clones.
          # Find these via:
          # size_info.symbols.WhereInSection('t').WhereIsGroup().SortedByCount()
          if new_name not in ('__tcf_0', 'startup'):
            full_name = new_name
            symbol.flags |= models.FLAG_CLONE
            # Remove .part / .isra / .constprop.
            idx = full_name.rfind('.', 0, idx)
            if idx != -1:
              full_name = full_name[:idx]

      # E.g.: vtable for FOO
      # Limit the search to the start of the name (presumably to avoid
      # matching within parameter lists -- confirm before relying on this).
      idx = full_name.find(' for ', 0, 30)
      if idx != -1:
        found_prefixes.add(full_name[:idx + 4])
        full_name = '{} [{}]'.format(full_name[idx + 5:], full_name[:idx])

      # E.g.: virtual thunk to FOO
      idx = full_name.find(' to ', 0, 30)
      if idx != -1:
        found_prefixes.add(full_name[:idx + 3])
        full_name = '{} [{}]'.format(full_name[idx + 4:], full_name[:idx])

      # Strip out return type, and split out name, template_name.
      # Function parsing also applies to non-text symbols.
      # E.g. Function statics.
      symbol.full_name, symbol.template_name, symbol.name = (
          function_signature.Parse(full_name))

      # Remove anonymous namespaces (they just harm clustering).
      symbol.template_name = symbol.template_name.replace(
          '(anonymous namespace)::', '')
      symbol.full_name = symbol.full_name.replace(
          '(anonymous namespace)::', '')
      non_anonymous_name = symbol.name.replace('(anonymous namespace)::', '')
      if symbol.name != non_anonymous_name:
        symbol.flags |= models.FLAG_ANONYMOUS
        symbol.name = non_anonymous_name

    # Allow using "is" to compare names (and should help with RAM). This
    # applies to all symbols.
    function_signature.InternSameNames(symbol)

  logging.debug('Found name prefixes of: %r', found_prefixes)
| |
| |
def LoadAndPostProcessSizeInfo(path, file_obj=None):
  """Returns a SizeInfo for the given |path|."""
  logging.debug('Loading results from: %s', path)
  size_info = file_format.LoadSizeInfo(path, file_obj=file_obj)
  logging.info('Normalizing symbol names')
  symbols = size_info.raw_symbols
  _NormalizeNames(symbols)
  logging.info('Loaded %d symbols', len(symbols))
  return size_info
| |
| |
def LoadAndPostProcessDeltaSizeInfo(path, file_obj=None):
  """Returns a tuple of SizeInfos for the given |path|."""
  logging.debug('Loading results from: %s', path)
  before, after, _, _ = file_format.LoadDeltaSizeInfo(path, file_obj=file_obj)
  logging.info('Normalizing symbol names')
  for info in (before, after):
    _NormalizeNames(info.raw_symbols)
  logging.info('Loaded %d + %d symbols', len(before.raw_symbols),
               len(after.raw_symbols))
  return before, after
| |
| |
def CreateBuildConfig(output_directory, source_directory, url=None, title=None):
  """Creates the dict to use for SizeInfo.build_info."""
  logging.debug('Constructing build_config')
  ret = {}
  if output_directory:
    args_gn_path = os.path.join(output_directory, 'args.gn')
    ret[models.BUILD_CONFIG_GN_ARGS] = _ParseGnArgs(args_gn_path)
    ret[models.BUILD_CONFIG_OUT_DIRECTORY] = os.path.relpath(
        output_directory, start=source_directory)
  git_rev = _DetectGitRevision(source_directory)
  if git_rev:
    ret[models.BUILD_CONFIG_GIT_REVISION] = git_rev
  if url is not None:
    ret[models.BUILD_CONFIG_URL] = url
  if title is not None:
    ret[models.BUILD_CONFIG_TITLE] = title

  return ret
| |
| |
def _CreateMetadata(container_spec, elf_info):
  """Builds the metadata dict for one container."""
  logging.debug('Constructing metadata')
  apk_spec = container_spec.apk_spec
  native_spec = container_spec.native_spec
  output_directory = container_spec.output_directory

  # Ensure all paths are relative to output directory to make them hermetic.
  if output_directory:
    shorten_path = lambda path: os.path.relpath(path, output_directory)
  else:
    # If output directory is unavailable, just store basenames.
    shorten_path = os.path.basename

  parts = []
  if apk_spec:
    parts.append(
        apk.CreateMetadata(apk_spec=apk_spec,
                           include_file_details=not native_spec,
                           shorten_path=shorten_path))
  if native_spec:
    parts.append(
        native.CreateMetadata(native_spec=native_spec,
                              elf_info=elf_info,
                              shorten_path=shorten_path))

  metadata = {}
  for part in parts:
    # Key sets from each source must be disjoint.
    assert not (metadata.keys() & part.keys())
    metadata.update(part)

  logging.debug('Constructing metadata (done)')
  return metadata
| |
| |
def _CreatePakSymbols(*, pak_spec, pak_id_map, apk_spec, output_directory):
  """Creates pak symbols either from within an .apk or from loose files."""
  logging.debug('Creating Pak symbols')
  # Populated by the pakfile helpers below.
  section_ranges = {}
  if not apk_spec:
    raw_symbols = pakfile.CreatePakSymbolsFromFiles(section_ranges,
                                                    pak_spec.pak_paths,
                                                    pak_spec.pak_info_path,
                                                    output_directory,
                                                    pak_id_map)
  else:
    assert apk_spec.size_info_prefix
    raw_symbols = pakfile.CreatePakSymbolsFromApk(section_ranges,
                                                  apk_spec.apk_path,
                                                  pak_spec.apk_pak_paths,
                                                  pak_spec.pak_info_path,
                                                  pak_id_map)
  return section_ranges, raw_symbols
| |
| |
def _CreateContainerSymbols(container_spec, apk_file_manager,
                            apk_analyzer_results, ninja_source_mapper,
                            pak_id_map, component_overrides,
                            dex_deobfuscator_cache):
  """Creates all symbols for one container and assigns the container to them.

  Args:
    container_spec: ContainerSpec describing what to analyze.
    apk_file_manager: Used to list the contents of the .apk.
    apk_analyzer_results: Dict keyed by container name; the value is forwarded
        to apkanalyzer.CreateDexSymbols().
    ninja_source_mapper: Forwarded to native.CreateSymbols() (presumably maps
        build outputs to source paths -- see native.py).
    pak_id_map: Forwarded to native and pak symbol creation.
    component_overrides: Forwarded to dir_metadata.PopulateComponents().
    dex_deobfuscator_cache: Provides the class deobfuscation map for a given
        proguard mapping file.

  Returns:
    List of raw symbols, each with |symbol.container| set.
  """
  container_name = container_spec.container_name
  apk_spec = container_spec.apk_spec
  pak_spec = container_spec.pak_spec
  native_spec = container_spec.native_spec
  output_directory = container_spec.output_directory
  source_directory = container_spec.source_directory

  logging.info('Starting on container: %s', container_spec)

  raw_symbols = []
  section_sizes = {}
  metrics_by_file = {}
  default_component = apk_spec.default_component if apk_spec else ''

  # Merges |section_ranges| into |section_sizes| and appends
  # |new_raw_symbols| (normalized, prefixed, and componentized) onto
  # |raw_symbols|.
  def add_syms(section_ranges,
               new_raw_symbols,
               source_path_prefix=None,
               component=None,
               paths_already_normalized=False):
    new_section_sizes = {
        k: size
        for k, (address, size) in section_ranges.items()
    }
    # SECTION_OTHER can be contributed by multiple add_syms() calls, so sum it
    # rather than asserting uniqueness like the other sections.
    if models.SECTION_OTHER in new_section_sizes:
      section_sizes[models.SECTION_OTHER] = section_sizes.get(
          models.SECTION_OTHER, 0) + new_section_sizes[models.SECTION_OTHER]
      del new_section_sizes[models.SECTION_OTHER]

    assert not (set(section_sizes) & set(new_section_sizes)), (
        'Section collision: {}\n\n {}'.format(section_sizes, new_section_sizes))
    section_sizes.update(new_section_sizes)

    # E.g.: native.CreateSymbols() already calls NormalizePaths().
    if not paths_already_normalized:
      archive_util.NormalizePaths(new_raw_symbols)

    if source_path_prefix:
      # Prefix the source_path for all symbols that have a source_path
      # assigned, and that don't have it set to $APK or $GOOGLE3.
      for s in new_raw_symbols:
        if s.source_path and s.source_path[0] != '$':
          s.source_path = source_path_prefix + s.source_path

    if component is not None:
      # An explicit component overrides DIR_METADATA lookup.
      for s in new_raw_symbols:
        s.component = component
    else:
      dir_metadata.PopulateComponents(new_raw_symbols,
                                      source_directory,
                                      component_overrides,
                                      default_component=default_component)
    raw_symbols.extend(new_raw_symbols)

  elf_info = None
  if native_spec:
    section_ranges, native_symbols, elf_info, native_metrics_by_file = (
        native.CreateSymbols(apk_spec=apk_spec,
                             native_spec=native_spec,
                             output_directory=output_directory,
                             ninja_source_mapper=ninja_source_mapper,
                             pak_id_map=pak_id_map))
    add_syms(section_ranges,
             native_symbols,
             source_path_prefix=native_spec.source_path_prefix,
             component=native_spec.component,
             paths_already_normalized=True)
    metrics_by_file.update(native_metrics_by_file)
  elif apk_spec and apk_spec.analyze_dex:
    logging.info('Analyzing DEX')
    apk_infolist = apk_file_manager.InfoList(apk_spec.apk_path)
    dex_total_size = sum(i.file_size for i in apk_infolist
                         if i.filename.endswith('.dex'))
    if dex_total_size > 0:
      mapping_path = apk_spec.mapping_path  # May be None.
      class_deobfuscation_map = (
          dex_deobfuscator_cache.GetForMappingFile(mapping_path))
      section_ranges, dex_symbols, dex_metrics_by_file = (
          apkanalyzer.CreateDexSymbols(apk_spec.apk_path,
                                       apk_analyzer_results[container_name],
                                       dex_total_size, class_deobfuscation_map,
                                       apk_spec.size_info_prefix,
                                       apk_spec.track_string_literals))
      add_syms(section_ranges, dex_symbols)
      metrics_by_file.update(dex_metrics_by_file)

  if pak_spec:
    section_ranges, pak_symbols = _CreatePakSymbols(
        pak_spec=pak_spec,
        pak_id_map=pak_id_map,
        apk_spec=apk_spec,
        output_directory=output_directory)
    add_syms(section_ranges, pak_symbols)
  apk_metadata = {}

  # This function can get called multiple times for the same APK file, to
  # process .so files that are treated as containers. The |not native_spec|
  # condition below skips these cases to prevent redundant symbol creation.
  if not native_spec and apk_spec:
    logging.info('Analyzing ARSC')
    arsc_section_ranges, arsc_symbols, arsc_metrics_by_file = (
        apk.CreateArscSymbols(apk_spec))
    add_syms(arsc_section_ranges, arsc_symbols)
    metrics_by_file.update(arsc_metrics_by_file)

    other_section_ranges, other_symbols, apk_metadata, apk_metrics_by_file = (
        apk.CreateApkOtherSymbols(apk_spec))
    add_syms(other_section_ranges, other_symbols)
    metrics_by_file.update(apk_metrics_by_file)

  metadata = _CreateMetadata(container_spec, elf_info)
  assert not (metadata.keys() & apk_metadata.keys())
  metadata.update(apk_metadata)
  container = models.Container(name=container_name,
                               metadata=metadata,
                               section_sizes=section_sizes,
                               metrics_by_file=metrics_by_file)
  for symbol in raw_symbols:
    symbol.container = container

  return raw_symbols
| |
| |
| def _DetectGitRevision(directory): |
| """Runs git rev-parse to get the SHA1 hash of the current revision. |
| |
| Args: |
| directory: Path to directory where rev-parse command will be run. |
| |
| Returns: |
| A string with the SHA1 hash, or None if an error occured. |
| """ |
| try: |
| git_rev = subprocess.check_output( |
| ['git', '-C', directory, 'rev-parse', 'HEAD']).decode('ascii') |
| return git_rev.rstrip() |
| except Exception: |
| logging.warning('Failed to detect git revision for file metadata.') |
| return None |
| |
| |
| def _ParseGnArgs(args_path): |
| """Returns a list of normalized "key=value" strings.""" |
| args = {} |
| with open(args_path) as f: |
| for l in f: |
| # Strips #s even if within string literal. Not a problem in practice. |
| parts = l.split('#')[0].split('=') |
| if len(parts) != 2: |
| continue |
| args[parts[0].strip()] = parts[1].strip() |
| return ["%s=%s" % x for x in sorted(args.items())] |
| |
| |
| def _AddContainerArguments(parser, is_top_args=False): |
| """Add arguments applicable to a single container.""" |
| |
| # Main file argument: Exactly one should be specified (perhaps via -f). |
| # _IdentifyInputFile() should be kept updated. |
| group = parser.add_argument_group(title='Main Input') |
| group = group.add_mutually_exclusive_group(required=True) |
| group.add_argument('-f', |
| metavar='FILE', |
| help='Auto-identify input file type.') |
| group.add_argument('--apk-file', |
| help='.apk file to measure. Other flags can generally be ' |
| 'derived when this is used.') |
| group.add_argument('--minimal-apks-file', |
| help='.minimal.apks file to measure. Other flags can ' |
| 'generally be derived when this is used.') |
| group.add_argument('--elf-file', help='Path to input ELF file.') |
| group.add_argument('--map-file', |
| help='Path to input .map(.gz) file. Defaults to ' |
| '{{elf_file}}.map(.gz)?. If given without ' |
| '--elf-file, no size metadata will be recorded.') |
| group.add_argument('--pak-file', |
| action='append', |
| default=[], |
| dest='pak_files', |
| help='Paths to pak files.') |
| if is_top_args: |
| group.add_argument('--ssargs-file', |
| help='Path to SuperSize multi-container arguments file.') |
| |
| group = parser.add_argument_group(title='What to Analyze') |
| group.add_argument('--java-only', |
| action='store_true', |
| help='Run on only Java symbols') |
| group.add_argument('--native-only', |
| action='store_true', |
| help='Run on only native symbols') |
| group.add_argument('--no-java', |
| action='store_true', |
| help='Do not run on Java symbols') |
| group.add_argument('--no-native', |
| action='store_true', |
| help='Do not run on native symbols') |
| if is_top_args: |
| group.add_argument('--container-filter', |
| help='Regular expression for which containers to create') |
| |
| group = parser.add_argument_group(title='Analysis Options for Native Code') |
| group.add_argument('--no-map-file', |
| dest='ignore_linker_map', |
| action='store_true', |
| help='Use debug information to capture symbol sizes ' |
| 'instead of linker map file.') |
| # Used by tests to override path to APK-discovered files. |
| group.add_argument('--aux-elf-file', help=argparse.SUPPRESS) |
| group.add_argument( |
| '--aux-map-file', |
| help='Path to linker map to use when --elf-file is provided') |
| |
| group = parser.add_argument_group(title='APK options') |
| group.add_argument('--mapping-file', |
| help='Proguard .mapping file for deobfuscation.') |
| group.add_argument('--resources-pathmap-file', |
| help='.pathmap.txt file that contains a maping from ' |
| 'original resource paths to shortened resource paths.') |
| group.add_argument('--abi-filter', |
| dest='abi_filters', |
| action='append', |
| help='For apks with multiple ABIs, break down native ' |
| 'libraries for this ABI. Defaults to 64-bit when both ' |
| '32 and 64 bit are present.') |
| |
| group = parser.add_argument_group(title='Analysis Options for Pak Files') |
| group.add_argument('--pak-info-file', |
| help='This file should contain all ids found in the pak ' |
| 'files that have been passed in. If not specified, ' |
| '${pak_file}.info is assumed.') |
| |
| group = parser.add_argument_group(title='Analysis Options (shared)') |
| group.add_argument('--source-directory', |
| help='Custom path to the root source directory.') |
| group.add_argument('--output-directory', |
| help='Path to the root build directory.') |
| group.add_argument('--symbols-dir', |
| default='lib.unstripped', |
| help='Relative path containing unstripped .so files ' |
| '(for symbols) w.r.t. the output directory.') |
| group.add_argument('--no-string-literals', |
| action='store_true', |
| help=('Do not create symbols for string literals ' |
| '(applies to DEX and Native).')) |
| if is_top_args: |
| group.add_argument('--json-config', help='Path to a supersize.json.') |
| group.add_argument('--no-output-directory', |
| action='store_true', |
| help='Do not auto-detect --output-directory.') |
| group.add_argument('--check-data-quality', |
| action='store_true', |
| help='Perform sanity checks to ensure there is no ' |
| 'missing data.') |
| |
| |
def AddArguments(parser):
  """Adds all top-level SuperSize-archive arguments to |parser|."""
  parser.add_argument('size_file', help='Path to output .size file.')
  parser.add_argument('--title',
                      help='Value for the "title" build_config entry.')
  parser.add_argument('--url', help='Value for the "url" build_config entry.')
  _AddContainerArguments(parser, is_top_args=True)
| |
| |
| def _IdentifyInputFile(args, on_config_error): |
| """Identifies main input file type from |args.f|, and updates |args|. |
| |
| Identification is performed on filename alone, i.e., the file need not exist. |
| The result is written to a field in |args|. If the field exists then it |
| simply gets overwritten. |
| |
| If '.' is missing from |args.f| then --elf-file is assumed. |
| |
| Returns: |
| The primary input file. |
| """ |
| if args.f: |
| if args.f.endswith('.minimal.apks'): |
| args.minimal_apks_file = args.f |
| elif args.f.endswith('.apk'): |
| args.apk_file = args.f |
| elif args.f.endswith('.so') or '.' not in os.path.basename(args.f): |
| args.elf_file = args.f |
| elif args.f.endswith('.map') or args.f.endswith('.map.gz'): |
| args.map_file = args.f |
| elif args.f.endswith('.pak'): |
| args.pak_files.append(args.f) |
| elif args.f.endswith('.ssargs'): |
| # Fails if trying to nest them, which should never happen. |
| args.ssargs_file = args.f |
| else: |
| on_config_error('Cannot identify file ' + args.f) |
| args.f = None |
| |
| ret = [ |
| args.apk_file, args.elf_file, args.minimal_apks_file, |
| args.__dict__.get('ssargs_file'), args.map_file |
| ] + (args.pak_files or []) |
| ret = [v for v in ret if v] |
| if not ret: |
| on_config_error( |
| 'Must pass at least one of --apk-file, --minimal-apks-file, ' |
| '--elf-file, --map-file, --pak-file, --ssargs-file') |
| return ret[0] |
| |
| |
def ParseSsargs(lines):
  """Parses .ssargs data.

  An .ssargs file is a text file to specify multiple containers as input to
  SuperSize-archive. After '#'-based comments, start / end whitespaces, and
  empty lines are stripped, each line specifies a distinct container. Format:
  * Positional argument: |name| for the container.
  * Main input file specified by -f, --apk-file, --elf-file, etc.:
    * Can be an absolute path.
    * Can be a relative path. In this case, it's up to the caller to supply the
      base directory.
    * -f switch must not specify another .ssargs file.
  * For supported switches: See _AddContainerArguments().

  Args:
    lines: An iterator containing lines of .ssargs data.
  Returns:
    A list of arguments, one for each container.
  Raises:
    ValueError: Parse error, including input line number.
  """
  def _RaiseValueError(msg):
    raise ValueError(msg)

  parser = argparse.ArgumentParser(add_help=False)
  # Convert parser errors (which normally SystemExit) into ValueError so that
  # they can be annotated with the offending line number below.
  parser.error = _RaiseValueError
  parser.add_argument('name')
  _AddContainerArguments(parser)
  sub_args_list = []
  try:
    for lineno, line in enumerate(lines, 1):
      tokens = shlex.split(line, comments=True)
      # Skip if line is empty after stripping comments.
      if tokens:
        sub_args_list.append(parser.parse_args(tokens))
  except ValueError as e:
    e.args = ('Line %d: %s' % (lineno, e.args[0]), )
    raise e
  return sub_args_list
| |
| |
def _MakeNativeSpec(json_config, **kwargs):
  """Creates a NativeSpec and applies json config overrides to it.

  Args:
    json_config: Parsed supersize.json config object.
    **kwargs: Forwarded to the NativeSpec constructor.

  Returns:
    The configured NativeSpec.
  """
  native_spec = NativeSpec(**kwargs)
  if native_spec.elf_path or native_spec.map_path:
    basename = os.path.basename(native_spec.elf_path or native_spec.map_path)
    native_spec.component = json_config.ComponentForNativeFile(basename)
    native_spec.gen_dir_regex = json_config.GenDirRegexForNativeFile(basename)
    native_spec.source_path_prefix = json_config.SourcePathPrefixForNativeFile(
        basename)

  if not native_spec.map_path:
    # TODO(crbug.com/40757867): Implement string literal tracking without map
    # files. nm emits some string literal symbols, but most are missing.
    native_spec.track_string_literals = False

  # Note: A redundant early return here was removed; both paths fall through
  # to this single return.
  return native_spec
| |
| |
def _ElfIsMainPartition(elf_path):
  """Returns whether |elf_path| contains the partition end-marker section."""
  return models.SECTION_PART_END in readelf.SectionInfoFromElf(elf_path)
| |
| |
def _DeduceMapPath(elf_path):
  """Looks for a .map / .map.gz linker map alongside |elf_path|."""
  if _ElfIsMainPartition(elf_path):
    candidate = elf_path.replace('.so', '__combined.so') + '.map'
  else:
    candidate = elf_path + '.map'

  map_path = None
  for path in (candidate, candidate + '.gz'):
    if os.path.exists(path):
      map_path = path
      break

  if map_path:
    logging.debug('Detected map_path=%s', map_path)
  return map_path
| |
| |
def _CreateNativeSpecs(*, tentative_output_dir, symbols_dir, apk_infolist,
                       elf_path, map_path, abi_filters, auto_abi_filters,
                       track_string_literals, ignore_linker_map, json_config,
                       on_config_error):
  """Creates one NativeSpec for each native library to analyze.

  When |apk_infolist| is falsy, at most one spec is created directly from
  |elf_path| / |map_path|. Otherwise one spec is created per non-empty .so in
  the apk, and |elf_path| / |map_path| act as --aux-* overrides that are
  consumed by the first (largest) eligible library.

  Returns:
    Tuple of (abi_filters, list of NativeSpecs).
  """
  if ignore_linker_map:
    map_path = None
  elif (map_path and not map_path.endswith('.map')
        and not map_path.endswith('.map.gz')):
    on_config_error('Expected --map-file to end with .map or .map.gz')
  elif elf_path and not map_path:
    map_path = _DeduceMapPath(elf_path)

  ret = []
  # if --elf-path or --map-path (rather than --aux-elf-path, --aux-map-path):
  if not apk_infolist:
    if map_path or elf_path:
      combined_elf_path = None
      if map_path and '__combined.so' in map_path:
        combined_elf_path = elf_path[:-3] + '__combined.so'

      ret.append(
          _MakeNativeSpec(json_config,
                          apk_so_path=None,
                          map_path=map_path,
                          elf_path=elf_path,
                          combined_elf_path=combined_elf_path,
                          track_string_literals=track_string_literals))
    return abi_filters, ret

  # Ignore empty .so files.
  lib_infos = [
      f for f in apk_infolist if f.filename.endswith('.so') and f.file_size > 0
  ]

  # Sort so elf_path/map_path applies largest non-filtered library.
  matches_abi = lambda n: not abi_filters or any(f in n for f in abi_filters)
  lib_infos.sort(key=lambda x: (not matches_abi(x.filename), -x.file_size))

  for lib_info in lib_infos:
    apk_so_path = lib_info.filename
    cur_elf_path = None
    cur_map_path = None
    if not matches_abi(apk_so_path):
      logging.debug('Not breaking down %s: secondary ABI', apk_so_path)
    elif apk_so_path.endswith('_partition.so'):
      # TODO(agrieve): Support symbol breakdowns for partitions (they exist in
      #     the __combined .map file. Debug information (nm output) is shared
      #     with base partition.
      logging.debug('Not breaking down %s: partitioned library', apk_so_path)
    else:
      if elf_path:
        # Consume --aux-elf-file for the largest matching binary.
        cur_elf_path = elf_path
        elf_path = None
      elif tentative_output_dir:
        # TODO(crbug.com/40229168): Remove handling the legacy library prefix
        #     'crazy.' when there is no longer interest in size comparisons for
        #     these pre-N APKs.
        cur_elf_path = os.path.join(
            tentative_output_dir, symbols_dir,
            posixpath.basename(apk_so_path.replace('crazy.', '')))
        if os.path.exists(cur_elf_path):
          logging.debug('Detected elf_path=%s', cur_elf_path)
        else:
          # TODO(agrieve): Not able to find libcrashpad_handler_trampoline.so.
          logging.debug('Not breaking down %s because file does not exist: %s',
                        apk_so_path, cur_elf_path)
          cur_elf_path = None

      if map_path:
        # Consume --aux-map-file for first non-skipped elf.
        cur_map_path = map_path
        map_path = None
      elif cur_elf_path and not ignore_linker_map:
        cur_map_path = _DeduceMapPath(cur_elf_path)

    # Derive the ABI filter from the first library's directory (e.g.
    # lib/<abi>/libfoo.so).
    if auto_abi_filters:
      abi_filters = [posixpath.basename(posixpath.dirname(apk_so_path))]
      logging.info('Detected --abi-filter %s', abi_filters[0])
      auto_abi_filters = False

    combined_elf_path = None
    if cur_map_path and '__combined.so' in cur_map_path:
      combined_elf_path = cur_elf_path[:-3] + '__combined.so'

    # A spec is created even for skipped libraries (with None elf/map paths).
    ret.append(
        _MakeNativeSpec(json_config,
                        apk_so_path=apk_so_path,
                        map_path=cur_map_path,
                        elf_path=cur_elf_path,
                        combined_elf_path=combined_elf_path,
                        track_string_literals=track_string_literals))

  return abi_filters, ret
| |
| |
| # Cache to prevent excess log messages. |
| @functools.lru_cache |
| def _DeduceMappingPath(mapping_path, apk_prefix): |
| if apk_prefix: |
| if not mapping_path: |
| possible_mapping_path = apk_prefix + '.mapping' |
| if os.path.exists(possible_mapping_path): |
| mapping_path = possible_mapping_path |
| logging.debug('Detected --mapping-file=%s', mapping_path) |
| else: |
| logging.warning('Could not find proguard mapping file at %s', |
| possible_mapping_path) |
| return mapping_path |
| |
| |
| # Cache to prevent excess log messages. |
| @functools.lru_cache |
| def _DeducePathmapPath(resources_pathmap_path, apk_prefix): |
| if apk_prefix: |
| if not resources_pathmap_path: |
| possible_pathmap_path = apk_prefix + '.pathmap.txt' |
| # This could be pointing to a stale pathmap file if path shortening was |
| # previously enabled but is disabled for the current build. However, since |
| # current apk/aab will have unshortened paths, looking those paths up in |
| # the stale pathmap which is keyed by shortened paths would not find any |
| # mapping and thus should not cause any issues. |
| if os.path.exists(possible_pathmap_path): |
| resources_pathmap_path = possible_pathmap_path |
| logging.debug('Detected --resources-pathmap-file=%s', |
| resources_pathmap_path) |
| # Path shortening is optional, so do not warn for missing file. |
| return resources_pathmap_path |
| |
| |
def _ReadMultipleArgsFromStream(lines, base_dir, err_prefix, on_config_error):
  """Parses .ssargs lines and rebases file arguments onto |base_dir|."""
  try:
    sub_args_list = ParseSsargs(lines)
  except ValueError as e:
    on_config_error('%s: %s' % (err_prefix, e.args[0]))
  for sub_args in sub_args_list:
    for key, value in vars(sub_args).items():
      # Translate file arguments to be relative to |base_dir|.
      if isinstance(value, str) and (key.endswith('_file') or key == 'f'):
        setattr(sub_args, key, os.path.join(base_dir, value))
  return sub_args_list
| |
| |
def _ReadMultipleArgsFromFile(ssargs_file, on_config_error):
  """Reads and parses a .ssargs file into per-container argument lists."""
  with open(ssargs_file, 'r') as fh:
    lines = fh.readlines()
  # Supply |base_dir| as the directory containing the .ssargs file, to ensure
  # consistent behavior wherever SuperSize-archive runs.
  base_dir = os.path.dirname(os.path.abspath(ssargs_file))
  return _ReadMultipleArgsFromStream(lines, base_dir,
                                     'In file ' + ssargs_file,
                                     on_config_error)
| |
| |
| # Both |top_args| and |sub_args| may be modified. |
def _CreateContainerSpecs(apk_file_manager,
                          top_args,
                          sub_args,
                          json_config,
                          base_container_name,
                          on_config_error,
                          split_name=None):
  """Creates ContainerSpecs for a single input (apk, bundle split, or elf).

  Args:
    apk_file_manager: zip_util.ApkFileManager for accessing extracted .apks.
    top_args: Top-level args. May be modified: |abi_filters| is set when
        auto-detected so that all bundle splits use a consistent ABI.
    sub_args: Per-container args. May be modified: missing source/output
        directories are defaulted from |top_args|.
    json_config: Parsed --json-config object.
    base_container_name: Base name for the created container(s).
    on_config_error: Callback invoked with a message on fatal config errors.
    split_name: Name of the split when analyzing one split of an .apks file.

  Returns:
    List of ContainerSpecs: the main container first, followed by one extra
    container per native library found within an apk.
  """
  sub_args.source_directory = (sub_args.source_directory
                               or top_args.source_directory)
  sub_args.output_directory = (sub_args.output_directory
                               or top_args.output_directory)
  analyze_native = not (sub_args.java_only or sub_args.no_native
                        or top_args.java_only or top_args.no_native)
  analyze_dex = not (sub_args.native_only or sub_args.no_java
                     or top_args.native_only or top_args.no_java)
  only_java_or_native = (sub_args.native_only or top_args.native_only
                         or sub_args.java_only or top_args.java_only)
  # Pak / resource analysis requires .info files from the output directory.
  analyze_pak = not only_java_or_native and bool(sub_args.output_directory)
  analyze_res = not only_java_or_native and bool(sub_args.output_directory)

  if split_name:
    apk_path = apk_file_manager.SplitPath(sub_args.minimal_apks_file,
                                          split_name)
    base_container_name = f'{base_container_name}/{split_name}.apk'
    # Make on-demand a part of the name so that:
    # * It's obvious from the name which DFMs are on-demand.
    # * Diffs that change an on-demand status show as adds/removes.
    if _IsOnDemand(apk_path):
      base_container_name += '?'
  else:
    apk_path = sub_args.apk_file

  apk_prefix = sub_args.minimal_apks_file or sub_args.apk_file
  if apk_prefix:
    # Allow either .minimal.apks or just .apks.
    apk_prefix = apk_prefix.replace('.minimal.apks', '.aab')
    apk_prefix = apk_prefix.replace('.apks', '.aab')

  mapping_path = None
  if analyze_dex:
    mapping_path = _DeduceMappingPath(sub_args.mapping_file, apk_prefix)
  resources_pathmap_path = _DeducePathmapPath(sub_args.resources_pathmap_file,
                                              apk_prefix)
  apk_spec = None
  if apk_prefix:
    apk_spec = ApkSpec(apk_path=apk_path,
                       minimal_apks_path=sub_args.minimal_apks_file,
                       mapping_path=mapping_path,
                       resources_pathmap_path=resources_pathmap_path,
                       split_name=split_name)
    size_info_prefix = os.path.join(top_args.output_directory, 'size-info',
                                    os.path.basename(apk_prefix))
    if analyze_res:
      apk_spec.size_info_prefix = size_info_prefix
      res_info_path = apk_spec.size_info_prefix + '.res.info'
      if not os.path.exists(res_info_path):
        on_config_error('File not found: ' + res_info_path)
    apk_spec.analyze_dex = analyze_dex
    apk_spec.track_string_literals = not (top_args.no_string_literals
                                          or sub_args.no_string_literals)
    apk_spec.default_component = json_config.DefaultComponentForSplit(
        split_name)
    apk_spec.path_defaults = json_config.ApkPathDefaults()

  pak_spec = None
  apk_pak_paths = None
  apk_infolist = None
  if apk_spec:
    apk_infolist = apk_file_manager.InfoList(apk_path)
    apk_pak_paths = [
        f.filename for f in apk_infolist
        if archive_util.RemoveAssetSuffix(f.filename).endswith('.pak')
    ]

  if analyze_pak and (apk_pak_paths or sub_args.pak_files):
    if apk_spec:
      pak_info_path = size_info_prefix + '.pak.info'
    else:
      pak_info_path = sub_args.pak_info_file
    if pak_info_path and not os.path.exists(pak_info_path):
      # Fixed: this previously interpolated undefined |pak_info_file|, which
      # raised NameError instead of reporting the missing file.
      on_config_error(f'File not found: {pak_info_path}. '
                      'Ensure is_official_build=true, or use --native-only')

    pak_spec = PakSpec(pak_paths=sub_args.pak_files,
                       pak_info_path=pak_info_path,
                       apk_pak_paths=apk_pak_paths)

  if analyze_native:
    # Allow top-level --abi-filter to override values set in .ssargs.
    abi_filters = top_args.abi_filters or sub_args.abi_filters
    aux_elf_file = sub_args.aux_elf_file
    aux_map_file = sub_args.aux_map_file
    # Aux files apply only to the base module (or a non-bundle apk).
    if split_name not in (None, 'base'):
      aux_elf_file = None
      aux_map_file = None

    auto_abi_filters = not abi_filters and split_name == 'base'
    abi_filters, native_specs = _CreateNativeSpecs(
        tentative_output_dir=top_args.output_directory,
        symbols_dir=sub_args.symbols_dir,
        apk_infolist=apk_infolist,
        elf_path=sub_args.elf_file or aux_elf_file,
        map_path=sub_args.map_file or aux_map_file,
        abi_filters=abi_filters,
        auto_abi_filters=auto_abi_filters,
        track_string_literals=not (top_args.no_string_literals
                                   or sub_args.no_string_literals),
        ignore_linker_map=(top_args.ignore_linker_map
                           or sub_args.ignore_linker_map),
        json_config=json_config,
        on_config_error=on_config_error)

    # For app bundles, use a consistent ABI for all splits.
    if auto_abi_filters:
      top_args.abi_filters = abi_filters
  else:
    native_specs = []

  ret = [
      ContainerSpec(container_name=base_container_name,
                    apk_spec=apk_spec,
                    pak_spec=pak_spec,
                    native_spec=None,
                    source_directory=sub_args.source_directory,
                    output_directory=sub_args.output_directory)
  ]
  if apk_spec is None:
    # Special case for when pointed at a single ELF, use just one container.
    assert len(native_specs) <= 1
    ret[0].native_spec = native_specs[0] if native_specs else None
  else:
    # Files claimed by dedicated containers are excluded from the apk's
    # generic "contents" accounting.
    apk_spec.ignore_apk_paths.update(s.apk_so_path for s in native_specs)
    if pak_spec and pak_spec.apk_pak_paths:
      apk_spec.ignore_apk_paths.update(pak_spec.apk_pak_paths)
    if apk_spec.analyze_dex:
      apk_spec.ignore_apk_paths.update(i.filename for i in apk_infolist
                                       if i.filename.endswith('.dex'))
    apk_spec.ignore_apk_paths.add(apk.RESOURCES_ARSC_FILE)

    for native_spec in native_specs:
      so_name = posixpath.basename(native_spec.apk_so_path)
      abi = posixpath.basename(posixpath.dirname(native_spec.apk_so_path))
      container_name = f'{base_container_name}/{so_name} ({abi})'
      # Use same apk_spec so that all containers for the apk_spec can be found.
      ret.append(
          ContainerSpec(container_name=container_name,
                        apk_spec=apk_spec,
                        pak_spec=None,
                        native_spec=native_spec,
                        source_directory=sub_args.source_directory,
                        output_directory=sub_args.output_directory))
  return ret
| |
| |
def _IsOnDemand(apk_path):
  """Returns whether the apk at |apk_path| is an on-demand feature split."""
  # Dump the (binary) AndroidManifest.xml as text via aapt2.
  cmd = [
      path_util.GetAapt2Path(), 'dump', 'xmltree', '--file',
      'AndroidManifest.xml', apk_path
  ]
  manifest_dump = subprocess.check_output(cmd).decode('ascii')

  def attr_is_true(namespace, name):
    # Matches lines of the form:
    # A: http://schemas.android.com/apk/res/android:isFeatureSplit(0x...)=true
    # A: http://schemas.android.com/apk/distribution:onDemand=true
    pattern = f'A: (?:.*?/{namespace}:)?{name}' + r'(?:\(.*?\))?=(\w+)'
    match = re.search(pattern, manifest_dump)
    return match and match.group(1) == 'true'

  # Can use <dist:on-demand>, or <module dist:onDemand="true">.
  has_on_demand = (attr_is_true('distribution', 'onDemand')
                   or 'on-demand' in manifest_dump)
  return bool(has_on_demand and attr_is_true('android', 'isFeatureSplit'))
| |
| |
def _CreateAllContainerSpecs(apk_file_manager, top_args, json_config,
                             on_config_error):
  """Creates ContainerSpecs for every container to be analyzed.

  Args:
    apk_file_manager: zip_util.ApkFileManager for extracting .apks files.
    top_args: Parsed top-level command-line arguments. May be modified:
        output / source directories are resolved in-place.
    json_config: Parsed --json-config object.
    on_config_error: Callback invoked with a message on fatal config errors.

  Returns:
    List of ContainerSpecs with unique container names.
  """
  main_file = _IdentifyInputFile(top_args, on_config_error)
  if top_args.no_output_directory:
    top_args.output_directory = None
  else:
    output_directory_finder = path_util.OutputDirectoryFinder(
        value=top_args.output_directory,
        any_path_within_output_directory=main_file)
    top_args.output_directory = output_directory_finder.Finalized()

  if not top_args.source_directory:
    top_args.source_directory = path_util.GetSrcRootFromOutputDirectory(
        top_args.output_directory)
  assert top_args.source_directory

  if top_args.ssargs_file:
    sub_args_list = _ReadMultipleArgsFromFile(top_args.ssargs_file,
                                              on_config_error)
  else:
    sub_args_list = [top_args]

  # Do a quick first pass to ensure inputs have been built.
  for sub_args in sub_args_list:
    main_file = _IdentifyInputFile(sub_args, on_config_error)
    if not os.path.exists(main_file):
      raise Exception('Input does not exist: ' + main_file)

  # Each element in |sub_args_list| specifies a container.
  ret = []
  for sub_args in sub_args_list:
    main_file = _IdentifyInputFile(sub_args, on_config_error)
    if hasattr(sub_args, 'name'):
      container_name = sub_args.name
    else:
      container_name = os.path.basename(main_file)
    if set(container_name) & set('<>?'):
      # "<>?" are reserved (e.g. "?" marks on-demand splits). Fixed: this
      # previously called |parser.error|, but no |parser| exists in this
      # scope, so it raised NameError instead of reporting the problem.
      on_config_error('Container name cannot have characters in "<>?"')

    if sub_args.minimal_apks_file:
      # Bundles get one container per split.
      split_names = apk_file_manager.ExtractSplits(sub_args.minimal_apks_file)
      for split_name in split_names:
        ret += _CreateContainerSpecs(apk_file_manager,
                                     top_args,
                                     sub_args,
                                     json_config,
                                     container_name,
                                     on_config_error,
                                     split_name=split_name)
    else:
      ret += _CreateContainerSpecs(apk_file_manager, top_args, sub_args,
                                   json_config, container_name,
                                   on_config_error)
  all_names = [c.container_name for c in ret]
  assert len(set(all_names)) == len(all_names), \
      'Found duplicate container names: ' + '\n'.join(sorted(all_names))

  return ret
| |
| |
| def _FilterContainerSpecs(container_specs, container_re=None): |
| ret = [] |
| seen_container_names = set() |
| for container_spec in container_specs: |
| container_name = container_spec.container_name |
| if container_name in seen_container_names: |
| raise ValueError('Duplicate container name: {}'.format(container_name)) |
| seen_container_names.add(container_name) |
| |
| if container_re and not container_re.search(container_name): |
| logging.info('Skipping filtered container %s', container_name) |
| continue |
| ret.append(container_spec) |
| return ret |
| |
| |
def CreateSizeInfo(container_specs, build_config, json_config,
                   apk_file_manager):
  """Analyzes all containers and assembles the result into a SizeInfo.

  Args:
    container_specs: List of ContainerSpecs to analyze. Sorted in-place.
    build_config: Build metadata to embed in the returned SizeInfo.
    json_config: Parsed --json-config object.
    apk_file_manager: zip_util.ApkFileManager for accessing extracted .apks.

  Returns:
    A models.SizeInfo covering all containers.
  """
  def sort_key(container_spec):
    # Native containers come first to ensure pak_id_map is populated before
    # any pak_spec is encountered.
    if container_spec.native_spec:
      # Do the most complicated container first, since its most likely to fail.
      if container_spec.native_spec.algorithm == 'linker_map':
        native_key = 0
      elif container_spec.native_spec.algorithm == 'dwarf':
        native_key = 1
      else:
        native_key = 2
    else:
      native_key = 3
    return (native_key, container_spec.container_name)

  container_specs.sort(key=sort_key)

  dex_containers = [
      c for c in container_specs
      if not c.native_spec and c.apk_spec and c.apk_spec.analyze_dex
  ]
  # Running ApkAnalyzer concurrently saves ~30 seconds for Monochrome.apks.
  apk_analyzer_results = {}
  if dex_containers:
    # Fixed log-message typo: "Kicking of" -> "Kicking off".
    logging.info('Kicking off ApkAnalyzer for %d .apk files',
                 len(dex_containers))
    for container_spec in dex_containers:
      apk_analyzer_results[container_spec.container_name] = (
          apkanalyzer.RunApkAnalyzerAsync(container_spec.apk_spec.apk_path,
                                          container_spec.apk_spec.mapping_path))

  # Ninja files are parsed only for containers that use linker maps and know
  # their output directory.
  ninja_containers = [
      c for c in container_specs
      if c.native_spec and c.output_directory and c.native_spec.map_path
  ]
  ninja_source_mapper = None
  if ninja_containers:
    elf_paths_to_find_inputs_for = [
        c.native_spec.combined_elf_path or c.native_spec.elf_path
        for c in ninja_containers
        if c.native_spec.combined_elf_path or c.native_spec.elf_path
    ]
    ninja_source_mapper = native.ParseNinjaFiles(
        ninja_containers[0].output_directory, elf_paths_to_find_inputs_for)

  raw_symbols_list = []
  pak_id_map = pakfile.PakIdMap()
  dex_deobfuscator_cache = dex_deobfuscate.CachedDexDeobfuscators()
  for container_spec in container_specs:
    raw_symbols = _CreateContainerSymbols(container_spec, apk_file_manager,
                                          apk_analyzer_results,
                                          ninja_source_mapper, pak_id_map,
                                          json_config.ComponentOverrides(),
                                          dex_deobfuscator_cache)
    assert raw_symbols, f'{container_spec.container_name} had no symbols.'
    raw_symbols_list.append(raw_symbols)

  # Normalize names before sorting.
  logging.info('Normalizing symbol names')
  for raw_symbols in raw_symbols_list:
    _NormalizeNames(raw_symbols)

  # Sorting must happen after normalization.
  logging.info('Sorting symbols')
  for raw_symbols in raw_symbols_list:
    file_format.SortSymbols(raw_symbols)

  logging.debug('Accumulating symbols')
  # Containers should always have at least one symbol.
  container_list = [syms[0].container for syms in raw_symbols_list]
  all_raw_symbols = []
  for raw_symbols in raw_symbols_list:
    all_raw_symbols += raw_symbols

  file_format.CalculatePadding(all_raw_symbols)

  return models.SizeInfo(build_config, container_list, all_raw_symbols)
| |
| |
def Run(top_args, on_config_error):
  """Entry point for the "archive" command.

  Builds ContainerSpecs from the command line (and optional .ssargs file),
  analyzes them into a SizeInfo, and saves the result to |top_args.size_file|.

  Args:
    top_args: Parsed top-level command-line arguments.
    on_config_error: Callback invoked with a message on fatal config errors.

  Raises:
    data_quality.QualityCheckError: When --check-data-quality is set and the
        quality check fails or the run exceeds 10 minutes.
  """
  path_util.CheckLlvmToolsAvailable()

  if not top_args.size_file.endswith('.size'):
    on_config_error('size_file must end with .size')
  if top_args.check_data_quality:
    # Only needed for the duration check at the end.
    start_time = time.time()

  # Compile --container-filter up-front so that a bad pattern fails fast.
  container_re = None
  if top_args.container_filter:
    try:
      container_re = re.compile(top_args.container_filter)
    except Exception as e:
      on_config_error(f'Bad --container-filter input: {e}')

  json_config_path = top_args.json_config
  if not json_config_path:
    json_config_path = path_util.GetDefaultJsonConfigPath()
  logging.info('Using --json-config=%s', json_config_path)
  json_config = json_config_parser.Parse(json_config_path, on_config_error)

  # |apk_file_manager| owns extracted temp files; keep it alive until all
  # containers have been analyzed.
  with zip_util.ApkFileManager() as apk_file_manager:
    container_specs = _CreateAllContainerSpecs(apk_file_manager, top_args,
                                               json_config, on_config_error)
    container_specs = _FilterContainerSpecs(container_specs, container_re)

    build_config = CreateBuildConfig(top_args.output_directory,
                                     top_args.source_directory,
                                     url=top_args.url,
                                     title=top_args.title)
    size_info = CreateSizeInfo(container_specs, build_config, json_config,
                               apk_file_manager)

  if logging.getLogger().isEnabledFor(logging.DEBUG):
    for line in data_quality.DescribeSizeInfoCoverage(size_info):
      logging.debug(line)
  logging.info('Recorded info for %d symbols', len(size_info.raw_symbols))
  for container in size_info.containers:
    logging.info('Recording metadata: \n %s',
                 '\n '.join(describe.DescribeDict(container.metadata)))

  logging.info('Saving result to %s', top_args.size_file)
  file_format.SaveSizeInfo(size_info, top_args.size_file)
  size_in_mb = os.path.getsize(top_args.size_file) / 1024.0 / 1024.0
  logging.info('Done. File size is %.2fMiB.', size_in_mb)

  if top_args.check_data_quality:
    logging.info('Checking data quality')
    data_quality.CheckDataQuality(size_info, not top_args.no_string_literals)
    duration = (time.time() - start_time) / 60
    if duration > 10:
      raise data_quality.QualityCheckError(
          'Command should not take longer than 10 minutes.'
          ' Took {:.1f} minutes.'.format(duration))