| # Copyright 2018 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Runs apkanalyzer to parse dex files in an apk. |
| |
| Assumes that apk_path.mapping and apk_path.jar.info are available. |
| """ |
| |
| import collections |
| import logging |
| import os |
| import posixpath |
| import re |
| import subprocess |
| import zipfile |
| |
| import models |
| import path_util |
| |
| |
# Name of the synthetic root node. UndoHierarchicalSizing treats every node
# that follows it as its descendant, and CreateDexSymbol creates no symbol
# for it.
_TOTAL_NODE_NAME = '<TOTAL>'
# Prefix LambdaNormalizer prepends when renaming $$ExternalSynthetic classes.
# CreateDexSymbol places symbols whose name starts with it under an
# 'Outlined' directory.
_OUTLINED_PREFIX = '$$Outlined$'
| |
| |
def _ParseJarInfoFile(file_name):
  """Parses a .jar.info file into a dict.

  Each non-empty line has the form '<key>,<value>'. Only the first comma
  separates the two, so the value itself may contain commas.

  Args:
    file_name: Path of the .jar.info file to read.

  Returns:
    A dict mapping the text before the first comma to the text after it. If a
    key appears more than once, the last occurrence wins.

  Raises:
    ValueError: If a non-empty line contains no comma.
  """
  source_map = {}
  with open(file_name, 'r') as info:
    for line in info:
      line = line.strip()
      # Blank lines (e.g. a trailing newline at the end of the file) carry no
      # mapping; skip them instead of failing to unpack the split result.
      if not line:
        continue
      package_path, file_path = line.split(',', 1)
      source_map[package_path] = file_path
  return source_map
| |
| |
def _RunApkAnalyzer(apk_path, mapping_path):
  """Runs 'apkanalyzer dex packages' on an apk and parses its output.

  Args:
    apk_path: Path of the apk handed to apkanalyzer.
    mapping_path: Path of a ProGuard mapping file. It is passed through
        --proguard-mappings only when it is truthy and exists on disk.

  Returns:
    A list of (node_type, name, size) tuples, one per output line and in
    output order. |size| is an int. |name| is every whitespace-separated
    column after the fifth, re-joined with single spaces.

  Raises:
    subprocess.CalledProcessError: If apkanalyzer exits with non-zero status.
    Exception: Any error raised while parsing a line is re-raised after the
        offending line has been logged.
  """
  cmd = [path_util.GetApkAnalyzerPath(), 'dex', 'packages', apk_path]
  if mapping_path and os.path.exists(mapping_path):
    cmd += ['--proguard-mappings', mapping_path]

  child_env = dict(os.environ)
  child_env['JAVA_HOME'] = path_util.GetJavaHome()
  raw_output = subprocess.check_output(cmd, env=child_env)

  parsed = []
  for line in raw_output.decode('ascii').splitlines():
    try:
      columns = line.split()
      # Column layout of each line:
      #   node_type, state, defined_methods, referenced_methods, size, name...
      # Only the first, the fifth and the trailing name columns are kept.
      node_type = columns[0]
      size = columns[4]
      name = ' '.join(columns[5:])
      parsed.append((node_type, name, int(size)))
    except Exception:
      logging.error('Problem line was: %s', line)
      raise
  return parsed
| |
| |
def _ExpectedDexTotalSize(apk_path):
  """Returns the summed |file_size| of every zip entry ending in '.dex'.

  Args:
    apk_path: Path of the apk (a zip archive) to inspect.

  Returns:
    The total as an int; 0 when the archive has no '.dex' entries.
  """
  with zipfile.ZipFile(apk_path) as apk_zip:
    return sum(entry.file_size for entry in apk_zip.infolist()
               if entry.filename.endswith('.dex'))
| |
| |
| # VisibleForTesting |
def UndoHierarchicalSizing(data):
  """Converts cumulative node sizes into per-node (exclusive) sizes.

  |data| is a pre-order list of (node_type, name, size) tuples in which a
  node's size includes the sizes of its descendants. A node B is a child of
  the preceding node A when A is '<TOTAL>', or when B's name is A's name
  followed by '.' or ' '. Inner classes ('Outer$Inner') therefore count as
  siblings of the outer class rather than as children.

  Each node's own size is its size minus the (clamped) sum of its direct
  children's sizes. Package nodes ('P') whose own size is 0 are dropped.

  Example input:
    [
      ('P', '<TOTAL>', 37),
      ('P', 'org', 32),
      ('P', 'org.chromium', 32),
      ('C', 'org.chromium.ClassA', 24),
      ('M', 'org.chromium.ClassA void methodA()', 10),
      ('C', 'org.chromium.ClassA$Proxy', 8),
    ]

  Example output (children are emitted before their parent):
    [
      ('M', 'org.chromium.ClassA void methodA()', 10),
      ('C', 'org.chromium.ClassA', 14),
      ('C', 'org.chromium.ClassA$Proxy', 8),
      ('P', '<TOTAL>', 5),
    ]

  Args:
    data: List of (node_type, name, size) tuples.

  Returns:
    A new list of (node_type, name, own_size) tuples.
  """
  node_count = len(data)
  flattened = []

  def is_descendant(parent_name, candidate_name):
    # Everything hangs off the synthetic total node.
    if parent_name == _TOTAL_NODE_NAME:
      return True
    parent_len = len(parent_name)
    return (len(candidate_name) > parent_len
            and candidate_name.startswith(parent_name)
            and candidate_name[parent_len] in '. ')

  def consume_subtree(position):
    # Handles the node at |position| together with all of its descendants.
    # Returns (index of the first node outside the subtree, cumulative size).
    assert position < node_count, 'Attempting to parse beyond data array.'
    node_type, name, size = data[position]
    cursor = position + 1
    children_size = 0
    # Anything that is not a descendant is a sibling or belongs higher up.
    while cursor < node_count and is_descendant(name, data[cursor][1]):
      cursor, child_size = consume_subtree(cursor)
      children_size += child_size

    # Apkanalyzer may overcount private method sizes at times. Unfortunately
    # the fix is not in the version we have in Android SDK Tools. For now we
    # prefer to undercount child sizes since the parent's size is more
    # accurate. This means the sum of child nodes may exceed its immediate
    # parent node's size.
    # TODO(wnwen): Assert children_size <= size once dexlib2 2.2.5 is released
    #     and rolled.
    own_size = size - min(size, children_size)

    # It is valid to have a package and a class with the same name.
    # To avoid having two symbols with the same name in these cases, do not
    # create symbols for packages (which have no size anyways).
    is_package = node_type == 'P'
    if is_package and own_size != 0 and name != _TOTAL_NODE_NAME:
      logging.warning('Unexpected java package that takes up size: %d, %s',
                      own_size, name)
    if not is_package or own_size != 0:
      flattened.append((node_type, name, own_size))
    return cursor, size

  position = 0
  while position < node_count:
    position, _ = consume_subtree(position)
  return flattened
| |
| |
def _TruncateFrom(value, delimiter, rfind=False):
  """Returns |value| cut off just before |delimiter|.

  Args:
    value: The string to truncate.
    delimiter: The substring to cut at; it is not included in the result.
    rfind: When True, cut at the last occurrence instead of the first.

  Returns:
    The text before the chosen occurrence, or |value| unchanged when
    |delimiter| does not occur in it.
  """
  locate = value.rfind if rfind else value.find
  cut = locate(delimiter)
  return value if cut == -1 else value[:cut]
| |
| |
| # Visible for testing. |
class LambdaNormalizer:
  """Renames desugared lambda classes to a stable '<base>$$Lambda$<N>' form.

  N is handed out per base class name, in the order in which distinct lambda
  classes are first seen by this instance. Normalizing the same class path
  again returns the number it was given the first time.
  """

  def __init__(self):
    # base_name -> number to assign to the next unseen lambda of that class.
    self._lambda_by_class_counter = collections.defaultdict(int)
    # Original lambda class path -> number it was assigned.
    self._lambda_name_to_nested_number = {}

  def _GetLambdaName(self, class_path, base_name, prefix=''):
    """Returns prefix + base_name + '$$Lambda$' + the number for |class_path|.

    Args:
      class_path: Original lambda class path; used as the cache key.
      base_name: Class name the lambda is attributed to; each base_name has
          its own counter.
      prefix: Text prepended to the resulting name.
    """
    lambda_number = self._lambda_name_to_nested_number.get(class_path)
    if lambda_number is None:
      # First time we've seen this lambda, increment nested class count.
      lambda_number = self._lambda_by_class_counter[base_name]
      self._lambda_name_to_nested_number[class_path] = lambda_number
      self._lambda_by_class_counter[base_name] = lambda_number + 1
    return prefix + base_name + '$$Lambda$' + str(lambda_number)

  def Normalize(self, class_path, full_name):
    """Normalizes a possibly-lambda class name inside |full_name|.

    Makes d8 desugared lambdas look the same as Desugar ones
    (e.g. org.Promise$Nested1$$Lambda$0):
      1) Prefix with the proper class name so that they show as nested.
      2) Suffix with a number so that they diff better.
    See tests for a more comprehensive list of what d8 currently generates.

    Args:
      class_path: The class part of the symbol name.
      full_name: The full symbol name in which |class_path| is replaced.

    Returns:
      A (outer_class, full_name) tuple. |outer_class| is |class_path| up to
      its first '$', except for the '-$$Lambda$' format where it is the
      package plus the class named right after '-$$Lambda$'. |full_name| is
      returned unchanged when |class_path| is not recognized as a lambda.

    Raises:
      AssertionError: If |class_path| looks like a synthetic lambda class but
          matches none of the known formats.
    """
    # Map nested classes to outer class.
    outer_class = _TruncateFrom(class_path, '$')

    # $$ is the convention for a synthetic class and all known desugared lambda
    # classes have 'Lambda' in the synthetic part of its name. If it doesn't
    # then it's almost certainly not a desugared lambda class.
    synthetic_idx = class_path.find('$$')
    if synthetic_idx == -1 or 'Lambda' not in class_path[synthetic_idx:]:
      return outer_class, full_name

    # Example: package.AnimatedProgressBar$$InternalSyntheticLambda$3$81073ff6$0
    # Example: package.Class$$Lambda$2$$InternalSyntheticOutline$8$cbe941dd782$0
    match = re.fullmatch(
        # The base_name group needs to be non-greedy/minimal (using +?) since we
        # want it to not include $$Lambda$28 when present.
        r'(?P<base_name>.+?)(\$\$Lambda\$\d+)?'
        r'\$\$InternalSynthetic[a-zA-Z0-9_]+'
        r'\$\d+\$[0-9a-f]+\$\d+',
        class_path)
    if match:
      new_name = self._GetLambdaName(class_path=class_path,
                                     base_name=match.group('base_name'))
      return outer_class, full_name.replace(class_path, new_name)

    # Example: AnimatedProgressBar$$ExternalSyntheticLambda0
    # Example: AutofillAssistant$$Lambda$2$$ExternalSyntheticOutline0
    # Example: ContextMenuCoord$$Lambda$2$$ExternalSyntheticThrowCCEIfNotNull0
    match = re.fullmatch(
        r'(?P<base_name>.+?)(\$\$Lambda\$\d+)?'
        r'\$\$ExternalSynthetic[a-zA-Z0-9_]+', class_path)
    if match:
      new_name = self._GetLambdaName(class_path=class_path,
                                     base_name=match.group('base_name'),
                                     prefix=_OUTLINED_PREFIX)
      return outer_class, full_name.replace(class_path, new_name)

    # Example: package.FirebaseInstallationsRegistrar$$Lambda$1
    match = re.fullmatch(r'(?P<base_name>.+)\$\$Lambda\$\d+', class_path)
    if match:
      # Although these are already valid names, re-number them to avoid name
      # collisions with renamed InternalSyntheticLambdas.
      new_name = self._GetLambdaName(class_path=class_path,
                                     base_name=match.group('base_name'))
      return outer_class, full_name.replace(class_path, new_name)

    # Example: org.-$$Lambda$StackAnimation$Nested1$kjevdDQ8V2zqCrdieLqWLHzk
    # Assume that the last portion of the name after $ is the hash identifier.
    match = re.fullmatch(
        r'(?P<package>.+)-\$\$Lambda\$(?P<class>[^$]+)(?P<nested>.*)\$[^$]+',
        class_path)
    if match:
      package_name = match.group('package')
      class_name = match.group('class')
      nested_classes = match.group('nested')
      base_name = package_name + class_name + nested_classes
      new_name = self._GetLambdaName(class_path=class_path, base_name=base_name)
      outer_class = package_name + class_name
      return outer_class, full_name.replace(class_path, new_name)

    # Raise explicitly rather than `assert False`: assert statements are
    # removed under `python -O`, which would make this method silently return
    # None for unknown formats.
    raise AssertionError(
        'No valid match for new lambda name format: ' + class_path + '\n'
        'Please update https://crbug.com/1208385 with this error so we can '
        'update the lambda normalization code.')
| |
| # Visible for testing. |
def CreateDexSymbol(name, size, source_map, lambda_normalizer):
  """Builds the models.Symbol for one dex node.

  Args:
    name: Node name, split on single spaces into
        (class_name, return_type, method_name).
    size: Size to record on the symbol.
    source_map: Dict from outer class name to source path; classes missing
        from it get an empty source path.
    lambda_normalizer: Object whose Normalize(class_path, full_name) returns
        an (outer_class, full_name) tuple.

  Returns:
    None for the '<TOTAL>' node, otherwise a models.Symbol. The symbol's
    section is models.SECTION_DEX_METHOD when the normalized name ends with
    ')' and models.SECTION_DEX otherwise.
  """
  tokens = name.split(' ')
  class_path = tokens[0]
  if class_path == _TOTAL_NODE_NAME:
    return None

  # Make d8 desugared lambdas look the same as Desugar ones.
  outer_class, name = lambda_normalizer.Normalize(class_path, name)

  # Look for class merging: a member name qualified with a class path means
  # that class, rather than the one in the first token, is used for the object
  # path and for the source lookup.
  original_class_path = class_path
  if len(tokens) > 2:
    member = tokens[2]
    # find() yields -1 for fields (no parenthesis), which is fine.
    paren_idx = member.find('(')
    dot_idx = member.rfind('.', 0, paren_idx)
    if dot_idx != -1:
      original_class_path = member[:dot_idx]
      outer_class, name = lambda_normalizer.Normalize(original_class_path,
                                                      name)

  source_path = source_map.get(outer_class, '')

  path_components = [models.APK_PREFIX_PATH]
  # Create a special meta-directory for outlined lambdas to easily monitor
  # their total size and spot regressions.
  if name.startswith(_OUTLINED_PREFIX):
    path_components.append('Outlined')
  path_components.extend(original_class_path.split('.'))
  object_path = posixpath.join(*path_components)

  section_name = (models.SECTION_DEX_METHOD
                  if name.endswith(')') else models.SECTION_DEX)
  return models.Symbol(section_name,
                       size,
                       full_name=name,
                       object_path=object_path,
                       source_path=source_path)
| |
| |
def CreateDexSymbols(apk_path, mapping_path, size_info_prefix):
  """Creates symbols for the dex contents of an apk.

  Args:
    apk_path: Path of the apk; it is run through apkanalyzer and also opened
        as a zip to total its .dex entries.
    mapping_path: ProGuard mapping path forwarded to _RunApkAnalyzer.
    size_info_prefix: Path prefix; size_info_prefix + '.jar.info' is parsed
        for the class -> source path map.

  Returns:
    A list of symbols: first those whose section is not models.SECTION_DEX,
    then those whose section is models.SECTION_DEX. Each group is sorted by
    full_name.
  """
  source_map = _ParseJarInfoFile(size_info_prefix + '.jar.info')

  nodes = _RunApkAnalyzer(apk_path, mapping_path)
  nodes = UndoHierarchicalSizing(nodes)

  dex_expected_size = _ExpectedDexTotalSize(apk_path)
  total_node_size = sum(node_size for _, _, node_size in nodes)
  # TODO(agrieve): Figure out why this log is triggering for
  #     ChromeModernPublic.apk (https://crbug.com/851535).
  # Reporting: dex_expected_size=6546088 total_node_size=6559549
  if dex_expected_size < total_node_size:
    logging.error(
        'Node size too large, check for node processing errors. '
        'dex_expected_size=%d total_node_size=%d', dex_expected_size,
        total_node_size)

  # Use (DEX_METHODS, DEX) buckets to speed up sorting.
  method_symbols = []
  dex_symbols = []
  lambda_normalizer = LambdaNormalizer()
  for _, name, node_size in nodes:
    symbol = CreateDexSymbol(name, node_size, source_map, lambda_normalizer)
    if not symbol:
      continue
    # Compare section names by value: `is` on strings only works while both
    # sides happen to be the very same object.
    if symbol.section_name == models.SECTION_DEX:
      dex_symbols.append(symbol)
    else:
      method_symbols.append(symbol)

  method_symbols.sort(key=lambda s: s.full_name)
  dex_symbols.sort(key=lambda s: s.full_name)
  return method_symbols + dex_symbols