# tools/binary_size/libsupersize/apkanalyzer.py - chromium/src - Git at Google

 # Copyright 2018 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """Runs apkanalyzer to parse dex files in an apk.

 Assumes that apk_path.mapping and apk_path.jar.info are available.
 """

 import collections
 import logging
 import os
 import posixpath
 import re
 import subprocess
 import zipfile

 import models
 import path_util


 _TOTAL_NODE_NAME = '<TOTAL>'
 _OUTLINED_PREFIX = '$$Outlined$'


 def _ParseJarInfoFile(file_name):
   """Parses a .jar.info file into a dict.

   Every non-blank line must look like "<package_path>,<file_path>". Only the
   first comma is treated as the separator, so the file path may itself contain
   commas.

   Args:
     file_name: Path of the .jar.info file to read.

   Returns:
     A dict mapping each line's package path to its file path. If a package
     path appears more than once, the last line wins.

   Raises:
     ValueError: If a non-blank line does not contain a comma.
   """
   source_map = {}
   with open(file_name, 'r') as info:
     for line in info:
       entry = line.strip()
       # Blank lines (e.g. a trailing empty line) carry no mapping; skip them
       # instead of failing to unpack.
       if not entry:
         continue
       package_path, file_path = entry.split(',', 1)
       source_map[package_path] = file_path
   return source_map


 def _RunApkAnalyzer(apk_path, mapping_path):
   """Runs `apkanalyzer dex packages` on an apk and parses its output.

   Args:
     apk_path: Path of the apk to analyze.
     mapping_path: Path of a ProGuard mapping file. It is forwarded via
       --proguard-mappings only when it is truthy and exists on disk.

   Returns:
     A list of (node_type, name, size) tuples, one per output line and in
     output order. |size| is an int and |name| is every column after the fifth
     one, re-joined with single spaces.

   Raises:
     Exception: Whatever a malformed line raises while being parsed (e.g.
       IndexError, ValueError); the offending line is logged first.
   """
   cmd = [path_util.GetApkAnalyzerPath(), 'dex', 'packages', apk_path]
   if mapping_path and os.path.exists(mapping_path):
     cmd += ['--proguard-mappings', mapping_path]

   child_env = dict(os.environ)
   child_env['JAVA_HOME'] = path_util.GetJavaHome()
   stdout = subprocess.check_output(cmd, env=child_env).decode('ascii')

   parsed = []
   for line in stdout.splitlines():
     try:
       columns = line.split()
       # Column layout: node_type, state, defined_methods, referenced_methods,
       # size, followed by the name (which may itself contain spaces).
       node_type = columns[0]
       size = int(columns[4])
       name = ' '.join(columns[5:])
     except Exception:
       logging.error('Problem line was: %s', line)
       raise
     parsed.append((node_type, name, size))
   return parsed


 def _ExpectedDexTotalSize(apk_path):
   """Returns the summed uncompressed size of the apk's *.dex zip entries.

   Args:
     apk_path: Path of the apk (zip archive) to inspect.

   Returns:
     Sum of ZipInfo.file_size over every entry whose name ends with '.dex';
     0 when there is no such entry.
   """
   with zipfile.ZipFile(apk_path) as apk_zip:
     return sum(entry.file_size for entry in apk_zip.infolist()
                if entry.filename.endswith('.dex'))


 # VisibleForTesting
 def UndoHierarchicalSizing(data):
   """Turns cumulative node sizes into per-node sizes.

   |data| is a flat, pre-order list of (node_type, name, size) tuples where each
   size includes the sizes of the node's descendants. A node B that follows a
   node A is treated as A's child when A is the <TOTAL> node, or when B's name
   starts with A's name immediately followed by '.' or ' '. Inner classes
   ('Outer$Inner') therefore count as siblings of 'Outer', not children.

   Nodes are emitted children-first (post-order). Package ('P') nodes whose
   remaining size is 0 are dropped.

   Example input:
     [
       ('P', '<TOTAL>', 37),
       ('P', 'org', 32),
       ('P', 'org.chromium', 32),
       ('C', 'org.chromium.ClassA', 24),
       ('M', 'org.chromium.ClassA void methodA()', 10),
       ('C', 'org.chromium.ClassA$Proxy', 8),
     ]

   Example output:
     [
       ('M', 'org.chromium.ClassA void methodA()', 10),
       ('C', 'org.chromium.ClassA', 14),
       ('C', 'org.chromium.ClassA$Proxy', 8),
       ('P', '<TOTAL>', 5),
     ]

   Args:
     data: List of (node_type, name, size) tuples as described above.

   Returns:
     A new list of (node_type, name, node_size) tuples.
   """
   node_count = len(data)
   flattened = []

   def visit(index):
     # Emits the node at |index| plus all of its descendants and returns
     # (index of the first node that is not a descendant, cumulative size).
     assert index < node_count, 'Attempting to parse beyond data array.'
     node_type, name, size = data[index]
     is_total = name == _TOTAL_NODE_NAME
     prefix_len = len(name)
     children_size = 0
     cursor = index + 1
     while cursor < node_count:
       candidate = data[cursor][1]
       is_child = is_total or (
           len(candidate) > prefix_len and candidate.startswith(name)
           and candidate[prefix_len] in '. ')
       if not is_child:
         # Sibling or a node higher up in the hierarchy.
         break
       cursor, child_size = visit(cursor)
       children_size += child_size

     # Apkanalyzer may overcount private method sizes at times. Unfortunately
     # the fix is not in the version we have in Android SDK Tools. For now we
     # prefer to undercount child sizes since the parent's size is more
     # accurate. This means the sum of child nodes may exceed its immediate
     # parent node's size.
     # TODO(wnwen): Assert children_size <= size once dexlib2 2.2.5 is released
     #     and rolled, instead of clamping.
     self_size = size - min(size, children_size)

     # It is valid to have a package and a class with the same name.
     # To avoid having two symbols with the same name in these cases, do not
     # create symbols for packages (which have no size anyways).
     is_package = node_type == 'P'
     if is_package and self_size != 0 and not is_total:
       logging.warning('Unexpected java package that takes up size: %d, %s',
                       self_size, name)
     if not is_package or self_size != 0:
       flattened.append((node_type, name, self_size))
     return cursor, size

   position = 0
   while position < node_count:
     position, _ = visit(position)
   return flattened


 def _TruncateFrom(value, delimiter, rfind=False):
   """Returns |value| cut off just before |delimiter|.

   Args:
     value: String to truncate.
     delimiter: Substring to search for.
     rfind: When True, cut at the last occurrence instead of the first.

   Returns:
     The part of |value| before the chosen occurrence of |delimiter|, or
     |value| unchanged when |delimiter| does not occur.
   """
   locate = value.rfind if rfind else value.find
   cut = locate(delimiter)
   return value if cut == -1 else value[:cut]


 # Visible for testing.
 class LambdaNormalizer:
   """Renames d8-desugared lambda classes to the form `Base$$Lambda$<N>`.

   Makes d8 desugared lambdas look the same as Desugar ones
   (e.g. org.Promise$Nested1$$Lambda$0):
   1) They are prefixed with the proper class name so they show as nested.
   2) They are suffixed with a number so that they diff better.
   Numbers are handed out per base name, in the order class paths are first
   seen by this instance, and are remembered per class path.
   """

   # Example: package.AnimatedProgressBar$$InternalSyntheticLambda$3$81073ff6$0
   # Example: package.Class$$Lambda$2$$InternalSyntheticOutline$8$cbe941dd782$0
   # The base_name group needs to be non-greedy/minimal (using +?) since we
   # want it to not include $$Lambda$28 when present.
   _INTERNAL_SYNTHETIC_RE = re.compile(
       r'(?P<base_name>.+?)(\$\$Lambda\$\d+)?'
       r'\$\$InternalSynthetic[a-zA-Z0-9_]+'
       r'\$\d+\$[0-9a-f]+\$\d+')
   # Example: AnimatedProgressBar$$ExternalSyntheticLambda0
   # Example: AutofillAssistant$$Lambda$2$$ExternalSyntheticOutline0
   # Example: ContextMenuCoord$$Lambda$2$$ExternalSyntheticThrowCCEIfNotNull0
   _EXTERNAL_SYNTHETIC_RE = re.compile(
       r'(?P<base_name>.+?)(\$\$Lambda\$\d+)?'
       r'\$\$ExternalSynthetic[a-zA-Z0-9_]+')
   # Example: package.FirebaseInstallationsRegistrar$$Lambda$1
   _PLAIN_LAMBDA_RE = re.compile(r'(?P<base_name>.+)\$\$Lambda\$\d+')
   # Example: org.-$$Lambda$StackAnimation$Nested1$kjevdDQ8V2zqCrdieLqWLHzk
   # Assume that the last portion of the name after $ is the hash identifier.
   _HASHED_LAMBDA_RE = re.compile(
       r'(?P<package>.+)-\$\$Lambda\$(?P<class>[^$]+)(?P<nested>.*)\$[^$]+')

   def __init__(self):
     # base_name -> next unused lambda number for that base name.
     self._lambda_by_class_counter = collections.defaultdict(int)
     # Original class path -> lambda number already assigned to it.
     self._lambda_name_to_nested_number = {}

   def _GetLambdaName(self, class_path, base_name, prefix=''):
     """Returns prefix + base_name + '$$Lambda$<N>' for |class_path|.

     <N> is stable for a given |class_path|; a class path seen for the first
     time receives the next free number of |base_name|.
     """
     lambda_number = self._lambda_name_to_nested_number.get(class_path)
     if lambda_number is None:
       # First time we've seen this lambda, increment nested class count.
       lambda_number = self._lambda_by_class_counter[base_name]
       self._lambda_name_to_nested_number[class_path] = lambda_number
       self._lambda_by_class_counter[base_name] = lambda_number + 1
     return prefix + base_name + '$$Lambda$' + str(lambda_number)

   def Normalize(self, class_path, full_name):
     """Normalizes a possibly lambda-related class name.

     Args:
       class_path: Fully qualified class name.
       full_name: Symbol name in which |class_path| may occur.

     Returns:
       An (outer_class, full_name) tuple. |outer_class| is |class_path| cut at
       its first '$' (or rebuilt from package + class for the "-$$Lambda$"
       form). |full_name| has every occurrence of |class_path| replaced by the
       normalized lambda name, or is returned unchanged for non-lambda classes.

     Raises:
       AssertionError: If |class_path| looks like a lambda class but matches
         none of the known naming formats.
     """
     # Map nested classes to outer class (everything before the first '$').
     outer_class = class_path.partition('$')[0]

     # $$ is the convention for a synthetic class and all known desugared lambda
     # classes have 'Lambda' in the synthetic part of its name. If it doesn't
     # then it's almost certainly not a desugared lambda class.
     if 'Lambda' not in class_path[class_path.find('$$'):]:
       return outer_class, full_name

     match = self._INTERNAL_SYNTHETIC_RE.fullmatch(class_path)
     if match:
       new_name = self._GetLambdaName(class_path=class_path,
                                      base_name=match.group('base_name'))
       return outer_class, full_name.replace(class_path, new_name)

     match = self._EXTERNAL_SYNTHETIC_RE.fullmatch(class_path)
     if match:
       new_name = self._GetLambdaName(class_path=class_path,
                                      base_name=match.group('base_name'),
                                      prefix=_OUTLINED_PREFIX)
       return outer_class, full_name.replace(class_path, new_name)

     match = self._PLAIN_LAMBDA_RE.fullmatch(class_path)
     if match:
       # Although these are already valid names, re-number them to avoid name
       # collisions with renamed InternalSyntheticLambdas.
       new_name = self._GetLambdaName(class_path=class_path,
                                      base_name=match.group('base_name'))
       return outer_class, full_name.replace(class_path, new_name)

     match = self._HASHED_LAMBDA_RE.fullmatch(class_path)
     if match:
       package_name = match.group('package')
       class_name = match.group('class')
       nested_classes = match.group('nested')
       base_name = package_name + class_name + nested_classes
       new_name = self._GetLambdaName(class_path=class_path, base_name=base_name)
       outer_class = package_name + class_name
       return outer_class, full_name.replace(class_path, new_name)

     # Raised explicitly (rather than `assert False`) so that the failure is
     # not stripped when Python runs with -O.
     raise AssertionError(
         'No valid match for new lambda name format: ' + class_path + '\n'
         'Please update https://crbug.com/1208385 with this error so we can '
         'update the lambda normalization code.')


 # Visible for testing.
 def CreateDexSymbol(name, size, source_map, lambda_normalizer):
   """Creates a models.Symbol for a single dex node.

   Args:
     name: Node name; space-separated (class_name, return_type, method_name).
     size: Size to record on the symbol.
     source_map: Dict used to look up the source path by outer class name.
     lambda_normalizer: Object whose Normalize(class_path, full_name) returns
       an (outer_class, full_name) tuple.

   Returns:
     A models.Symbol, or None when |name| is the <TOTAL> node.
   """
   tokens = name.split(' ')  # (class_name, return_type, method_name)
   new_package = tokens[0]
   if new_package == _TOTAL_NODE_NAME:
     return None

   # Make d8 desugared lambdas look the same as Desugar ones.
   outer_class, name = lambda_normalizer.Normalize(new_package, name)

   # Look for class merging: when the third token carries a dotted qualifier
   # before its '(' (or before its last character for tokens without '('),
   # that qualifier is used as the package instead.
   old_package = new_package
   if len(tokens) > 2:
     member = tokens[2]
     # paren_idx == -1 for fields, which is fine.
     paren_idx = member.find('(')
     dot_idx = member.rfind('.', 0, paren_idx)
     if dot_idx != -1:
       old_package = member[:dot_idx]
       outer_class, name = lambda_normalizer.Normalize(old_package, name)

   # Create a special meta-directory for outlined lambdas to easily monitor
   # their total size and spot regressions.
   path_parts = [models.APK_PREFIX_PATH]
   if name.startswith(_OUTLINED_PREFIX):
     path_parts.append('Outlined')
   path_parts.extend(old_package.split('.'))

   # Names ending in ')' are methods; everything else goes in the plain section.
   section_name = (models.SECTION_DEX_METHOD
                   if name.endswith(')') else models.SECTION_DEX)

   return models.Symbol(section_name,
                        size,
                        full_name=name,
                        object_path=posixpath.join(*path_parts),
                        source_path=source_map.get(outer_class, ''))


 def CreateDexSymbols(apk_path, mapping_path, size_info_prefix):
   """Creates sorted dex symbols for an apk.

   Args:
     apk_path: Path of the apk to analyze.
     mapping_path: Path of the ProGuard mapping file handed to apkanalyzer.
     size_info_prefix: Path prefix; '<size_info_prefix>.jar.info' is read to
       build the source map.

   Returns:
     A list of symbols: first those whose section is not models.SECTION_DEX
     (i.e. methods), then those whose section is, each group sorted by
     full_name.
   """
   source_map = _ParseJarInfoFile(size_info_prefix + '.jar.info')

   nodes = _RunApkAnalyzer(apk_path, mapping_path)
   nodes = UndoHierarchicalSizing(nodes)

   dex_expected_size = _ExpectedDexTotalSize(apk_path)
   total_node_size = sum(node_size for _, _, node_size in nodes)
   # TODO(agrieve): Figure out why this log is triggering for
   #     ChromeModernPublic.apk (https://crbug.com/851535).
   # Reporting: dex_expected_size=6546088 total_node_size=6559549
   if dex_expected_size < total_node_size:
     logging.error(
       'Node size too large, check for node processing errors. '
       'dex_expected_size=%d total_node_size=%d', dex_expected_size,
       total_node_size)

   # Use (DEX_METHODS, DEX) buckets to speed up sorting.
   method_symbols = []
   dex_symbols = []
   lambda_normalizer = LambdaNormalizer()
   for _, name, node_size in nodes:
     symbol = CreateDexSymbol(name, node_size, source_map, lambda_normalizer)
     if symbol:
       # Compare by value: `is` would depend on the section name being the very
       # same object as the constant (presumably a string; verify in models).
       if symbol.section_name == models.SECTION_DEX:
         dex_symbols.append(symbol)
       else:
         method_symbols.append(symbol)

   method_symbols.sort(key=lambda s: s.full_name)
   dex_symbols.sort(key=lambda s: s.full_name)
   method_symbols.extend(dex_symbols)
   return method_symbols
	# Copyright 2018 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	"""Runs apkanalyzer to parse dex files in an apk.

	Assumes that apk_path.mapping and apk_path.jar.info are available.
	"""

	import collections
	import logging
	import os
	import posixpath
	import re
	import subprocess
	import zipfile

	import models
	import path_util


	_TOTAL_NODE_NAME = '<TOTAL>'
	_OUTLINED_PREFIX = '$$Outlined$'


	def _ParseJarInfoFile(file_name):
	  """Parses a .jar.info file into a dict.

	  Every non-blank line must look like "<package_path>,<file_path>". Only the
	  first comma is treated as the separator, so the file path may itself contain
	  commas.

	  Args:
	    file_name: Path of the .jar.info file to read.

	  Returns:
	    A dict mapping each line's package path to its file path. If a package
	    path appears more than once, the last line wins.

	  Raises:
	    ValueError: If a non-blank line does not contain a comma.
	  """
	  source_map = {}
	  with open(file_name, 'r') as info:
	    for line in info:
	      entry = line.strip()
	      # Blank lines (e.g. a trailing empty line) carry no mapping; skip them
	      # instead of failing to unpack.
	      if not entry:
	        continue
	      package_path, file_path = entry.split(',', 1)
	      source_map[package_path] = file_path
	  return source_map


	def _RunApkAnalyzer(apk_path, mapping_path):
	  """Runs `apkanalyzer dex packages` on an apk and parses its output.

	  Args:
	    apk_path: Path of the apk to analyze.
	    mapping_path: Path of a ProGuard mapping file. It is forwarded via
	      --proguard-mappings only when it is truthy and exists on disk.

	  Returns:
	    A list of (node_type, name, size) tuples, one per output line and in
	    output order. |size| is an int and |name| is every column after the fifth
	    one, re-joined with single spaces.

	  Raises:
	    Exception: Whatever a malformed line raises while being parsed (e.g.
	      IndexError, ValueError); the offending line is logged first.
	  """
	  args = [path_util.GetApkAnalyzerPath(), 'dex', 'packages', apk_path]
	  if mapping_path and os.path.exists(mapping_path):
	    args.extend(['--proguard-mappings', mapping_path])
	  env = os.environ.copy()
	  env['JAVA_HOME'] = path_util.GetJavaHome()
	  output = subprocess.check_output(args, env=env).decode('ascii')
	  data = []
	  for line in output.splitlines():
	    try:
	      vals = line.split()
	      # Column layout: node_type, state, defined_methods, referenced_methods,
	      # size, followed by the name (which may itself contain spaces).
	      node_type = vals[0]
	      size = int(vals[4])
	      name = ' '.join(vals[5:])
	      data.append((node_type, name, size))
	    except Exception:
	      logging.error('Problem line was: %s', line)
	      raise
	  return data


	def _ExpectedDexTotalSize(apk_path):
	  """Returns the summed uncompressed size of the apk's *.dex zip entries.

	  Args:
	    apk_path: Path of the apk (zip archive) to inspect.

	  Returns:
	    Sum of ZipInfo.file_size over every entry whose name ends with '.dex';
	    0 when there is no such entry.
	  """
	  with zipfile.ZipFile(apk_path) as z:
	    return sum(zip_info.file_size for zip_info in z.infolist()
	               if zip_info.filename.endswith('.dex'))


	# VisibleForTesting
	def UndoHierarchicalSizing(data):
	  """Turns cumulative node sizes into per-node sizes.

	  |data| is a flat, pre-order list of (node_type, name, size) tuples where each
	  size includes the sizes of the node's descendants. A node B that follows a
	  node A is treated as A's child when A is the <TOTAL> node, or when B's name
	  starts with A's name immediately followed by '.' or ' '. Inner classes
	  ('Outer$Inner') therefore count as siblings of 'Outer', not children.

	  Nodes are emitted children-first (post-order). Package ('P') nodes whose
	  remaining size is 0 are dropped.

	  Example input:
	    [
	      ('P', '<TOTAL>', 37),
	      ('P', 'org', 32),
	      ('P', 'org.chromium', 32),
	      ('C', 'org.chromium.ClassA', 24),
	      ('M', 'org.chromium.ClassA void methodA()', 10),
	      ('C', 'org.chromium.ClassA$Proxy', 8),
	    ]

	  Example output:
	    [
	      ('M', 'org.chromium.ClassA void methodA()', 10),
	      ('C', 'org.chromium.ClassA', 14),
	      ('C', 'org.chromium.ClassA$Proxy', 8),
	      ('P', '<TOTAL>', 5),
	    ]

	  Args:
	    data: List of (node_type, name, size) tuples as described above.

	  Returns:
	    A new list of (node_type, name, node_size) tuples.
	  """
	  num_nodes = len(data)
	  nodes = []

	  def process_node(start_idx):
	    # Emits the node at |start_idx| plus all of its descendants and returns
	    # (index of the first node that is not a descendant, cumulative size).
	    assert start_idx < num_nodes, 'Attempting to parse beyond data array.'
	    node_type, name, size = data[start_idx]
	    total_child_size = 0
	    next_idx = start_idx + 1
	    name_len = len(name)
	    while next_idx < num_nodes:
	      next_name = data[next_idx][1]
	      if name == _TOTAL_NODE_NAME or (
	          len(next_name) > name_len and next_name.startswith(name)
	          and next_name[name_len] in '. '):
	        # Child node
	        child_next_idx, child_node_size = process_node(next_idx)
	        next_idx = child_next_idx
	        total_child_size += child_node_size
	      else:
	        # Sibling or higher nodes
	        break

	    # Apkanalyzer may overcount private method sizes at times. Unfortunately
	    # the fix is not in the version we have in Android SDK Tools. For now we
	    # prefer to undercount child sizes since the parent's size is more
	    # accurate. This means the sum of child nodes may exceed its immediate
	    # parent node's size.
	    # TODO(wnwen): Assert total_child_size <= size once dexlib2 2.2.5 is
	    #     released and rolled, instead of clamping.
	    total_child_size = min(size, total_child_size)

	    node_size = size - total_child_size
	    # It is valid to have a package and a class with the same name.
	    # To avoid having two symbols with the same name in these cases, do not
	    # create symbols for packages (which have no size anyways).
	    if node_type == 'P' and node_size != 0 and name != _TOTAL_NODE_NAME:
	      logging.warning('Unexpected java package that takes up size: %d, %s',
	                      node_size, name)
	    if node_type != 'P' or node_size != 0:
	      nodes.append((node_type, name, node_size))
	    return next_idx, size

	  idx = 0
	  while idx < num_nodes:
	    idx = process_node(idx)[0]
	  return nodes


	def _TruncateFrom(value, delimiter, rfind=False):
	  """Returns |value| cut off just before |delimiter|.

	  Args:
	    value: String to truncate.
	    delimiter: Substring to search for.
	    rfind: When True, cut at the last occurrence instead of the first.

	  Returns:
	    The part of |value| before the chosen occurrence of |delimiter|, or
	    |value| unchanged when |delimiter| does not occur.
	  """
	  idx = value.rfind(delimiter) if rfind else value.find(delimiter)
	  if idx != -1:
	    return value[:idx]
	  return value


	# Visible for testing.
	class LambdaNormalizer:
	  """Renames d8-desugared lambda classes to the form `Base$$Lambda$<N>`.

	  Makes d8 desugared lambdas look the same as Desugar ones
	  (e.g. org.Promise$Nested1$$Lambda$0):
	  1) They are prefixed with the proper class name so they show as nested.
	  2) They are suffixed with a number so that they diff better.
	  Numbers are handed out per base name, in the order class paths are first
	  seen by this instance, and are remembered per class path.
	  """

	  # Example: package.AnimatedProgressBar$$InternalSyntheticLambda$3$81073ff6$0
	  # Example: package.Class$$Lambda$2$$InternalSyntheticOutline$8$cbe941dd782$0
	  # The base_name group needs to be non-greedy/minimal (using +?) since we
	  # want it to not include $$Lambda$28 when present.
	  _INTERNAL_SYNTHETIC_RE = re.compile(
	      r'(?P<base_name>.+?)(\$\$Lambda\$\d+)?'
	      r'\$\$InternalSynthetic[a-zA-Z0-9_]+'
	      r'\$\d+\$[0-9a-f]+\$\d+')
	  # Example: AnimatedProgressBar$$ExternalSyntheticLambda0
	  # Example: AutofillAssistant$$Lambda$2$$ExternalSyntheticOutline0
	  # Example: ContextMenuCoord$$Lambda$2$$ExternalSyntheticThrowCCEIfNotNull0
	  _EXTERNAL_SYNTHETIC_RE = re.compile(
	      r'(?P<base_name>.+?)(\$\$Lambda\$\d+)?'
	      r'\$\$ExternalSynthetic[a-zA-Z0-9_]+')
	  # Example: package.FirebaseInstallationsRegistrar$$Lambda$1
	  _PLAIN_LAMBDA_RE = re.compile(r'(?P<base_name>.+)\$\$Lambda\$\d+')
	  # Example: org.-$$Lambda$StackAnimation$Nested1$kjevdDQ8V2zqCrdieLqWLHzk
	  # Assume that the last portion of the name after $ is the hash identifier.
	  _HASHED_LAMBDA_RE = re.compile(
	      r'(?P<package>.+)-\$\$Lambda\$(?P<class>[^$]+)(?P<nested>.*)\$[^$]+')

	  def __init__(self):
	    # base_name -> next unused lambda number for that base name.
	    self._lambda_by_class_counter = collections.defaultdict(int)
	    # Original class path -> lambda number already assigned to it.
	    self._lambda_name_to_nested_number = {}

	  def _GetLambdaName(self, class_path, base_name, prefix=''):
	    """Returns prefix + base_name + '$$Lambda$<N>' for |class_path|.

	    <N> is stable for a given |class_path|; a class path seen for the first
	    time receives the next free number of |base_name|.
	    """
	    lambda_number = self._lambda_name_to_nested_number.get(class_path)
	    if lambda_number is None:
	      # First time we've seen this lambda, increment nested class count.
	      lambda_number = self._lambda_by_class_counter[base_name]
	      self._lambda_name_to_nested_number[class_path] = lambda_number
	      self._lambda_by_class_counter[base_name] = lambda_number + 1
	    return prefix + base_name + '$$Lambda$' + str(lambda_number)

	  def Normalize(self, class_path, full_name):
	    """Normalizes a possibly lambda-related class name.

	    Args:
	      class_path: Fully qualified class name.
	      full_name: Symbol name in which |class_path| may occur.

	    Returns:
	      An (outer_class, full_name) tuple. |outer_class| is |class_path| cut at
	      its first '$' (or rebuilt from package + class for the "-$$Lambda$"
	      form). |full_name| has every occurrence of |class_path| replaced by the
	      normalized lambda name, or is returned unchanged for non-lambda classes.

	    Raises:
	      AssertionError: If |class_path| looks like a lambda class but matches
	        none of the known naming formats.
	    """
	    # Map nested classes to outer class (everything before the first '$').
	    outer_class = class_path.partition('$')[0]

	    # $$ is the convention for a synthetic class and all known desugared lambda
	    # classes have 'Lambda' in the synthetic part of its name. If it doesn't
	    # then it's almost certainly not a desugared lambda class.
	    if 'Lambda' not in class_path[class_path.find('$$'):]:
	      return outer_class, full_name

	    match = self._INTERNAL_SYNTHETIC_RE.fullmatch(class_path)
	    if match:
	      new_name = self._GetLambdaName(class_path=class_path,
	                                     base_name=match.group('base_name'))
	      return outer_class, full_name.replace(class_path, new_name)

	    match = self._EXTERNAL_SYNTHETIC_RE.fullmatch(class_path)
	    if match:
	      new_name = self._GetLambdaName(class_path=class_path,
	                                     base_name=match.group('base_name'),
	                                     prefix=_OUTLINED_PREFIX)
	      return outer_class, full_name.replace(class_path, new_name)

	    match = self._PLAIN_LAMBDA_RE.fullmatch(class_path)
	    if match:
	      # Although these are already valid names, re-number them to avoid name
	      # collisions with renamed InternalSyntheticLambdas.
	      new_name = self._GetLambdaName(class_path=class_path,
	                                     base_name=match.group('base_name'))
	      return outer_class, full_name.replace(class_path, new_name)

	    match = self._HASHED_LAMBDA_RE.fullmatch(class_path)
	    if match:
	      package_name = match.group('package')
	      class_name = match.group('class')
	      nested_classes = match.group('nested')
	      base_name = package_name + class_name + nested_classes
	      new_name = self._GetLambdaName(class_path=class_path, base_name=base_name)
	      outer_class = package_name + class_name
	      return outer_class, full_name.replace(class_path, new_name)

	    # Raised explicitly (rather than `assert False`) so that the failure is
	    # not stripped when Python runs with -O.
	    raise AssertionError(
	        'No valid match for new lambda name format: ' + class_path + '\n'
	        'Please update https://crbug.com/1208385 with this error so we can '
	        'update the lambda normalization code.')


	# Visible for testing.
	def CreateDexSymbol(name, size, source_map, lambda_normalizer):
	  """Creates a models.Symbol for a single dex node.

	  Args:
	    name: Node name; space-separated (class_name, return_type, method_name).
	    size: Size to record on the symbol.
	    source_map: Dict used to look up the source path by outer class name.
	    lambda_normalizer: Object whose Normalize(class_path, full_name) returns
	      an (outer_class, full_name) tuple.

	  Returns:
	    A models.Symbol, or None when |name| is the <TOTAL> node.
	  """
	  parts = name.split(' ')  # (class_name, return_type, method_name)
	  new_package = parts[0]

	  if new_package == _TOTAL_NODE_NAME:
	    return None

	  # Make d8 desugared lambdas look the same as Desugar ones.
	  outer_class, name = lambda_normalizer.Normalize(new_package, name)

	  # Look for class merging: when the third token carries a dotted qualifier
	  # before its '(' (or before its last character for tokens without '('),
	  # that qualifier is used as the package instead.
	  old_package = new_package
	  if len(parts) > 2:
	    method = parts[2]
	    # last_idx == -1 for fields, which is fine.
	    last_idx = method.find('(')
	    last_idx = method.rfind('.', 0, last_idx)
	    if last_idx != -1:
	      old_package = method[:last_idx]
	      outer_class, name = lambda_normalizer.Normalize(old_package, name)

	  source_path = source_map.get(outer_class, '')
	  # Create a special meta-directory for outlined lambdas to easily monitor
	  # their total size and spot regressions.
	  if name.startswith(_OUTLINED_PREFIX):
	    object_path = posixpath.join(models.APK_PREFIX_PATH, 'Outlined',
	                                 *old_package.split('.'))
	  else:
	    object_path = posixpath.join(models.APK_PREFIX_PATH,
	                                 *old_package.split('.'))
	  # Names ending in ')' are methods; everything else goes in the plain section.
	  if name.endswith(')'):
	    section_name = models.SECTION_DEX_METHOD
	  else:
	    section_name = models.SECTION_DEX

	  return models.Symbol(section_name,
	                       size,
	                       full_name=name,
	                       object_path=object_path,
	                       source_path=source_path)


	def CreateDexSymbols(apk_path, mapping_path, size_info_prefix):
	  """Creates sorted dex symbols for an apk.

	  Args:
	    apk_path: Path of the apk to analyze.
	    mapping_path: Path of the ProGuard mapping file handed to apkanalyzer.
	    size_info_prefix: Path prefix; '<size_info_prefix>.jar.info' is read to
	      build the source map.

	  Returns:
	    A list of symbols: first those whose section is not models.SECTION_DEX
	    (i.e. methods), then those whose section is, each group sorted by
	    full_name.
	  """
	  source_map = _ParseJarInfoFile(size_info_prefix + '.jar.info')

	  nodes = _RunApkAnalyzer(apk_path, mapping_path)
	  nodes = UndoHierarchicalSizing(nodes)

	  dex_expected_size = _ExpectedDexTotalSize(apk_path)
	  total_node_size = sum(node_size for _, _, node_size in nodes)
	  # TODO(agrieve): Figure out why this log is triggering for
	  #     ChromeModernPublic.apk (https://crbug.com/851535).
	  # Reporting: dex_expected_size=6546088 total_node_size=6559549
	  if dex_expected_size < total_node_size:
	    logging.error(
	      'Node size too large, check for node processing errors. '
	      'dex_expected_size=%d total_node_size=%d', dex_expected_size,
	      total_node_size)

	  # Use (DEX_METHODS, DEX) buckets to speed up sorting.
	  method_symbols = []
	  dex_symbols = []
	  lambda_normalizer = LambdaNormalizer()
	  for _, name, node_size in nodes:
	    symbol = CreateDexSymbol(name, node_size, source_map, lambda_normalizer)
	    if symbol:
	      # Compare by value: `is` would depend on the section name being the very
	      # same object as the constant (presumably a string; verify in models).
	      if symbol.section_name == models.SECTION_DEX:
	        dex_symbols.append(symbol)
	      else:
	        method_symbols.append(symbol)

	  method_symbols.sort(key=lambda s: s.full_name)
	  dex_symbols.sort(key=lambda s: s.full_name)
	  method_symbols.extend(dex_symbols)
	  return method_symbols