blob: 1d3015b4c5665b64b10a8785a7c537f56fde7c74 [file] [log] [blame]
# Copyright 2022 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Functions for creating APK symbols."""
import logging
import os
import posixpath
import re
import zipfile
import archive_util
import arsc_parser
import file_format
import models
import zip_util
# Name of the zip entry that CreateArscSymbols() parses and that
# CreateApkOtherSymbols() skips.
_RESOURCES_ARSC_FILE = 'resources.arsc'
# Longest string value that _CreateStringSymbols() shows in full within a
# symbol name; longer values are cut short and suffixed with '...'.
_MAX_STRING_LEN = 30
class _ResourcePathDeobfuscator:
  """Translates obfuscated resource paths back to their original paths."""

  def __init__(self, pathmap_path):
    """Loads the mapping eagerly.

    Args:
      pathmap_path: Path of the pathmap file, or None to disable remapping.
    """
    self._pathmap = self._LoadResourcesPathmap(pathmap_path)

  def _LoadResourcesPathmap(self, pathmap_path):
    """Reads the pathmap of obfuscated resource paths.

    Blank lines and lines starting with '--' are skipped. Every other line
    must have the form '<original> -> <renamed>'; a line that does not split
    into exactly two parts raises ValueError.

    Returns:
      A dict mapping obfuscated paths to original paths. It is empty when
      |pathmap_path| is None.
    """
    obfuscated_to_original = {}
    if pathmap_path is None:
      return obfuscated_to_original
    with open(pathmap_path, 'r') as pathmap_file:
      for raw_line in pathmap_file:
        entry = raw_line.strip()
        if entry == '' or entry.startswith('--'):
          continue
        original, renamed = entry.split(' -> ')
        obfuscated_to_original[renamed] = original
    return obfuscated_to_original

  def MaybeRemapPath(self, path):
    """Returns the original path for |path|, or |path| itself if unmapped."""
    direct_match = self._pathmap.get(path)
    if direct_match:
      return direct_match
    # If processing a .minimal.apks, we are actually just processing the base
    # split, so retry the lookup with the split name in front.
    base_prefix = 'base/'
    base_match = self._pathmap.get(f'{base_prefix}{path}')
    if not base_match:
      return path
    # Drop the leading 'base/' characters, which we don't need because we are
    # looking directly inside the base apk.
    return base_match[len(base_prefix):]
class _ResourceSourceMapper:
  """Maps paths of resource entries in the APK to their source paths."""

  def __init__(self, size_info_prefix, path_defaults):
    """Builds the lookup table.

    Args:
      size_info_prefix: Path prefix of the '.res.info' file. When falsy, no
          file is read and the lookup table is empty.
      path_defaults: Optional dict of extra {apk path: source path} entries.
          It is merged into the table only when |size_info_prefix| is given.
    """
    self._path_defaults = path_defaults or {}
    if size_info_prefix:
      self._res_info = self._LoadResInfo(size_info_prefix)
    else:
      self._res_info = {}
    self._pattern_dollar_underscore = re.compile(r'\$+(.*?)(?:__\d)+')
    self._pattern_version_suffix = re.compile(r'-v\d+/')

  @staticmethod
  def _ParseResInfoFile(res_info_path):
    """Parses a file of tab-separated 'dest<TAB>source' lines into a dict."""
    with open(res_info_path, 'r') as info_file:
      return dict(line.rstrip().split('\t') for line in info_file)

  def _LoadResInfo(self, size_info_prefix):
    """Returns a dict mapping APK entry paths to source paths."""
    apk_res_info_path = size_info_prefix + '.res.info'
    res_info_without_root = self._ParseResInfoFile(apk_res_info_path)
    # We package resources in the res/ folder only in the apk.
    # Keys are compared against zip entry names, which always use '/', so
    # join with posixpath rather than the host's os.path.
    res_info = {
        posixpath.join('res', dest): source
        for dest, source in res_info_without_root.items()
    }
    res_info.update(self._path_defaults)
    return res_info

  def FindSourceForPath(self, path):
    """Returns the source path recorded for |path|, or '' if none is known."""
    # Sometimes android adds $ in front and __# before extension.
    path = self._pattern_dollar_underscore.sub(r'\1', path)
    path = archive_util.RemoveAssetSuffix(path)
    ret = self._res_info.get(path)
    if ret:
      return ret
    # Android build tools may append extra -v flags for the root dir.
    path = self._pattern_version_suffix.sub('/', path)
    return self._res_info.get(path) or ''
def _CreateTypeSpecSymbols(chunk, package_id, sym_source_path, names_by_id,
                           raw_symbols):
  """Appends symbols describing a type spec chunk to |raw_symbols|.

  Rather than report the type spec as a single symbol, a 4-byte symbol is
  created for each named resource. While the size is not representative, this
  at least allows determining which symbols were added/removed when diffing.

  Args:
    chunk: The type spec chunk. Reads |size|, |entry_count|, |id|,
        |type_str| and symbol_name().
    package_id: ID of the enclosing package, used as the top bits of each
        resource ID.
    sym_source_path: Source path assigned to every created symbol.
    names_by_id: Dict from resource ID to name. When falsy, one symbol
        covering the whole chunk is created instead.
    raw_symbols: List that new symbols are appended to.

  Returns:
    The number of bytes covered by the created symbols.
  """
  bytes_per_entry = 4
  assert chunk.size > chunk.entry_count * bytes_per_entry, (
      f'{chunk.type_str}: size={chunk.size}, count={chunk.entry_count}')

  def add_symbol(size, full_name):
    raw_symbols.append(
        models.Symbol(models.SECTION_ARSC,
                      size,
                      source_path=sym_source_path,
                      full_name=full_name))

  # Without a name table there is nothing to split the chunk by.
  if not names_by_id:
    add_symbol(chunk.size, chunk.symbol_name())
    return chunk.size

  unnamed_count = 0
  for entry_index in range(chunk.entry_count):
    resource_id = package_id << 24 | chunk.id << 16 | entry_index
    resource_name = names_by_id.get(resource_id)
    if resource_name:
      add_symbol(bytes_per_entry, resource_name)
    else:
      unnamed_count += 1

  # Unnamed can happen when using stable IDs, and aapt2 is forced to
  # leave gaps.
  if unnamed_count > 0:
    add_symbol(unnamed_count * bytes_per_entry, '<unnamed>')
  return chunk.entry_count * bytes_per_entry
def _CreateStringSymbols(chunk, sym_source_path, raw_symbols):
  """Appends one symbol per string pool entry to |raw_symbols|.

  Args:
    chunk: The string pool chunk. Reads |string_count|, GetString() and
        GetEncodedSize().
    sym_source_path: Source path assigned to every created symbol.
    raw_symbols: List that new symbols are appended to.

  Returns:
    The summed size of all created symbols.
  """
  # Deleting these three characters in one pass matches removing each in turn.
  whitespace_remover = dict.fromkeys(map(ord, '\r\n\t'))

  def name_for(text):
    if not text.isascii():
      # file_format.py currently requires ascii (maybe unnecessarily...)
      return '<non-ascii>'
    text = text.translate(whitespace_remover)
    if not text.isprintable():
      return '<non-printable>'
    if len(text) > _MAX_STRING_LEN:
      return f'"{text[:_MAX_STRING_LEN - 3]}"...'
    return f'"{text}"'

  bytes_covered = 0
  for index in range(chunk.string_count):
    # Do an extra initial truncation to make the ascii checks faster.
    symbol_name = name_for(chunk.GetString(index)[:_MAX_STRING_LEN + 1])
    # Include the offset uint32.
    symbol_size = 4 + chunk.GetEncodedSize(index)
    raw_symbols.append(
        models.Symbol(models.SECTION_ARSC,
                      symbol_size,
                      source_path=sym_source_path,
                      full_name=symbol_name))
    bytes_covered += symbol_size
  return bytes_covered
def CreateArscSymbols(apk_spec):
  """Creates symbols for the contents of the APK's resources.arsc entry.

  Args:
    apk_spec: Object describing the APK. Reads |apk_path| and |rtxt_path|.

  Returns:
    A tuple of (section_ranges, raw_symbols, metrics_by_file). |raw_symbols|
    and |metrics_by_file| are empty when the APK has no resources.arsc entry.
  """
  names_by_id = (arsc_parser.ParseRtxt(apk_spec.rtxt_path)
                 if apk_spec.rtxt_path else None)
  raw_symbols = []
  metrics_by_file = {}

  def add_symbol(size, path, full_name):
    raw_symbols.append(
        models.Symbol(models.SECTION_ARSC,
                      size,
                      source_path=path,
                      full_name=full_name))

  with zipfile.ZipFile(apk_spec.apk_path) as src_zip:
    arsc_infos = [
        entry for entry in src_zip.infolist()
        if entry.filename == _RESOURCES_ARSC_FILE
    ]
    if arsc_infos:
      assert len(arsc_infos) == 1
      arsc_info = arsc_infos[0]
      filename = arsc_info.filename
      metrics = {}
      arsc_data = src_zip.read(arsc_info)
      arsc_file = arsc_parser.ArscFile(arsc_data)
      source_path = posixpath.join(models.APK_PREFIX_PATH, filename)
      # Start from the full file size and subtract everything that gets
      # attributed to a symbol; the remainder is reported as overhead.
      overhead = len(arsc_data)
      package_id = None
      for inner_path, chunk in arsc_file.VisitPreOrder():
        if inner_path:
          sym_source_path = f'{source_path}/{inner_path}'
        else:
          sym_source_path = source_path

        if isinstance(chunk, arsc_parser.ArscResTablePackage):
          # Remembered for the type spec chunks visited after it.
          package_id = chunk.id
        elif isinstance(chunk, arsc_parser.ArscStringPool):
          count_before = len(raw_symbols)
          overhead -= _CreateStringSymbols(
              chunk, f'{sym_source_path}/{chunk.symbol_name()}', raw_symbols)
          logging.info('Created %d ARSC string pool symbols for %s',
                       len(raw_symbols) - count_before, chunk.symbol_name())
        elif isinstance(chunk, arsc_parser.ArscResTableTypeSpec):
          metrics[f'{models.METRICS_COUNT}/{chunk.type_str}'] = (
              chunk.entry_count)
          count_before = len(raw_symbols)
          overhead -= _CreateTypeSpecSymbols(chunk, package_id,
                                             sym_source_path, names_by_id,
                                             raw_symbols)
          logging.info('Created %d ARSC type spec symbols',
                       len(raw_symbols) - count_before)
        elif not chunk.children:  # Leaf chunk.
          name = chunk.symbol_name()
          overhead -= chunk.size
          # Placeholder bytes get their own symbol, split off from the chunk.
          add_symbol(chunk.size - chunk.placeholder, sym_source_path, name)
          if chunk.placeholder:
            add_symbol(chunk.placeholder, sym_source_path,
                       f'{name} (placeholders)')

      if overhead > 0:
        add_symbol(overhead, source_path, 'Overhead: ARSC')
      metrics_by_file[filename] = metrics

  section_ranges = {}
  arsc_total_size = sum(symbol.size for symbol in raw_symbols)
  archive_util.ExtendSectionRange(section_ranges, models.SECTION_ARSC,
                                  arsc_total_size)
  return section_ranges, raw_symbols, metrics_by_file
def CreateMetadata(apk_spec, include_file_details, shorten_path):
  """Returns metadata for the given apk_spec.

  Args:
    apk_spec: Object describing the APK. Reads |mapping_path|,
        |minimal_apks_path|, |split_name| and |apk_path|.
    include_file_details: When falsy, an empty dict is returned.
    shorten_path: Callable applied to every path before it is stored.

  Returns:
    A dict of metadata entries keyed by models.METADATA_* constants.
  """
  logging.debug('Constructing APK metadata')
  apk_metadata = {}
  if not include_file_details:
    return apk_metadata

  mapping_path = apk_spec.mapping_path
  if mapping_path:
    apk_metadata[models.METADATA_PROGUARD_MAPPING_FILENAME] = shorten_path(
        mapping_path)

  if not apk_spec.minimal_apks_path:
    apk_metadata[models.METADATA_APK_FILENAME] = shorten_path(
        apk_spec.apk_path)
    return apk_metadata

  # For a .minimal.apks input, record the bundle path plus the split name.
  apk_metadata[models.METADATA_APK_FILENAME] = shorten_path(
      apk_spec.minimal_apks_path)
  apk_metadata[models.METADATA_APK_SPLIT_NAME] = apk_spec.split_name
  return apk_metadata
def CreateApkOtherSymbols(apk_spec):
  """Creates symbols for resources / assets within the apk.

  Args:
    apk_spec: Object describing the APK. Reads |apk_path|,
        |size_info_prefix|, |path_defaults|, |resources_pathmap_path| and
        |ignore_apk_paths|.

  Returns:
    A tuple of (section_ranges, raw_symbols, apk_metadata, apk_metrics_by_file).
  """
  logging.info('Creating symbols for other APK entries')
  res_source_mapper = _ResourceSourceMapper(apk_spec.size_info_prefix,
                                            apk_spec.path_defaults)
  resource_deobfuscator = _ResourcePathDeobfuscator(
      apk_spec.resources_pathmap_path)
  raw_symbols = []
  compressed_total = 0
  zipalign_total = 0
  with zipfile.ZipFile(apk_spec.apk_path) as apk_zip:
    signing_block_size = zip_util.MeasureApkSignatureBlock(apk_zip)
    for entry in apk_zip.infolist():
      compressed_total += entry.compress_size
      # Account for zipalign overhead that exists in local file header, and
      # for zipalign overhead that exists in central directory header. The
      # latter happens when python aligns entries in apkbuilder.py, but does
      # not exist when using Android's zipalign. E.g. for bundle .apks files.
      zipalign_total += (zip_util.ReadZipInfoExtraFieldLength(apk_zip, entry) +
                         len(entry.extra))

      # Skip files that we explicitly analyze: .so, .dex, .pak, and .arsc.
      entry_name = entry.filename
      if (entry_name == _RESOURCES_ARSC_FILE
          or entry_name in apk_spec.ignore_apk_paths):
        continue

      resource_filename = resource_deobfuscator.MaybeRemapPath(entry_name)
      # Fall back to a path under the APK prefix when no source is known.
      source_path = (res_source_mapper.FindSourceForPath(resource_filename)
                     or posixpath.join(models.APK_PREFIX_PATH,
                                       resource_filename))
      other_symbol = models.Symbol(
          models.SECTION_OTHER,
          entry.compress_size,
          source_path=source_path,
          full_name=resource_filename)  # Full name must disambiguate
      raw_symbols.append(other_symbol)

  # Store zipalign overhead and signing block size as metadata rather than an
  # "Overhead:" symbol because they fluctuate in size, and would be a source of
  # noise in symbol diffs if included as symbols (http://crbug.com/1130754).
  # Might be even better if we had an option in Tiger Viewer to ignore certain
  # symbols, but taking this as a short-cut for now.
  apk_metadata = {
      models.METADATA_ZIPALIGN_OVERHEAD: zipalign_total,
      models.METADATA_SIGNING_BLOCK_SIZE: signing_block_size,
  }

  size_metric_key = f'{models.METRICS_SIZE}/{models.METRICS_SIZE_APK_FILE}'
  apk_file_metrics = {size_metric_key: os.path.getsize(apk_spec.apk_path)}
  apk_metrics_by_file = {
      posixpath.basename(apk_spec.apk_path): apk_file_metrics,
  }

  # Overhead includes:
  #  * Size of all local zip headers (minus zipalign padding).
  #  * Size of central directory & end of central directory.
  accounted_size = compressed_total + zipalign_total + signing_block_size
  overhead_size = os.path.getsize(apk_spec.apk_path) - accounted_size
  assert overhead_size >= 0, 'Apk overhead must be non-negative'
  raw_symbols.append(
      models.Symbol(models.SECTION_OTHER,
                    overhead_size,
                    full_name='Overhead: APK file'))

  section_ranges = {}
  other_total_size = sum(symbol.size for symbol in raw_symbols)
  archive_util.ExtendSectionRange(section_ranges, models.SECTION_OTHER,
                                  other_total_size)
  file_format.SortSymbols(raw_symbols)
  return section_ranges, raw_symbols, apk_metadata, apk_metrics_by_file