# blob: e0f96158e76eb9ca01c906a78d7c15c129ad6240 [file] [log] [blame]
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Utilities to process compresssed files."""
import contextlib
import logging
import os
import pathlib
import re
import shutil
import struct
import tempfile
import zipfile
class _ApkFileManager:
  """Extracts apk splits from .apks files into a directory and caches listings.

  Each .apks path handed to ExtractSplits() gets its own subdirectory of the
  directory passed to the constructor.
  """

  def __init__(self, temp_dir):
    """Initializes the manager.

    Args:
      temp_dir: Directory under which extracted splits are written.
    """
    self._temp_dir = pathlib.Path(temp_dir)
    # Maps .apks path -> name of its subdirectory within |_temp_dir|.
    self._subdir_by_apks_path = {}
    # Maps zip path -> cached result of zipfile.ZipFile(path).infolist().
    self._infolist_by_path = {}

  def _MapPath(self, path):
    """Returns the extraction directory for |path|, assigning one if needed."""
    # Use numbered subdirectories for uniqueness.
    # Suffix with basename(path) for readability.
    default = '-'.join(
        [str(len(self._subdir_by_apks_path)),
         os.path.basename(path)])
    return self._temp_dir / self._subdir_by_apks_path.setdefault(path, default)

  def InfoList(self, path):
    """Returns zipfile.ZipFile(path).infolist().

    The result is cached, so repeated calls for the same |path| open the zip
    only once.
    """
    ret = self._infolist_by_path.get(path)
    if ret is None:
      with zipfile.ZipFile(path) as z:
        ret = z.infolist()
      self._infolist_by_path[path] = ret
    return ret

  def SplitPath(self, minimal_apks_path, split_name):
    """Returns the path to the apk split extracted by ExtractSplits.

    Args:
      minimal_apks_path: The .apks file that was passed to ExtractSplits().
      split_name: The name of the split.

    Returns:
      Path to the extracted .apk file.

    Raises:
      KeyError: If |minimal_apks_path| was never passed to ExtractSplits().
    """
    subdir = self._subdir_by_apks_path[minimal_apks_path]
    # Names that already carry a suffix (e.g. "base-hi") map directly to a
    # file; bare names (e.g. "base") refer to the -master split.
    if '-' in split_name:
      name = f'{split_name}.apk'
    else:
      name = f'{split_name}-master.apk'
    return self._temp_dir / subdir / 'splits' / name

  def ExtractSplits(self, minimal_apks_path):
    """Extracts the master splits in the given .apks file.

    Args:
      minimal_apks_path: Path to the .apks file to extract from.

    Returns:
      List of split names, with "base" always appearing first.
    """
    dest = self._MapPath(minimal_apks_path)
    split_names = []
    logging.debug('Extracting %s', minimal_apks_path)
    with zipfile.ZipFile(minimal_apks_path) as z:
      for filename in z.namelist():
        # E.g.:
        # splits/base-master.apk
        # splits/base-hi.apk
        # splits/vr-master.apk
        # splits/vr-en.apk
        m = re.match(r'splits/(.*)-master\.apk', filename)
        if m:
          split_names.append(m.group(1))
          z.extract(filename, dest)
        # Also extract the -hi locale splits so that they are analyzed too.
        m = re.match(r'splits/(.*-hi)\.apk', filename)
        if m:
          split_names.append(m.group(1))
          z.extract(filename, dest)
    logging.debug('Extracting %s (done)', minimal_apks_path)
    # Make "base" come first since that's the main chunk of work.
    # Also so that --abi-filter detection looks at it first.
    return sorted(split_names, key=lambda x: (not x.startswith('base'), x))
@contextlib.contextmanager
def ApkFileManager():
  """Context manager that extracts apk splits to a temp dir.

  Yields:
    An _ApkFileManager rooted at a freshly created temp directory. The
    directory is removed when the context exits, even on error.
  """
  # Cannot use tempfile.TemporaryDirectory() here because our use of
  # multiprocessing results in __del__ methods being called in forked processes.
  temp_dir = tempfile.mkdtemp(suffix='-supersize')
  try:
    yield _ApkFileManager(temp_dir)
  finally:
    shutil.rmtree(temp_dir)
@contextlib.contextmanager
def UnzipToTemp(zip_path, inner_path):
  """Extract a |inner_path| from a |zip_path| file to an auto-deleted temp file.

  Args:
    zip_path: Path to the zip file.
    inner_path: Path to the file within |zip_path| to extract.

  Yields:
    The path of the temp created (and auto-deleted when context exits).

  Raises:
    KeyError: If |inner_path| does not exist within |zip_path|.
  """
  logging.debug('Extracting %s', inner_path)
  _, suffix = os.path.splitext(inner_path)
  # Can't use NamedTemporaryFile() because it deletes via __del__, which will
  # trigger in both this and the fork()'ed processes.
  fd, temp_file = tempfile.mkstemp(suffix=suffix)
  try:
    # os.fdopen() takes ownership of |fd| and closes it when the block exits.
    with os.fdopen(fd, 'wb') as dst, zipfile.ZipFile(zip_path) as z:
      with z.open(inner_path) as src:
        shutil.copyfileobj(src, dst)
    logging.debug('Extracting %s (done)', inner_path)
    yield temp_file
  finally:
    # Runs on normal exit and on error, so the temp file is never left behind.
    os.unlink(temp_file)
def ReadZipInfoExtraFieldLength(zip_file, zip_info):
  """Reads the value of |extraLength| from |zip_info|'s local file header.

  |zip_info| has an |extra| field, but it's read from the central directory.
  Android's zipalign tool sets the extra field only in local file headers.

  Args:
    zip_file: An open zipfile.ZipFile; its underlying file object is seeked.
    zip_info: The zipfile.ZipInfo of the entry to inspect.

  Returns:
    The extra field length stored in the entry's local file header.
  """
  # Refer to the ZIP local file header layout: the 2-byte little-endian
  # "extra field length" sits at byte offset 28 of the header.
  zip_file.fp.seek(zip_info.header_offset + 28)
  return struct.unpack('<H', zip_file.fp.read(2))[0]
def MeasureApkSignatureBlock(zip_file):
  """Measures the size of the v2 / v3 signing block.

  The size is computed as the gap between the end of the last zip entry and
  the start of the central directory.

  Refer to the APK Signature Scheme v2 documentation.

  Args:
    zip_file: An open zipfile.ZipFile; its underlying file object is seeked.

  Returns:
    Number of bytes between the last entry's data and the central directory.
  """
  # Seek to "end of central directory" struct.
  eocd_offset_from_end = -22 - len(zip_file.comment)
  zip_file.fp.seek(eocd_offset_from_end, os.SEEK_END)
  assert zip_file.fp.read(4) == b'PK\005\006', (
      'failed to find end-of-central-directory')

  # Read out the "start of central directory" offset.
  zip_file.fp.seek(eocd_offset_from_end + 16, os.SEEK_END)
  start_of_central_directory = struct.unpack('<I', zip_file.fp.read(4))[0]

  # Compute the offset after the last zip entry.
  # NOTE(review): this counts only the local header and compressed data; it
  # assumes no data descriptor follows the last entry - confirm.
  last_info = max(zip_file.infolist(), key=lambda i: i.header_offset)
  last_header_size = (30 + len(last_info.filename) +
                      ReadZipInfoExtraFieldLength(zip_file, last_info))
  end_of_last_file = (last_info.header_offset + last_header_size +
                      last_info.compress_size)
  return start_of_central_directory - end_of_last_file