blob: f0c66d49e20794b76ae940c14fb8e0832670ebe0 [file] [log] [blame]
#!/usr/bin/env python
# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Generate report files to view and/or compare (diff) milestones.
Size files are located in a Google Cloud Storage bucket for various Chrome
versions. This script generates various HTML report files to view a single
milestone, or to compare two milestones with the same CPU and APK.
Desired CPUs, APKs, and milestone versions are set in constants below. If
specified by the --skip-existing flag, the script checks what HTML report files
have already been uploaded to the GCS bucket, then works on generating the
remaining desired files.
Size files are fetched by streaming them from the source bucket, then the
html_report module handles creating a report file to diff two size files.
Reports are saved to a local directory, and once all reports are created they
can be uploaded to the destination bucket.
Reports can be uploaded automatically with the --sync flag. Otherwise, they can
be uploaded at a later point.
"""
import argparse
import collections
import contextlib
import errno
import itertools
import json
import logging
import multiprocessing
import os
import re
import shutil
import sys
import subprocess
import tempfile
# Storage prefix for generated reports. It is listed to discover reports that
# already exist and is the final argument of the rsync command built in main().
_PUSH_URL = 'gs://chrome-supersize/milestones/'
# CPU and APK names combined pairwise by _EnumerateReports().
_DESIRED_CPUS = ['arm', 'arm_64']
# Measure Chrome.apk since it's not a bundle.
_DESIRED_APKS = ['Monochrome.apk', 'Chrome.apk', 'AndroidWebview.apk']
# Versions are manually gathered from
# NOTE(review): the rest of the comment above and the assignment that opens the
# version list appear to be missing from this copy; only one entry is visible.
# TODO confirm against the upstream file.
'74.0.3729.11', # Beta
def _VersionTuple(version):
    """Converts a dotted version string into a tuple of ints.

    Tuples compare component-wise, so the result orders versions numerically
    (e.g. '9.0' before '10.0') rather than lexicographically.

    Args:
      version: Dotted numeric version string, e.g. '74.0.3729.11'.

    Returns:
      Tuple of ints, one per dot-separated component.

    Raises:
      ValueError: If any component is not a valid integer.
    """
    return tuple(int(part) for part in version.split('.'))
def _IsBundle(apk, version):
    """Returns whether |apk| at |version| is treated as a bundle.

    Only 'Monochrome.apk' at version 73 or later qualifies.

    Args:
      apk: APK file name, e.g. 'Monochrome.apk'.
      version: Dotted numeric version string.

    Returns:
      True if the APK name and version match the bundle rule, else False.
    """
    # The name test comes first so other APKs never have their version parsed.
    return apk == 'Monochrome.apk' and _VersionTuple(version) >= (73,)
# Generator of Report tuples for each CPU/APK combination: one "view" report
# (before_version=None) per version, plus one "compare" report for each version
# paired with every later entry of the version list.
# NOTE(review): this copy has lost its indentation and appears to be missing
# lines: the arm_64/Chrome.apk test below has no body, and |versions| is read
# before any visible assignment. TODO confirm against the upstream file.
def _EnumerateReports():
for cpu, apk in itertools.product(_DESIRED_CPUS, _DESIRED_APKS):
# KitKat doesn't support arm64.
if cpu == 'arm_64' and apk == 'Chrome.apk':
# Webview .size files do not exist before M71.
if apk == 'AndroidWebview.apk':
versions = [v for v in versions if _VersionTuple(v) >= (71,)]
# View reports: each version on its own.
for after_version in versions:
yield Report(cpu, apk, None, after_version)
# Compare reports: each version against every entry that follows it.
for i, before_version in enumerate(versions):
for after_version in versions[i + 1:]:
yield Report(cpu, apk, before_version, after_version)
def _TemplateToRegex(template):
    """Compiles a str.format()-style template into a regex that parses it.

    Transform '{cpu}/{apk}/... -> (?P<cpu>[^/]+)/(?P<apk>[^/]+)/...

    Each '{name}' placeholder becomes a named group matching one or more
    non-'/' characters. Text between placeholders is escaped so that it only
    matches itself (e.g. the '.' in '.ndjson' no longer matches any character).

    Args:
      template: Template string containing '{name}' placeholders.

    Returns:
      Compiled regular expression with one named group per placeholder.
    """
    # re.split() with a capturing group alternates literal text (even indices)
    # with the captured placeholder names (odd indices).
    pieces = re.split(r'{(.*?)}', template)
    pattern = ''.join(
        '(?P<%s>[^/]+)' % piece if i % 2 else re.escape(piece)
        for i, piece in enumerate(pieces))
    return re.compile(pattern)
# Immutable description of one report: (cpu, apk, before_version,
# after_version). before_version is None for a single-version "view" report and
# set for a two-version "compare" report.
# NOTE(review): this copy has lost its indentation and appears to be missing
# lines. _NDJSON_TEMPLATE_COMPARE, _PUSH_URL_REGEX_COMPARE and
# _PUSH_URL_REGEX_VIEW are referenced below but not defined in view. Callers in
# this file invoke FromUrl on the class itself and read the *_subpath members
# as plain attributes, so decorators were presumably dropped as well. TODO
# confirm against the upstream file.
class Report(
collections.namedtuple('Report', 'cpu,apk,before_version,after_version')):
# Relative path template of a view report's .ndjson file.
_NDJSON_TEMPLATE_VIEW = '{cpu}/{apk}/report_{after_version}.ndjson'
# Parses |url| into a Report, or returns None when neither URL pattern matches.
# Fields are filled from the regex's named groups, so they are strings.
def FromUrl(cls, url):
# Perform this match first since it's more restrictive.
match = cls._PUSH_URL_REGEX_COMPARE.match(url)
if match:
return cls(**match.groupdict())
match = cls._PUSH_URL_REGEX_VIEW.match(url)
if match:
return cls(before_version=None, **match.groupdict())
return None
# Builds '<version>/<cpu>/<apk>.size' for |version|; for bundles (see
# _IsBundle) every '.apk' in the result is replaced with '.minimal.apks'.
def _CreateSizeSubpath(self, version):
ret = '{version}/{cpu}/{apk}.size'.format(version=version, **self._asdict())
if _IsBundle(self.apk, version):
ret = ret.replace('.apk', '.minimal.apks')
return ret
# Subpath of the "before" .size file, or None for a view report.
def before_size_file_subpath(self):
if self.before_version:
return self._CreateSizeSubpath(self.before_version)
return None
# Subpath of the "after" .size file.
def after_size_file_subpath(self):
return self._CreateSizeSubpath(self.after_version)
# Relative path of this report's .ndjson output: the compare template when
# before_version is set, otherwise the view template.
def ndjson_subpath(self):
if self.before_version:
return self._NDJSON_TEMPLATE_COMPARE.format(**self._asdict())
return self._NDJSON_TEMPLATE_VIEW.format(**self._asdict())
def _MakeDirectory(path):
    """Creates |path| and any missing parents, tolerating prior existence.

    Args:
      path: Directory path to create.

    Raises:
      OSError: If creation fails for any reason other than EEXIST.
    """
    # Function is safe even from racing fork()ed processes: instead of checking
    # for existence first, attempt the creation and ignore "already exists".
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
def _Shard(func, arg_tuples):
    """Runs |func| over |arg_tuples| in a process pool, yielding each result.

    Results are yielded as they complete, which is not necessarily input order.

    Args:
      func: Picklable callable taking a single argument.
      arg_tuples: Iterable of arguments, one per call to |func|.

    Yields:
      The return value of each func(arg) call.
    """
    pool = multiprocessing.Pool()
    try:
        for result in pool.imap_unordered(func, arg_tuples):
            yield result
    finally:
        # Release the worker processes whether iteration finished, a task
        # raised, or the consumer stopped early. Once every result has been
        # yielded no work is outstanding, so terminate() discards nothing.
        pool.terminate()
        pool.join()
# Worker passed to _Shard: copies one file from |base_url|/|subpath| to
# |temp_dir|/|subpath| by running an external command.
# |arg_tuples| is a single (subpath, temp_dir, base_url) tuple, packed because
# the pool hands each task exactly one argument.
# NOTE(review): this copy has lost its indentation, and argv[0] below is an
# empty string -- presumably the name of the storage CLI was stripped. Nothing
# visible creates dest's parent directory either. TODO confirm against the
# upstream file.
def _DownloadOneSizeFile(arg_tuples):
subpath, temp_dir, base_url = arg_tuples
src = '{}/{}'.format(base_url, subpath)
dest = os.path.join(temp_dir, subpath)
subprocess.check_call(['', '-q', 'cp', src, dest])
# Downloads the .size files needed by |reports| from |base_url| into a fresh
# temporary directory, in parallel via _Shard, then yields that directory.
# NOTE(review): this copy has lost its indentation and appears to be missing
# lines: the statement that adds the "before" subpaths is cut off, the loop
# over _Shard has no body, and no cleanup of the temporary directory is
# visible. main() uses this function in a `with` statement, so a
# context-manager decorator was presumably dropped too. TODO confirm against
# the upstream file.
def _DownloadSizeFiles(base_url, reports):
temp_dir = tempfile.mkdtemp()
# A set, so a .size file shared by several reports is fetched only once.
subpaths = set(x.after_size_file_subpath for x in reports)
for x in reports
if x.before_size_file_subpath)
logging.warning('Downloading %d .size files', len(subpaths))
arg_tuples = ((p, temp_dir, base_url) for p in subpaths)
for _ in _Shard(_DownloadOneSizeFile, arg_tuples):
yield temp_dir
# Yields a Report for each line of a recursive listing under _PUSH_URL that
# Report.FromUrl() can parse; other lines are skipped.
# NOTE(review): this copy has lost its indentation, and argv[0] below is an
# empty string -- presumably the name of the storage CLI was stripped. TODO
# confirm against the upstream file.
def _FetchExistingMilestoneReports():
milestones = subprocess.check_output(
['', 'ls', '-R', _PUSH_URL + '*'])
# The first line of the listing is always skipped.
for path in milestones.splitlines()[1:]:
report = Report.FromUrl(path)
if report:
yield report
# Writes a JSON object with a top-level 'pushed' key to |path|, with sorted
# keys and 2-space indentation.
# NOTE(review): this copy has lost its indentation and the contents of the
# 'pushed' dict are missing, leaving the literal unclosed. TODO confirm against
# the upstream file.
def _WriteMilestonesJson(path):
with open(path, 'w') as out_file:
# TODO(agrieve): Record the full list of reports rather than three arrays
# so that the UI can prevent selecting non-existent entries.
pushed_reports_obj = {
'pushed': {
json.dump(pushed_reports_obj, out_file, sort_keys=True, indent=2)
# Worker passed to _Shard: runs the sibling 'supersize' script's html_report
# command to produce one report's .ndjson file under |output_directory|.
# |arg_tuples| is a single (report, output_directory, size_file_directory)
# tuple, packed because the pool hands each task exactly one argument.
# NOTE(review): this copy has lost its indentation and both os.path.join(...)
# calls that locate the .size files are cut off after their first argument.
# TODO confirm against the upstream file.
def _BuildOneReport(arg_tuples):
report, output_directory, size_file_directory = arg_tuples
ndjson_path = os.path.join(output_directory, report.ndjson_subpath)
# The script is looked up next to this file.
script = os.path.join(os.path.dirname(__file__), 'supersize')
after_size_file = os.path.join(size_file_directory,
args = [script, 'html_report', after_size_file, ndjson_path]
# Compare reports additionally pass the "before" file to diff against.
if report.before_version:
before_size_file = os.path.join(size_file_directory,
args += ['--diff-with', before_size_file]
# stderr is folded into the captured output rather than printed.
subprocess.check_output(args, stderr=subprocess.STDOUT)
def _CreateReportObjects(skip_existing):
    """Returns the sorted list of reports that should be generated.

    The storage bucket is always queried, so that stale reports can be counted
    and logged even when |skip_existing| is False.

    Args:
      skip_existing: If True, return only desired reports that are not already
        in the bucket; otherwise return every desired report.

    Returns:
      Sorted list of Report tuples.
    """
    def sort_key(report):
        # before_version is None for view reports. Mapping it to '' keeps them
        # ahead of compare reports (as None sorts natively on Python 2) while
        # avoiding a None-vs-str comparison, which raises on Python 3.
        return (report.cpu, report.apk, report.before_version or '',
                report.after_version)

    desired_reports = set(_EnumerateReports())
    logging.warning('Querying storage bucket for existing reports.')
    existing_reports = set(_FetchExistingMilestoneReports())
    missing_reports = desired_reports - existing_reports
    stale_reports = existing_reports - desired_reports
    if stale_reports:
        # Stale reports happen when we remove a version
        # (e.g. update a beta to a stable).
        # It's probably best to leave them in case people have linked to them.
        logging.warning('Number of stale reports: %d', len(stale_reports))
    if skip_existing:
        return sorted(missing_reports, key=sort_key)
    return sorted(desired_reports, key=sort_key)
# Entry point: parses flags, validates the bucket URL and output directory,
# determines which reports to build, downloads the needed .size files, builds
# the reports in parallel, writes milestones.json, and assembles an rsync
# command targeting _PUSH_URL.
# NOTE(review): this copy has lost its indentation and is missing many lines
# and tokens: the parser.add_argument( openers, the logging setup call, the
# output-directory expression (see the empty slots in "(r,, sizes_dir)" and
# "os.path.join(, ..."), several closing brackets, and the body of the
# "if args.sync:" branch. TODO confirm against the upstream file.
def main():
parser = argparse.ArgumentParser(description=__doc__)
# NOTE(review): the help text says the directory "must not exist", but the
# check further down reports 'Directory must be empty' -- confirm which holds.
'directory', help='Directory to save report files to (must not exist).')
help='GCS bucket to find size files in. (e.g. "gs://bucket/subdir")')
help='Sync data files to GCS (otherwise just prints out command to run).')
'--skip-existing', action='store_true', help='Skip existing reports.')
args = parser.parse_args()
# Anything lower than WARNING gives screens full of supersize logs.
format='%(levelname).1s %(relativeCreated)6d %(message)s')
# NOTE(review): the stripped value is only used for the gs:// check here; the
# un-stripped args.size_file_bucket is what gets passed to _DownloadSizeFiles
# below, so a trailing '/' would yield '//' in the download URLs.
size_file_bucket = args.size_file_bucket.rstrip('/')
if not size_file_bucket.startswith('gs://'):
parser.error('Size file bucket must start with gs://')
if os.listdir(
parser.error('Directory must be empty')
reports_to_make = _CreateReportObjects(args.skip_existing)
if not reports_to_make:
logging.warning('No reports need to be created (due to --skip-existing).')
with _DownloadSizeFiles(args.size_file_bucket, reports_to_make) as sizes_dir:
logging.warning('Generating %d reports.', len(reports_to_make))
arg_tuples = ((r,, sizes_dir) for r in reports_to_make)
# '\r' rewrites the same terminal line to show a running count.
for i, _ in enumerate(_Shard(_BuildOneReport, arg_tuples)):
sys.stdout.write('\rGenerated {} of {}'.format(i + 1,
_WriteMilestonesJson(os.path.join(, 'milestones.json'))
logging.warning('Reports saved to %s',
cmd = [
'', '-m', 'rsync', '-J', '-a', 'public-read', '-r',, _PUSH_URL,
if args.sync:
# Python 2 print statements.
print 'Sync files by running:'
print ' ', ' '.join(cmd)
if __name__ == '__main__':