# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Script to bisect a VR perf regression.
This is only meant to be used on a developer's workstation, not on bots or any
other sort of automated infrastructure. This is to help reduce the amount of
work necessary to bisect VR perf regressions until the perf dashboard supports
automatic bisects on devices outside of the Telemetry lab.
As a result, this is a bit crude and makes a number of assumptions.
"""
import argparse
import json
import os
import shutil
import subprocess
import sys
import tempfile
import time
# NOTE(review): the next three lines look like entries of the
# SUPPORTED_JSON_RESULT_FORMATS mapping that ParseArgsAndAssertValid() checks
# --expected-json-result-format against (format name -> description), but the
# assignment line and the end of the literal (the rest of the 'printedjson'
# description and the closing brackets) are not present here. Restore them
# from version control.
'chartjson': 'The standard JSON output format for Telemetry.',
'printedjson': ('The JSON output format generated from parsing stdout from '
'a test. This is the format used by tests that use '
# The revision that changed the way perf tests are run on swarming.
PERF_FORMAT_CHANGE_REVISION = '0d271f583489024e3c163f0538d1ec29097b3615'
class TempDir(object):
  """Context manager for temp dirs since Python 2 doesn't have one.

  Entering the context creates a fresh temporary directory and yields its
  path; leaving the context deletes the directory and everything in it.
  """

  def __enter__(self):
    """Creates the temporary directory and returns its path."""
    self._dirpath = tempfile.mkdtemp()
    return self._dirpath

  def __exit__(self, type, value, traceback):
    """Deletes the temporary directory; exceptions are not suppressed."""
    # Without this cleanup every use would leak a directory of results.
    shutil.rmtree(self._dirpath)
class SwitchDirsIfNotChromiumBisect(object):
  """Context manager for switching between another repo and Chromium src.

  No-op if the --bisect-repo option is not used, otherwise changes directories
  to the specified repo's directory, then returns to the original directory when
  the context is left.
  """

  def __init__(self, args):
    """Records where to switch to and where to come back to.

    Args:
      args: The parsed args from argparse; only args.bisect_repo is read.
    """
    # The starting directory is captured at construction time, not on entry.
    self.starting_directory = os.getcwd()
    self.target_directory = args.bisect_repo

  def __enter__(self):
    """Changes into the bisected repo if one was specified."""
    if self.target_directory:
      os.chdir(self.target_directory)

  def __exit__(self, type, value, traceback):
    """Returns to the starting directory if a switch was made."""
    if self.target_directory:
      os.chdir(self.starting_directory)
def VerifyCwd():
  """Checks that the script is being run from the Chromium root directory.

  Not robust in the slightest, but should catch common issues like running the
  script from the directory that it's located in.

  Raises:
    RuntimeError: If the current working directory is not named 'src'.
  """
  if os.path.basename(os.getcwd()) != 'src':
    raise RuntimeError('Script must be run from the Chromium root directory')
def ParseArgsAndAssertValid():
  """Parses all the provided arguments and ensures everything is valid.

  Arguments are read from sys.argv. Anything the parser does not recognize is
  returned separately instead of being treated as an error.

  Returns:
    A (args, unknown_args) tuple: the namespace of recognized arguments and
    the list of arguments the parser did not recognize.

  Raises:
    RuntimeError: If an argument required for the selected bisect mode is
        missing or an argument has an unsupported value.
  """
  # NOTE(review): the flag names for --bisect-repo, --apply-stash-before-sync,
  # --benchmark, --swarming-server, --isolate-server, --isolate-target,
  # --build-output-dir, --diff-build-output-dir and --apk-name, the
  # 'checkout_overrides' dest, and the tails of a few help/error strings were
  # reconstructed from how `args` is used elsewhere in this file. Confirm them
  # against version control.
  parser = argparse.ArgumentParser()

  def comma_separated(arg):
    """Converts a 'key,value' string into a single-entry {key: value} dict."""
    split_arg = arg.split(',')
    if len(split_arg) != 2:
      # ArgumentTypeError is the exception argparse expects from a type=
      # callable; its message is shown to the user. ArgumentError needs the
      # offending argument object as well and cannot be built from a message
      # alone.
      raise argparse.ArgumentTypeError(
          'Expected two comma-separated strings but '
          'received %d' % len(split_arg))
    return {split_arg[0]: split_arg[1]}

  # Arguments related to the actual bisection
  bisect_group = parser.add_argument_group('bisect arguments')
  bisect_group.add_argument(
      '--binary-size-bisect', action='store_true',
      help='Bisect a binary size regression instead of a '
           'regular perf regression.')
  bisect_group.add_argument(
      '--good-revision', required=True,
      help='A known good Git revision')
  bisect_group.add_argument(
      '--bad-revision', required=True,
      help='A known bad Git revision')
  bisect_group.add_argument(
      '--metric', required=True,
      help='The perf metric being bisected')
  bisect_group.add_argument(
      '--story', required=True,
      help='The perf story to check the affected metric in')
  bisect_group.add_argument(
      '--good-value', type=float,
      help='The value of the metric at the good revision. If '
           'not defined, an extra test iteration will be run '
           'to determine the value')
  bisect_group.add_argument(
      '--bad-value', type=float,
      help='The value of the metric at the bad revision. If '
           'not defined, an extra test iteration will be run '
           'to determine the value')
  bisect_group.add_argument(
      '--checkout-override', action='append',
      type=comma_separated, default=[],
      # The rest of the script reads args.checkout_overrides (plural).
      dest='checkout_overrides',
      help='A comma-separated path/revision key/value pair. '
           'Each git checkout at the specified path will be '
           'synced to the specified revision after the normal '
           'sync. For example, passing '
           'third_party/android_ndk,abcdefg would cause the '
           'checkout in //third_party/android_ndk to be synced '
           'to revision abcdefg.')
  bisect_group.add_argument(
      '--bisect-repo',
      help='A path to the repo that will be bisected instead '
           'the Chromium src repo. Meant to be used for '
           'bisecting rolls of DEPS (e.g V8 or Skia) after '
           'an initial bisect finds that a roll is the culprit '
           'CL. Using this option will disable any syncing of '
           'the Chromium src repo, so ensure that you are '
           'synced to the correct src revision before running '
           'with this option.')
  bisect_group.add_argument(
      '--reset-before-sync', action='store_true',
      help='When set, runs "git reset --hard HEAD" before '
           'syncing. This has the potential to accidentally '
           'delete any uncommitted changes, but can help avoid '
           'random bisect failures.')
  bisect_group.add_argument(
      '--num-attempts-before-marking-good', type=int, default=1,
      help='The number of times the test will be run before '
           'a revision can be marked as good. If all runs are '
           'found to be good, then the revision is good, '
           'otherwise bad. Overriding this can help when '
           'bisecting flaky metrics that fluctuate between '
           'good/bad values, but can significantly increase '
           'bisect time.')
  bisect_group.add_argument(
      '--expected-json-result-format', default='chartjson',
      help='The data format the JSON results from the test are '
           'expected to be in. Supported values are: ' +
           ', '.join(sorted(SUPPORTED_JSON_RESULT_FORMATS)))
  bisect_group.add_argument(
      '--manual-mode', action='store_true', default=False,
      help='Does not automatically run gclient sync and waits '
           'for user input before starting the build/run '
           'process. Useful if a bisect needs to be run within '
           'a range where some change breaks gclient sync and '
           'a patch needs to be applied at each step before '
           'building.')
  bisect_group.add_argument(
      '--apply-stash-before-sync',
      help='Applies the given stash entry (e.g. "stash@{0}"), '
           'commits it, and syncs to the new revision during '
           'each iteration. Primary use case for this is '
           'fixing bad DEPS entries in the revision range.')
  bisect_group.add_argument(
      '--benchmark',
      help='The benchmark to run when bisecting a perf test. '
           'This will be automatically translated to the '
           'correct format and passed to the test, as the '
           'format is different depending on when the '
           'regression occured.')

  swarming_group = parser.add_argument_group('swarming arguments')
  swarming_group.add_argument(
      '--swarming-server',
      help='The swarming server to trigger the test on')
  swarming_group.add_argument(
      '--isolate-server',
      help='The isolate server to upload the test to')
  swarming_group.add_argument(
      '--dimension', action='append', type=comma_separated,
      default=[], dest='dimensions',
      help='A comma-separated swarming dimension key/value '
           'pair. At least one must be provided.')
  swarming_group.add_argument(
      '--isolate-target',
      help='The target to isolate. Defaults to the build '
           'target.')

  compile_group = parser.add_argument_group('compile arguments')
  compile_group.add_argument(
      '-j', '--parallel-jobs', type=int, default=1000,
      help='The number of parallel jobs ninja will compile '
           'with.')
  compile_group.add_argument(
      '-l', '--load-limit', type=int, default=70,
      help='Don\'t start new ninja jobs if the average CPU '
           'is above this')
  compile_group.add_argument(
      '--build-output-dir',
      default=os.path.join('out', 'Release'),
      help='The directory that builds will take place in. '
           'Assumes that gn args have already been generated '
           'for the provided directory. Must be relative to '
           'the Chromium src/ directory, e.g. out/Release. '
           'When using --binary-size-bisect, this should be '
           'the output directory for the base build.')
  compile_group.add_argument(
      '--diff-build-output-dir',
      default=os.path.join('out', 'Release_diff'),
      help='Only used when --binary-size-bisect is set. The '
           'same as --build-output-dir, but for the diff '
           'build.')
  compile_group.add_argument(
      '--regenerate-args-after-sync', action='store_true',
      help='Causes the build output directory to be deleted '
           'and re-created using the same gn args after each '
           'sync. Normally not necessary, but can work around '
           'weird issues like the build target not being '
           'available unless the directory is re-created.')
  compile_group.add_argument(
      '--build-target', required=True,
      help='The target to build for testing')
  compile_group.add_argument(
      '--apk-name',
      help='Only used when --binary-size-bisect is set. The '
           'name of the APK that will be diffed.')

  args, unknown_args = parser.parse_known_args()

  if args.binary_size_bisect:
    if not args.apk_name:
      raise RuntimeError(
          '--apk-name must be set when using --binary-size-bisect.')
    if not args.diff_build_output_dir:
      raise RuntimeError('--diff-build-output-dir must be set when using '
                         '--binary-size-bisect.')
  else:
    # Set defaults.
    if not args.isolate_target:
      args.isolate_target = args.build_target
    # Make sure we have at least one swarming dimension.
    if not args.dimensions:
      raise RuntimeError('No swarming dimensions provided')
    if not args.benchmark:
      raise RuntimeError('No benchmark provided with --benchmark')
    # Make sure we have all the information we need in order to run on
    # Swarming.
    if not (args.swarming_server and args.isolate_server):
      raise RuntimeError('--swarming-server and --isolate-server must be set '
                         'when running a non-binary-size bisection.')

  # Make sure we're set to run at least one attempt per revision
  if args.num_attempts_before_marking_good < 1:
    raise RuntimeError(
        '--num-attempts-before-marking-good set to invalid value %d' %
        args.num_attempts_before_marking_good)

  # Make sure the provided data format is supported.
  if args.expected_json_result_format not in SUPPORTED_JSON_RESULT_FORMATS:
    raise RuntimeError(
        '--expected-json-result-format set to invalid value %s' %
        args.expected_json_result_format)

  # Determining initial values is not currently supported if we're bisecting a
  # roll. Since bisecting a roll is almost always a product of a normal bisect
  # pointing to a roll anyways, the user should have the good/bad values
  # already.
  if args.bisect_repo and (args.good_value is None or args.bad_value is None):
    raise RuntimeError(
        '--bisect-repo requires good and bad values to be set.')

  return (args, unknown_args)
def VerifyInput(args, unknown_args):
  """Verifies with the user that the provided args are satisfactory.

  Prints a summary of what the bisect is going to do and asks for
  confirmation. Exits the process unless the user answers 'y'.

  Args:
    args: The known args parsed by the argument parser
    unknown_args: The unknown args parsed by the argument parser
  """
  print('======')
  if args.manual_mode:
    print('Script is running in manual mode - you must manually run gclient '
          'sync on each revision')
  if args.bisect_repo:
    print('Script is set to bisect %s instead of Chromium src. gclient sync '
          'will not be run, so ensure you are synced to the correct revision '
          'and any patches, etc. you need are applied before running.' %
          args.bisect_repo)
  if args.binary_size_bisect:
    print('Script is running in binary size bisect mode.')
  print('This will start a bisect for a for:')
  print('Metric: %s' % args.metric)
  print('Story: %s' % args.story)
  if args.benchmark:
    print('In the benchmark %s' % args.benchmark)

  if args.good_value is None and args.bad_value is None:
    print('The good and bad values at %s and %s will be determined' % (
        args.good_revision, args.bad_revision))
  elif args.good_value is None:
    print('The good value at %s will be determined, and the bad value of %f '
          'at %s will be used' % (args.good_revision, args.bad_value,
                                  args.bad_revision))
  elif args.bad_value is None:
    print('The good value of %f at %s will be used, and the bad value at %s '
          'will be determined' % (args.good_value, args.good_revision,
                                  args.bad_revision))
  else:
    print('Changing from %f at %s to %f at %s' % (
        args.good_value, args.good_revision, args.bad_value,
        args.bad_revision))

  if args.checkout_overrides:
    for pair in args.checkout_overrides:
      for key, val in pair.items():
        print('%s will be synced to revision %s' % (key, val))
  if args.apply_stash_before_sync:
    print('The stash entry %s will be applied before each sync' % (
        args.apply_stash_before_sync))
  if args.num_attempts_before_marking_good > 1:
    print('Each revision must be found to be good %d times before actually '
          'being marked as good' % args.num_attempts_before_marking_good)
  print('The data format that will be expected is %s: %s' % (
      args.expected_json_result_format,
      SUPPORTED_JSON_RESULT_FORMATS[args.expected_json_result_format]))
  print('======')

  if args.binary_size_bisect:
    print('%s will be built in both %s and %s' % (
        args.build_target, args.build_output_dir, args.diff_build_output_dir))
  else:
    print('The test target %s will be built to %s' % (
        args.build_target, args.build_output_dir))
  print('%d parallel jobs will be used with a load limit of %d' % (
      args.parallel_jobs, args.load_limit))
  if args.regenerate_args_after_sync:
    print('The build output directory will be recreated after each sync')
  print('======')

  # Swarming details only apply when the test actually runs on swarming.
  if not args.binary_size_bisect:
    print('The target %s will be isolated and uploaded to %s' % (
        args.isolate_target, args.isolate_server))
    print('The test will be triggered on %s with the following dimensions:' % (
        args.swarming_server))
    for pair in args.dimensions:
      for key, val in pair.items():
        print('%s = %s' % (key, val))
  # NOTE(review): the extent of the block above was reconstructed; confirm
  # whether the following summary was meant to be swarming-only as well.
  print('======')
  print('The test will be run with these additional arguments:')
  for extra_arg in unknown_args:
    print(extra_arg)
  print('======')

  if args.reset_before_sync:
    print('**WARNING** This will run git reset --hard HEAD')
    print('If you have any uncommitted changes, commit or stash them beforehand')

  if raw_input('Are these settings correct? y/N').lower() != 'y':
    print('Aborting')
    # Without an exit here the bisect would start despite the refusal.
    sys.exit(1)
def SetupBisect(args):
  """Does all the one-time setup for a bisect.

  Starts a git bisect in the bisected repo and marks the good and bad
  revisions.

  Args:
    args: The parsed args from argparse

  Returns:
    The first revision to sync to

  Raises:
    RuntimeError: If git does not report a next revision to test.
  """
  with SwitchDirsIfNotChromiumBisect(args):
    subprocess.check_output(['git', 'bisect', 'start'])
    subprocess.check_output(['git', 'bisect', 'good', args.good_revision])
    output = subprocess.check_output(
        ['git', 'bisect', 'bad', args.bad_revision])
  print(output)
  # Only a "Bisecting: ..." message is followed by the next revision in
  # brackets. Anything else (e.g. the culprit being reported right away) may
  # still contain brackets from a commit message, which must not be mistaken
  # for a revision.
  if not output.startswith('Bisecting:'):
    raise RuntimeError(
        'git bisect did not report a revision to test. Are the good and bad '
        'revisions adjacent?')
  # Get the revision, which is between []
  revision = output.split('[', 1)[1].split(']', 1)[0]
  return revision
# Isolates args.isolate_target from args.build_output_dir, uploads it to
# args.isolate_server, then assembles the swarming "run" command line
# (dimensions, CIPD packages, env prefixes and benchmark selection).
# NOTE(review): several pieces of this function appear to be missing in this
# copy: the script names passed to os.path.join() are empty strings, the
# swarming_args and cipd_packages list literals are never closed, the `else:`
# branches of the benchmark selection are absent, and no visible statement
# launches swarming_args. Restore it from version control before relying on it.
def RunTestOnSwarming(args, unknown_args, output_dir, use_new_perf_format):
"""Isolates the test target and runs it on swarming to get perf results.
args: The known args parsed by the argument parser
unknown_args: The unknown args parsed by the argument parser
output_dir: The directory to save swarming results to
use_new_perf_format: Whether to use the new perf format that came with
revision 0d271f583489024e3c163f0538d1ec29097b3615 or not.
print "=== Isolating and running target %s ===" % args.isolate_target
print 'Isolating'
subprocess.check_output(['python', os.path.join('tools', 'mb', ''),
'//%s' % args.build_output_dir, args.isolate_target])
print 'Uploading'
output = subprocess.check_output([
'python', os.path.join('tools', 'swarming_client', ''),
'batcharchive', '--isolate-server', args.isolate_server,
'%s.isolated.gen.json' % args.isolate_target)])
# The isolate hash is taken to be the first space-separated token of the
# batcharchive output.
isolate_hash = output.split(' ')[0]
swarming_args = [
'python', os.path.join('tools', 'swarming_client', ''), 'run',
'--isolated', isolate_hash,
'--isolate-server', args.isolate_server,
'--swarming', args.swarming_server,
'--task-output-dir', output_dir,
# Each entry of args.dimensions is a single-pair dict, so this adds one
# --dimension flag per key/value pair.
for pair in args.dimensions:
for key, val in pair.iteritems():
swarming_args.extend(['--dimension', key, val])
# Temporary workaround for We could get the same
# effect by isolating/uploading/running using " run -s", but that has
# the issue of apparently not having a way to specify a task output directory.
# So instead, manually append the additional arguments that running that way
# would do for us to work around the vpython issues until they're fixed.
# TODO( Remove this when possible.
cipd_packages = [
for package in cipd_packages:
swarming_args.extend(['--cipd-package', package])
'swarming_module_cache_vpython', '.swarming_module_cache/vpython',
'--env-prefix', 'PATH', '.swarming_module',
'--env-prefix', 'PATH', '.swarming_module/bin',
'--env-prefix', 'VPYTHON_VIRTUALENV_ROOT', '.swarming_module_cache/vpython',
# Determine how we're supposed to pass the benchmark to run to the test
if use_new_perf_format:
# Telemetry tests in the new format use --benchmarks, non-Telemetry use
# --gtest-benchmark-name. Non-Telemetry tests need to have --non-telemetry
# passed to them in order to work, so check for the presence of that flag.
is_non_telemetry = False
for arg in unknown_args:
if '--non-telemetry' in arg:
is_non_telemetry = True
if is_non_telemetry:
swarming_args.append('--gtest-benchmark-name=%s' % args.benchmark)
swarming_args.append('--benchmarks=%s' % args.benchmark)
# The old perf format simply passed the benchmark to run as the first
# positional argument
'--output-format', 'chartjson',
'--chromium-output-directory', args.build_output_dir])
print 'Running test %s' % (
'(new format)' if use_new_perf_format else '(old format)')
# Builds the command line for diffing the APK named args.apk_name between
# args.build_output_dir (base) and args.diff_build_output_dir (diff), with
# results written to output_dir.
# NOTE(review): the statement that opens this argument list (presumably a
# subprocess call), the script name inside os.path.join(), and the flag that
# should precede the diff APK path appear to be missing in this copy. Restore
# them from version control.
def RunBinarySizeDiff(args, output_dir):
"""Locally runs a binary size diff on the two APKs specified by args.
args: The known args parsed from the argument parser
output_dir: The directory to save diff results to
print 'Diffing APKs'
['python', os.path.join('build', 'android', ''),
'--base-apk', os.path.join(args.build_output_dir, 'apks', args.apk_name),
'--chromium-output-directory-base', args.build_output_dir,
os.path.join(args.diff_build_output_dir, 'apks', args.apk_name),
'--chromium-output-directory-diff', args.diff_build_output_dir,
'--output-dir', output_dir])
def GetSwarmingResult(args, output_dir, use_new_perf_format):
  """Extracts the value for the story/metric combo of interest from swarming.

  Args:
    args: The known args parsed from the argument parser
    output_dir: The directory where swarming results have been saved to
    use_new_perf_format: Whether to use the new perf format that came with
        revision 0d271f583489024e3c163f0538d1ec29097b3615 or not.

  Returns:
    The value for the story/metric combo that the last swarming run produced
  """
  # The new format nests the results in a per-benchmark directory and uses a
  # different file name.
  if use_new_perf_format:
    outfile = os.path.join(
        output_dir, '0', args.benchmark, 'perf_results.json')
  else:
    outfile = os.path.join(output_dir, '0', 'perftest-output.json')
  return _GetResultsFromJson(args, outfile)
def GetBinarySizeResult(args, output_dir):
  """Extracts the value for the story/metric combo of interest locally.

  Args:
    args: The known args parsed from the argument parser
    output_dir: The directory where local results have been saved to

  Returns:
    The value for the story/metric combo that the last binary size diff produced
  """
  results_path = os.path.join(output_dir, 'results-chart.json')
  return _GetResultsFromJson(args, results_path)
def _GetResultsFromJson(args, filepath):
with open(filepath, 'r') as infile:
perf_results = json.load(infile)
all_results = []
if args.expected_json_result_format == 'chartjson':
# Perf tests use a 'values' array, while binary size uses a single 'value'
# field. So, check for both.
story = perf_results.get(unicode('charts'), {}).get(
unicode(args.metric), {}).get(unicode(args.story), {})
all_results = story.get(unicode('values'), [])
if 'value' in story:
all_results = [story[unicode('value')]]
elif args.expected_json_result_format == 'printedjson':
all_results = perf_results.get(args.metric, {}).get('traces', {}).get(
args.story, [])
if len(all_results) == 0:
raise RuntimeError('Got no results for the story/metric combo. '
'Is there a typo in one of them?')
result = all_results[0]
print 'Got result %s' % str(result)
return float(result)
def RunBisectStep(args, unknown_args, revision, output_dir):
  """Runs a bisect step for a revision.

  This will run recursively until the culprit CL is found.

  Args:
    args: The known args parsed from the argument parser
    unknown_args: The unknown args parsed from the argument parser
    revision: The git revision to sync to and test
    output_dir: The directory to save swarming results to
  """
  revision_good = True
  for attempt in range(1, args.num_attempts_before_marking_good + 1):
    # Only bother syncing and building if this is our first attempt on this
    # revision.
    result = GetValueAtRevision(args, unknown_args, revision, output_dir,
                                sync=(attempt == 1))
    # Regression was an increased value.
    if args.bad_value > args.good_value:
      # If we're greater than the provided bad value or between good and bad,
      # but closer to bad, we're still bad.
      if (result > args.bad_value or
          abs(args.bad_value - result) < abs(args.good_value - result)):
        print('=== Attempt %d found revision that is BAD ===' % attempt)
        revision_good = False
        # One bad attempt settles the verdict, so skip the remaining runs.
        break
      else:
        print('=== Attempt %d found that revision is GOOD ===' % attempt)
    # Regression was a decreased value.
    else:
      # If we're smaller than the provided bad value or between good and bad,
      # but closer to bad, we're still bad.
      if (result < args.bad_value or
          abs(args.bad_value - result) < abs(args.good_value - result)):
        print('=== Attempt %d found that revision is BAD ===' % attempt)
        revision_good = False
        break
      else:
        print('=== Attempt %d found that revision is GOOD ===' % attempt)

  output = ""
  with SwitchDirsIfNotChromiumBisect(args):
    if revision_good:
      print('=== Current revision is GOOD ===')
      output = subprocess.check_output(['git', 'bisect', 'good', revision])
    else:
      print('=== Current revision is BAD ===')
      output = subprocess.check_output(['git', 'bisect', 'bad', revision])
  print(output)
  # git reports the next revision to test in brackets after "Bisecting:". Any
  # other output ends the recursion.
  if output.startswith('Bisecting:'):
    RunBisectStep(args, unknown_args, output.split('[', 1)[1].split(']', 1)[0],
                  output_dir)
def BuildTarget(args):
  """Builds args.build_target with ninja.

  The target is built in args.build_output_dir and, for binary size bisects,
  afterwards in args.diff_build_output_dir as well.

  Args:
    args: The known args parsed by the argument parser
  """
  print('Building')
  output_dirs = [args.build_output_dir]
  if args.binary_size_bisect:
    output_dirs.append(args.diff_build_output_dir)
  for out_dir in output_dirs:
    ninja_command = ['ninja', '-C', out_dir]
    ninja_command += ['-j', str(args.parallel_jobs)]
    ninja_command += ['-l', str(args.load_limit)]
    ninja_command.append(args.build_target)
    subprocess.check_output(ninja_command)
# For each build output directory, reads a file inside it, writes a file
# inside it again, and then runs "gn gen" on the directory.
# NOTE(review): this copy appears to be incomplete: the file name passed to
# os.path.join() is an empty string, the right-hand side of `gn_args =` is
# missing, the body of the binary-size branch is missing (presumably it adds
# args.diff_build_output_dir), and nothing visible deletes or re-creates the
# directory as the docstring describes. Restore it from version control.
def RegenerateGnArgs(args):
"""Recreates the build output directory using existing GN args."""
directories = [args.build_output_dir]
if args.binary_size_bisect:
for d in directories:
with open(os.path.join(d, ''), 'r') as args_file:
gn_args =
with open(os.path.join(d, ''), 'w') as args_file:
subprocess.check_output(['gn', 'gen', d])
# In manual mode this waits for the user; otherwise it optionally resets the
# checkout, optionally applies and commits a stash entry, runs gclient sync,
# and checks out any --checkout-override revisions.
# NOTE(review): this copy appears to be incomplete: the manual-mode print is
# cut off, the `else:` separating manual from automatic syncing is absent, two
# argument lists have lost the call that opens them (the stash apply and the
# VR asset download, whose script name is also cut off), the override loop
# never changes into `repo` or back to `cwd`, and the trailing
# `if args.regenerate_args_after_sync:` has no body. No build step is visible
# either, despite the docstring. Restore it from version control.
def SyncAndBuild(args, unknown_args, revision):
"""Syncs to the given revision and builds the test target.
args: The known args parsed by the argument parser
unknown_args: The unknown args parsed by the argument parser
revision: The revision to sync to and build
if args.manual_mode:
print ('=== Waiting on user input to start build/run process for %s ===' %
raw_input('Press any key to continue')
print '=== Building ==='
print '=== Syncing to revision %s and building ===' % revision
# Sometimes random files show up as unstaged changes (???), so make sure
# that isn't the case before we try to run gclient sync
if args.reset_before_sync:
subprocess.check_output(['git', 'reset', '--hard', 'HEAD'])
sync_revision = revision
if args.apply_stash_before_sync:
print 'Applying stash entry %s' % args.apply_stash_before_sync
['git', 'stash', 'apply', args.apply_stash_before_sync])
subprocess.check_output(['git', 'add', '-u'])
subprocess.check_output(['git', 'commit', '-m', 'Apply stash.'])
# The stash was committed on top of the checkout, so sync to that new commit
# rather than to the bisected revision itself.
sync_revision = 'HEAD'
print 'Syncing'
output = subprocess.check_output(['gclient', 'sync', '-r',
'src@%s' % sync_revision])
if ('error: Your local changes to the following files would be overwritten '
'by checkout:' in output):
raise RuntimeError('Could not run gclient sync due to uncommitted '
'changes. If these changes are actually yours, please commit or '
'stash them. If they are not, remove them and try again. If the '
'issue persists, try running with --reset-before-sync')
# Ensure that the VR assets are synced to the current revision since it isn't
# guaranteed that gclient will handle it properly
# TODO( Remove this once asset downloading is more
# robust in gclient.
'python', 'third_party/depot_tools/',
'--bucket', 'chrome-vr-assets',
'--directory', 'chrome/browser/resources/vr/assets/google_chrome'])
# Checkout any specified revisions.
cwd = os.getcwd()
for override in args.checkout_overrides:
for repo, rev in override.iteritems():
subprocess.check_output(['git', 'checkout', rev])
if args.regenerate_args_after_sync:
def BisectRegression(args, unknown_args):
  """Runs all steps necessary to bisect a perf regression.

  Intermediate steps and the culprit CL will be printed to stdout.

  If the good or bad value was not provided, it is measured first and stored
  on args before the bisect starts.

  Args:
    args: The known args parsed by the argument parser
    unknown_args: The unknown args parsed by the argument parser
  """
  with TempDir() as output_dir:
    # Ensure that we also sync any APKs we use
    os.environ['DOWNLOAD_VR_TEST_APKS'] = '1'
    if args.good_value is None:
      # Once we've run "git bisect start" and set the good/bad revisions,
      # we'll be in a detached head state before we sync. However, the git
      # bisect has to start after this point, so we can't use that behavior
      # here. So, manually sync to the revision to get into a detached state.
      subprocess.check_output(['git', 'checkout', args.good_revision])
      args.good_value = GetValueAtRevision(args, unknown_args,
                                           args.good_revision, output_dir)
      print('=== Got initial good value of %f ===' % args.good_value)
    if args.bad_value is None:
      subprocess.check_output(['git', 'checkout', args.bad_revision])
      args.bad_value = GetValueAtRevision(args, unknown_args,
                                          args.bad_revision, output_dir)
      print('=== Got initial bad value of %f ===' % args.bad_value)
    revision = SetupBisect(args)
    RunBisectStep(args, unknown_args, revision, output_dir)
  # Leave the bisected repo the way git had it before the bisect started.
  with SwitchDirsIfNotChromiumBisect(args):
    subprocess.check_output(['git', 'bisect', 'reset'])
def GetValueAtRevision(args, unknown_args, revision, output_dir, sync=True):
  """Builds and runs the test at a particular revision.

  Args:
    args: The known args parsed by the argument parser
    unknown_args: The unknown args parsed by the argument parser
    revision: The revision to sync to and build
    output_dir: The directory to store swarming results to
    sync: Whether to sync and build before running. Has no effect when
        --bisect-repo is used.

  Returns:
    The value of the story/metric combo at the given revision
  """
  # In the case where we're bisecting a repo other than Chromium src,
  # "git bisect"'s automatic checkouts will be enough to ensure we're at the
  # correct revision, so we can just build immediately.
  if args.bisect_repo:
    print('=== Building with %s at revision %s ===' % (
        args.bisect_repo, revision))
    # NOTE(review): this call was reconstructed from the comment above;
    # without it nothing would be rebuilt between revisions in this mode.
    BuildTarget(args)
  elif sync:
    SyncAndBuild(args, unknown_args, revision)

  if args.binary_size_bisect:
    RunBinarySizeDiff(args, output_dir)
    return GetBinarySizeResult(args, output_dir)

  use_new_perf_format = _IsCurrentRevisionAfterFormatChange(revision)
  RunTestOnSwarming(args, unknown_args, output_dir, use_new_perf_format)
  return GetSwarmingResult(args, output_dir, use_new_perf_format)
# Decides between the old and new perf format by comparing `revision` with
# PERF_FORMAT_CHANGE_REVISION.
# NOTE(review): this copy appears to be incomplete: the early `return True`
# has lost the condition that guards it (as written, everything below it is
# unreachable), and the command list passed to subprocess.check_output() is
# missing everything but the revision range and is never closed. Restore it
# from version control.
def _IsCurrentRevisionAfterFormatChange(revision):
"""Determines whether we need to use the new or old perf format.
With commit 0d271f583489024e3c163f0538d1ec29097b3615, the VR perf tests moved
to use the newer script. This changed the output
file name and the way which benchmark to run is specified, so we need to
know where we are in relation to that commit in order to know how to run
revision: The currently synced revision
True if the current revision comes after the commit that changed how perf
tests are run, False otherwise
return True
# Check how many revisions after the format change revision we are - if it's
# non-zero, we're at some point after the format got changed, otherwise we're
# before.
num_revisions_after = subprocess.check_output(
'%s..%s' % (PERF_FORMAT_CHANGE_REVISION, revision),
return int(num_revisions_after) > 0
def main():
  """Parses and confirms the arguments, then runs the bisect."""
  args, unknown_args = ParseArgsAndAssertValid()
  # Paths used by the bisect are relative to the Chromium root, so refuse to
  # run from anywhere else. Checked after parsing so that --help and argument
  # errors still work from any directory.
  # NOTE(review): VerifyCwd() was not called anywhere in the reviewed copy;
  # confirm the intended call site.
  VerifyCwd()
  VerifyInput(args, unknown_args)
  BisectRegression(args, unknown_args)


if __name__ == '__main__':
  main()