blob: 1a59d0e30c555db5dcf5483ecb56a716fec9001a [file] [log] [blame]
#!/usr/bin/env vpython
# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import argparse
import json
import multiprocessing
import sys
from core import benchmark_utils
from core import bot_platforms
from core import retrieve_story_timing
from core import sharding_map_generator
def GetParser():
  """Build the command-line parser for the perf sharding map tool.

  Two subcommands are registered, each storing its handler in the `func`
  attribute of the parsed namespace:
    update: dispatches to _UpdateShardsForBuilders.
    create: dispatches to _CreateShardMapForBenchmark.

  Returns:
    The configured argparse.ArgumentParser.
  """
  parser = argparse.ArgumentParser(
      description='Generate perf test sharding map.')
  subparsers = parser.add_subparsers()

  # `update`: reshard existing builders, optionally refreshing timing data.
  parser_update = subparsers.add_parser('update')
  parser_update.add_argument(
      '--regenerate-timing-data', '-r', action='store_true',
      help=('Whether to regenerate timing data for all builders in '
            'chromium.perf'), default=False)
  parser_update.add_argument(
      '--builders', '-b', action='store', nargs='*',
      help=('The builder names to reshard. If not specified, use all '
            'perf builders'),
      choices=bot_platforms.ALL_PLATFORM_NAMES,
      default=bot_platforms.ALL_PLATFORM_NAMES)

  # --debug is registered on the top-level parser, so it ends up in the
  # namespace handed to either subcommand handler.
  parser.add_argument(
      '--debug', action='store_true',
      # Note the trailing space after "the": adjacent string literals are
      # concatenated verbatim.
      help=('Whether to include detailed debug info of the sharding map in the '
            'shard maps.'), default=False)
  parser_update.set_defaults(func=_UpdateShardsForBuilders)

  # `create`: build a one-off shard map for a single benchmark.
  parser_create = subparsers.add_parser('create')
  parser_create.add_argument(
      '--benchmark', help='The benchmark that you want to create shard for',
      required=True)
  parser_create.add_argument(
      '--timing-data-source', '-t', choices=bot_platforms.ALL_PLATFORM_NAMES,
      help='The timing data that you want to use. If not set, it will assume '
      'all stories use the same amount of time to run')
  parser_create.add_argument(
      # pinpoint typically has 16 machines for each hardware types, so we set
      # the default to use half of them to avoid starving the pool.
      '--shards-num', type=int, default=8,
      help="The number of shards you'd like to use, default is %(default)s")
  parser_create.add_argument(
      '--output-path', default='new_shard_map.json',
      help='Output file path for the shard map, default is `%(default)s`')
  parser_create.set_defaults(func=_CreateShardMapForBenchmark)
  return parser
def _GenerateBenchmarksToShardsList(benchmarks):
"""Return |benchmarks_to_shard| from given list of |benchmarks|.
benchmarks_to_shard is a list all benchmarks to be sharded. Its
structure is as follows:
[{
"name": "benchmark_1",
"stories": [ "storyA", "storyB",...],
"repeat": <number of pageset_repeat>
},
{
"name": "benchmark_2",
"stories": [ "storyA", "storyB",...],
"repeat": <number of pageset_repeat>
},
...
]
The "stories" field contains a list of ordered story names. Notes that
this should match the actual order of how the benchmark stories are
executed for the sharding algorithm to be effective.
"""
benchmarks_to_shard = []
for b in benchmarks:
benchmarks_to_shard.append({
'name': b.Name(),
'repeat': b().options.get('pageset_repeat', 1),
'stories': benchmark_utils.GetBenchmarkStoryNames(b())
})
return benchmarks_to_shard
def _LoadTimingData(args):
  """Fetch story timing data for one builder and save it as a JSON file.

  Args:
    args: a (builder_name, timing_file_path) pair. Both values arrive packed
      in a single argument because this function is run through
      multiprocessing.Pool.map, which passes one item per call.
  """
  (name, destination) = args
  # Note: "Storty" is the spelling of the function as called in this file.
  timing = retrieve_story_timing.FetchAverageStortyTimingData(
      configurations=[name], num_last_days=5)
  with open(destination, 'w') as timing_file:
    json.dump(timing, timing_file, indent=4, separators=(',', ': '))
  print('Finish retrieve story timing data for %s' % repr(name))
def _CollectBenchmarksFromAllPlatforms():
  """Return the benchmarks of every known platform, deduplicated by name.

  The result is ordered by benchmark name so that it does not depend on the
  iteration order of bot_platforms.ALL_PLATFORMS.
  """
  benchmarks_by_name = {}
  for platform in bot_platforms.ALL_PLATFORMS:
    for b in platform.benchmarks_to_run:
      benchmarks_by_name.setdefault(b.Name(), b)
  return [benchmarks_by_name[name] for name in sorted(benchmarks_by_name)]


def _GenerateShardMap(
    builder, num_of_shards, output_path, debug, benchmark):
  """Generate a sharding map and write it to |output_path| as JSON.

  Args:
    builder: the platform whose timing data and benchmark list are used, or
      None. With None, no timing data is loaded (an empty list is passed to
      the generator) and the candidate benchmarks are gathered from every
      platform in bot_platforms.ALL_PLATFORMS.
    num_of_shards: number of shards, forwarded as `num_shards`.
    output_path: path of the JSON file to write.
    debug: forwarded unchanged to the sharding map generator.
    benchmark: if truthy, only the benchmark whose Name() equals this value is
      sharded; otherwise all candidate benchmarks are sharded.
  """
  timing_data = []
  if builder:
    with open(builder.timing_file_path) as f:
      timing_data = json.load(f)
    candidates = builder.benchmarks_to_run
  else:
    # Previously this path dereferenced `builder.benchmarks_to_run` on None
    # and crashed with AttributeError.
    candidates = _CollectBenchmarksFromAllPlatforms()

  benchmarks_to_shard = _GenerateBenchmarksToShardsList(
      [b for b in candidates if not benchmark or b.Name() == benchmark])
  sharding_map = sharding_map_generator.generate_sharding_map(
      benchmarks_to_shard, timing_data, num_shards=num_of_shards,
      debug=debug)
  with open(output_path, 'w') as output_file:
    json.dump(sharding_map, output_file, indent=4, separators=(',', ': '))
def _UpdateShardsForBuilders(args):
  """Handler of the `update` subcommand: regenerate builders' shard maps.

  Args:
    args: parsed namespace. Attributes read here:
      `builders`: names of the builders to reshard.
      `regenerate_timing_data`: when true, timing data of every selected
        builder is fetched again (in parallel) before resharding.
      `debug`: forwarded to the shard map generation.
  """
  requested_names = set(args.builders)
  builders = {
      b for b in bot_platforms.ALL_PLATFORMS if b.name in requested_names}

  # Skip the pool entirely when nothing is selected: multiprocessing.Pool
  # rejects a process count of 0.
  if args.regenerate_timing_data and builders:
    print('Update shards timing data. May take a while...')
    load_timing_args = [(b.name, b.timing_file_path) for b in builders]
    # One worker per builder.
    pool = multiprocessing.Pool(len(load_timing_args))
    try:
      pool.map(_LoadTimingData, load_timing_args)
    finally:
      # Always release the worker processes, also when a fetch failed.
      pool.terminate()
      pool.join()

  for b in builders:
    _GenerateShardMap(
        b, b.num_shards, b.shards_map_file_path, args.debug, benchmark=None)
    print('Updated sharding map for %s' % repr(b.name))
def _CreateShardMapForBenchmark(args):
  """Handler of the `create` subcommand: shard map for a single benchmark.

  Args:
    args(Namespace object): the parsed namespace. Attributes read here:
      `benchmark`: the name of the benchmark to create the shard map for
      `shards_num`: the total number of shards to use
      `output_path`: the output file path for the shard map
      `timing_data_source`: name of the platform whose timing data should be
        used (a single string); when falsy no platform is looked up and None
        is passed on as the builder
      `debug`: forwarded to the shard map generation

  Raises:
    ValueError: if `timing_data_source` is set and does not match exactly one
      platform in bot_platforms.ALL_PLATFORMS.
  """
  timing_source = args.timing_data_source
  builder = None
  if timing_source:
    matching_platforms = [
        platform for platform in bot_platforms.ALL_PLATFORMS
        if platform.name == timing_source]
    # Unpacking enforces that exactly one platform carries this name.
    (builder,) = matching_platforms
  _GenerateShardMap(
      builder, args.shards_num, args.output_path, args.debug, args.benchmark)
def main():
  """Parse the command line and run the handler of the chosen subcommand.

  The handler is the `func` attribute that each subparser stores in the
  parsed namespace. Nothing is returned explicitly.
  """
  options = GetParser().parse_args()
  handler = options.func
  handler(options)
# Run the tool only when executed as a script, not when imported. The return
# value of main() becomes the process exit status.
if __name__ == '__main__':
  sys.exit(main())