| #!/usr/bin/env vpython | 
 | # Copyright 2018 The Chromium Authors. All rights reserved. | 
 | # Use of this source code is governed by a BSD-style license that can be | 
 | # found in the LICENSE file. | 
 |  | 
 | import argparse | 
 | import json | 
 | import multiprocessing | 
 | import sys | 
 |  | 
 | from core import benchmark_utils | 
 | from core import bot_platforms | 
 | from core import retrieve_story_timing | 
 | from core import sharding_map_generator | 
 |  | 
 |  | 
 | def GetParser(): | 
 |   parser = argparse.ArgumentParser( | 
 |       description='Generate perf test sharding map.') | 
 |   subparsers = parser.add_subparsers() | 
 |  | 
 |   parser_update = subparsers.add_parser('update') | 
 |   parser_update.add_argument( | 
 |       '--regenerate-timing-data', '-r', action='store_true', | 
 |       help=('Whether to regenerate timing data for all builders in ' | 
 |             'chromium.perf'), default=False) | 
 |   parser_update.add_argument( | 
 |       '--builders', '-b', action='store', nargs='*', | 
 |       help=('The builder names to reshard. If not specified, use all ' | 
 |             'perf builders'), | 
 |       choices=bot_platforms.ALL_PLATFORM_NAMES, | 
 |       default=bot_platforms.ALL_PLATFORM_NAMES) | 
 |   parser.add_argument( | 
 |       '--debug', action='store_true', | 
 |       help=('Whether to include detailed debug info of the sharding map in the' | 
 |             'shard maps.'), default=False) | 
 |  | 
 |   parser_update.set_defaults(func=_UpdateShardsForBuilders) | 
 |  | 
 |   parser_create = subparsers.add_parser('create') | 
 |   parser_create.add_argument( | 
 |       '--benchmark', help='The benchmark that you want to create shard for', | 
 |       required=True) | 
 |   parser_create.add_argument( | 
 |       '--timing-data-source', '-t', choices=bot_platforms.ALL_PLATFORM_NAMES, | 
 |       help='The timing data that you want to use. If not set, it will assume ' | 
 |            'all stories use the same amount of time to run') | 
 |   parser_create.add_argument( | 
 |       # pinpoint typically has 16 machines for each hardware types, so we set | 
 |       # the default to use half of them to avoid starving the pool. | 
 |       '--shards-num', type=int, default=8, | 
 |       help="The number of shards you'd like to use, default is %(default)s") | 
 |   parser_create.add_argument( | 
 |       '--output-path', default='new_shard_map.json', | 
 |       help='Output file path for the shard map, default is `%(default)s`') | 
 |   parser_create.set_defaults(func=_CreateShardMapForBenchmark) | 
 |   return parser | 
 |  | 
 |  | 
 | def _GenerateBenchmarksToShardsList(benchmarks): | 
 |   """Return |benchmarks_to_shard| from given list of |benchmarks|. | 
 |  | 
 |     benchmarks_to_shard is a list all benchmarks to be sharded. Its | 
 |     structure is as follows: | 
 |     [{ | 
 |        "name": "benchmark_1", | 
 |        "stories": [ "storyA", "storyB",...], | 
 |        "repeat": <number of pageset_repeat> | 
 |       }, | 
 |       { | 
 |        "name": "benchmark_2", | 
 |        "stories": [ "storyA", "storyB",...], | 
 |        "repeat": <number of pageset_repeat> | 
 |       }, | 
 |        ... | 
 |     ] | 
 |  | 
 |     The "stories" field contains a list of ordered story names. Notes that | 
 |     this should match the actual order of how the benchmark stories are | 
 |     executed for the sharding algorithm to be effective. | 
 |   """ | 
 |   benchmarks_to_shard = [] | 
 |   for b in benchmarks: | 
 |     benchmarks_to_shard.append({ | 
 |         'name': b.Name(), | 
 |         'repeat': b().options.get('pageset_repeat', 1), | 
 |         'stories': benchmark_utils.GetBenchmarkStoryNames(b()) | 
 |     }) | 
 |   return benchmarks_to_shard | 
 |  | 
 |  | 
 | def _LoadTimingData(args): | 
 |   builder_name, timing_file_path = args | 
 |   data = retrieve_story_timing.FetchAverageStortyTimingData( | 
 |       configurations=[builder_name], num_last_days=5) | 
 |   with open(timing_file_path, 'w') as output_file: | 
 |     json.dump(data, output_file, indent=4, separators=(',', ': ')) | 
 |   print 'Finish retrieve story timing data for %s' % repr(builder_name) | 
 |  | 
 |  | 
 | def _GenerateShardMap( | 
 |     builder, num_of_shards, output_path, debug, benchmark): | 
 |   timing_data = [] | 
 |   if builder: | 
 |     with open(builder.timing_file_path) as f: | 
 |       timing_data = json.load(f) | 
 |   benchmarks_to_shard = _GenerateBenchmarksToShardsList( | 
 |       [b for b in builder.benchmarks_to_run if not benchmark or ( | 
 |           b.Name() == benchmark)]) | 
 |   sharding_map = sharding_map_generator.generate_sharding_map( | 
 |       benchmarks_to_shard, timing_data, num_shards=num_of_shards, | 
 |       debug=debug) | 
 |   with open(output_path, 'w') as output_file: | 
 |     json.dump(sharding_map, output_file, indent=4, separators=(',', ': ')) | 
 |  | 
 |  | 
 | def _UpdateShardsForBuilders(args): | 
 |   builders = {b for b in bot_platforms.ALL_PLATFORMS if b.name in args.builders} | 
 |   if args.regenerate_timing_data: | 
 |     print 'Update shards timing data. May take a while...' | 
 |     load_timing_args = [] | 
 |     for b in builders: | 
 |       load_timing_args.append((b.name, b.timing_file_path)) | 
 |     p = multiprocessing.Pool(len(load_timing_args)) | 
 |     p.map(_LoadTimingData, load_timing_args) | 
 |  | 
 |   for b in builders: | 
 |     _GenerateShardMap( | 
 |         b, b.num_shards, b.shards_map_file_path, args.debug, benchmark=None) | 
 |     print 'Updated sharding map for %s' % repr(b.name) | 
 |  | 
 |  | 
 | def _CreateShardMapForBenchmark(args): | 
 |   """Create the shard map for the given benchmark. | 
 |  | 
 |   Args: | 
 |     args(Namespace object): the namespace object for the subparser `create`. It | 
 |       will contain the attributes: | 
 |         `benchmark`: the name of the benchmark that we want the shard for | 
 |         `num_shards`: the total number of shards that we want to use | 
 |         `output_path`: the output file path for the shard map | 
 |         `builder`: the builder name, unlike the above, this is a string instead | 
 |           of a list of string like above | 
 |   """ | 
 |   builder = None | 
 |   if args.timing_data_source: | 
 |     [builder] = [b for b in bot_platforms.ALL_PLATFORMS | 
 |                  if b.name == args.timing_data_source] | 
 |   _GenerateShardMap( | 
 |       builder, args.shards_num, args.output_path, args.debug, args.benchmark) | 
 |  | 
 |  | 
 | def main(): | 
 |   parser = GetParser() | 
 |   options = parser.parse_args() | 
 |   options.func(options) | 
 |  | 
 | if __name__ == '__main__': | 
 |   sys.exit(main()) |