tools/perf/generate_perf_sharding - chromium/src.git - Git at Google

 #!/usr/bin/env vpython
 # Copyright 2018 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 import argparse
 import json
 import multiprocessing
 import sys

 from core import benchmark_utils
 from core import bot_platforms
 from core import retrieve_story_timing
 from core import sharding_map_generator


 def GetParser():
   """Build the command line parser of the sharding map tool.

   Two subcommands are registered:
     update: regenerate the shard maps of the selected perf builders; handled
       by _UpdateShardsForBuilders.
     create: write a shard map for a single benchmark; handled by
       _CreateShardMapForBenchmark.

   The --debug flag is registered on the top-level parser rather than on the
   subparsers, so on the command line it goes before the subcommand name.

   Returns:
     The configured argparse.ArgumentParser. The handler of the chosen
     subcommand is stored in the `func` attribute of the parsed namespace.
   """
   parser = argparse.ArgumentParser(
       description='Generate perf test sharding map.')
   parser.add_argument(
       '--debug', action='store_true', default=False,
       # The two literals are concatenated, so the separating space has to be
       # part of one of them.
       help=('Whether to include detailed debug info of the sharding map in '
             'the shard maps.'))
   subparsers = parser.add_subparsers()

   # `update`: reshard existing builders.
   parser_update = subparsers.add_parser('update')
   parser_update.add_argument(
       '--regenerate-timing-data', '-r', action='store_true',
       help=('Whether to regenerate timing data for all builders in '
             'chromium.perf'), default=False)
   parser_update.add_argument(
       '--builders', '-b', action='store', nargs='*',
       help=('The builder names to reshard. If not specified, use all '
             'perf builders'),
       choices=bot_platforms.ALL_PLATFORM_NAMES,
       default=bot_platforms.ALL_PLATFORM_NAMES)
   parser_update.set_defaults(func=_UpdateShardsForBuilders)

   # `create`: one-off shard map for a single benchmark.
   parser_create = subparsers.add_parser('create')
   parser_create.add_argument(
       '--benchmark', help='The benchmark that you want to create shard for',
       required=True)
   parser_create.add_argument(
       '--timing-data-source', '-t', choices=bot_platforms.ALL_PLATFORM_NAMES,
       help='The timing data that you want to use. If not set, it will assume '
            'all stories use the same amount of time to run')
   parser_create.add_argument(
       # pinpoint typically has 16 machines for each hardware types, so we set
       # the default to use half of them to avoid starving the pool.
       '--shards-num', type=int, default=8,
       help="The number of shards you'd like to use, default is %(default)s")
   parser_create.add_argument(
       '--output-path', default='new_shard_map.json',
       help='Output file path for the shard map, default is `%(default)s`')
   parser_create.set_defaults(func=_CreateShardMapForBenchmark)
   return parser


 def _GenerateBenchmarksToShardsList(benchmarks):
   """Describe each benchmark in the form taken by the sharding generator.

   Args:
     benchmarks: iterable of benchmark classes. Name() is called on the class
       itself; the class is called without arguments to get an instance.

   Returns:
     A list with one dict per benchmark, in input order:
       [{
          "name": "benchmark_1",
          "stories": ["storyA", "storyB", ...],
          "repeat": <number of pageset_repeat>
        },
        ...
       ]
     "stories" is the result of benchmark_utils.GetBenchmarkStoryNames. Its
     order should match the order in which the benchmark actually runs its
     stories, otherwise the sharding algorithm is not effective. "repeat" is 1
     when the benchmark options have no 'pageset_repeat' entry.
   """
   def _Describe(benchmark_class):
     return {
         'name': benchmark_class.Name(),
         'repeat': benchmark_class().options.get('pageset_repeat', 1),
         'stories': benchmark_utils.GetBenchmarkStoryNames(benchmark_class()),
     }

   return [_Describe(benchmark_class) for benchmark_class in benchmarks]


 def _LoadTimingData(args):
   """Fetch the story timing data of one builder and save it as JSON.

   The arguments arrive packed in one tuple so that the function can be used
   directly with multiprocessing.Pool.map.

   Args:
     args: a (builder_name, timing_file_path) tuple. The data of the last 5
       days is requested for |builder_name| and written to |timing_file_path|.
   """
   builder_name, timing_file_path = args
   timing_data = retrieve_story_timing.FetchAverageStortyTimingData(
       configurations=[builder_name], num_last_days=5)
   with open(timing_file_path, 'w') as timing_file:
     json.dump(timing_data, timing_file, indent=4, separators=(',', ': '))
   print('Finish retrieve story timing data for %s' % repr(builder_name))


 def _GenerateShardMap(
     builder, num_of_shards, output_path, debug, benchmark):
   """Generate a sharding map and write it to |output_path| as JSON.

   Args:
     builder: the platform (an entry of bot_platforms.ALL_PLATFORMS) whose
       timing data file and benchmark list are used. May be None: then no
       timing data is used and the benchmarks are collected from all entries
       of bot_platforms.ALL_PLATFORMS.
     num_of_shards: number of shards, passed as `num_shards` to
       sharding_map_generator.generate_sharding_map.
     output_path: path of the JSON file to write.
     debug: forwarded to sharding_map_generator.generate_sharding_map.
     benchmark: name of the only benchmark to shard. If falsy, all candidate
       benchmarks are sharded.
   """
   timing_data = []
   if builder:
     with open(builder.timing_file_path) as f:
       timing_data = json.load(f)
     candidates = builder.benchmarks_to_run
   else:
     # Without a builder there is no benchmark list to read, and accessing
     # builder.benchmarks_to_run would raise AttributeError. Fall back to the
     # benchmarks of every platform, kept once per name and ordered by name so
     # that the generated map is deterministic.
     # NOTE(review): assumes each platform exposes benchmarks_to_run, as the
     # `update` code path implies - confirm against bot_platforms.
     benchmarks_by_name = {}
     for platform in bot_platforms.ALL_PLATFORMS:
       for b in platform.benchmarks_to_run:
         benchmarks_by_name.setdefault(b.Name(), b)
     candidates = [
         benchmarks_by_name[name] for name in sorted(benchmarks_by_name)]
   benchmarks_to_shard = _GenerateBenchmarksToShardsList(
       [b for b in candidates if not benchmark or b.Name() == benchmark])
   sharding_map = sharding_map_generator.generate_sharding_map(
       benchmarks_to_shard, timing_data, num_shards=num_of_shards,
       debug=debug)
   with open(output_path, 'w') as output_file:
     json.dump(sharding_map, output_file, indent=4, separators=(',', ': '))


 def _UpdateShardsForBuilders(args):
   """Regenerate the shard map file of every selected builder.

   Args:
     args: the namespace parsed for the `update` subcommand. Attributes read:
       `builders`: names of the platforms to reshard.
       `regenerate_timing_data`: if true, the timing data file of each
         selected builder is fetched again, one worker process per builder,
         before the shard maps are generated.
       `debug`: forwarded to the shard map generation.
   """
   builders = {b for b in bot_platforms.ALL_PLATFORMS if b.name in args.builders}
   # `--builders` accepts zero names; multiprocessing.Pool(0) raises
   # ValueError, so skip the refresh when nothing was selected.
   if args.regenerate_timing_data and builders:
     print('Update shards timing data. May take a while...')
     load_timing_args = [(b.name, b.timing_file_path) for b in builders]
     pool = multiprocessing.Pool(len(load_timing_args))
     try:
       pool.map(_LoadTimingData, load_timing_args)
     finally:
       # Release the worker processes even when a fetch fails.
       pool.close()
       pool.join()

   for b in builders:
     _GenerateShardMap(
         b, b.num_shards, b.shards_map_file_path, args.debug, benchmark=None)
     print('Updated sharding map for %s' % repr(b.name))


 def _CreateShardMapForBenchmark(args):
   """Create the shard map for the given benchmark.

   Args:
     args(Namespace object): the namespace object for the subparser `create`.
       The attributes read are:
         `benchmark`: the name of the benchmark that we want the shard for
         `shards_num`: the total number of shards that we want to use
         `output_path`: the output file path for the shard map
         `timing_data_source`: the name of the platform whose timing data is
           used; a single string, or None when the flag was not given
         `debug`: whether debug info is included in the shard map

   Raises:
     ValueError: if `timing_data_source` does not match exactly one entry of
       bot_platforms.ALL_PLATFORMS.
   """
   timing_source = args.timing_data_source
   if timing_source:
     matching_platforms = [platform for platform in bot_platforms.ALL_PLATFORMS
                           if platform.name == timing_source]
     # Single-element unpacking doubles as a check for exactly one match.
     [builder] = matching_platforms
   else:
     builder = None
   _GenerateShardMap(
       builder, args.shards_num, args.output_path, args.debug, args.benchmark)


 def main():
   """Parse the command line and run the handler of the chosen subcommand.

   Returns:
     None, which the script entry point turns into exit status 0.
   """
   parser = GetParser()
   options = parser.parse_args()
   # Under Python 3, argparse accepts a command line without subcommand and
   # then `func` is never set. Report a usage error (exit status 2) instead
   # of failing with AttributeError.
   handler = getattr(options, 'func', None)
   if handler is None:
     parser.error('a subcommand is required: update or create')
   handler(options)

 # Run the tool only when this file is executed as a script; the value
 # returned by main() becomes the process exit status.
 if __name__ == '__main__':
   sys.exit(main())
	#!/usr/bin/env vpython
	# Copyright 2018 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	import argparse
	import json
	import multiprocessing
	import sys

	from core import benchmark_utils
	from core import bot_platforms
	from core import retrieve_story_timing
	from core import sharding_map_generator


	def GetParser():
	  """Build the command line parser of the sharding map tool.

	  Two subcommands are registered:
	    update: regenerate the shard maps of the selected perf builders; handled
	      by _UpdateShardsForBuilders.
	    create: write a shard map for a single benchmark; handled by
	      _CreateShardMapForBenchmark.

	  The --debug flag is registered on the top-level parser rather than on the
	  subparsers, so on the command line it goes before the subcommand name.

	  Returns:
	    The configured argparse.ArgumentParser. The handler of the chosen
	    subcommand is stored in the `func` attribute of the parsed namespace.
	  """
	  parser = argparse.ArgumentParser(
	      description='Generate perf test sharding map.')
	  parser.add_argument(
	      '--debug', action='store_true', default=False,
	      # The two literals are concatenated, so the separating space has to be
	      # part of one of them.
	      help=('Whether to include detailed debug info of the sharding map in '
	            'the shard maps.'))
	  subparsers = parser.add_subparsers()

	  # `update`: reshard existing builders.
	  parser_update = subparsers.add_parser('update')
	  parser_update.add_argument(
	      '--regenerate-timing-data', '-r', action='store_true',
	      help=('Whether to regenerate timing data for all builders in '
	            'chromium.perf'), default=False)
	  parser_update.add_argument(
	      '--builders', '-b', action='store', nargs='*',
	      help=('The builder names to reshard. If not specified, use all '
	            'perf builders'),
	      choices=bot_platforms.ALL_PLATFORM_NAMES,
	      default=bot_platforms.ALL_PLATFORM_NAMES)
	  parser_update.set_defaults(func=_UpdateShardsForBuilders)

	  # `create`: one-off shard map for a single benchmark.
	  parser_create = subparsers.add_parser('create')
	  parser_create.add_argument(
	      '--benchmark', help='The benchmark that you want to create shard for',
	      required=True)
	  parser_create.add_argument(
	      '--timing-data-source', '-t', choices=bot_platforms.ALL_PLATFORM_NAMES,
	      help='The timing data that you want to use. If not set, it will assume '
	           'all stories use the same amount of time to run')
	  parser_create.add_argument(
	      # pinpoint typically has 16 machines for each hardware types, so we set
	      # the default to use half of them to avoid starving the pool.
	      '--shards-num', type=int, default=8,
	      help="The number of shards you'd like to use, default is %(default)s")
	  parser_create.add_argument(
	      '--output-path', default='new_shard_map.json',
	      help='Output file path for the shard map, default is `%(default)s`')
	  parser_create.set_defaults(func=_CreateShardMapForBenchmark)
	  return parser


	def _GenerateBenchmarksToShardsList(benchmarks):
	  """Describe each benchmark in the form taken by the sharding generator.

	  Args:
	    benchmarks: iterable of benchmark classes. Name() is called on the class
	      itself; the class is called without arguments to get an instance.

	  Returns:
	    A list with one dict per benchmark, in input order:
	      [{
	         "name": "benchmark_1",
	         "stories": ["storyA", "storyB", ...],
	         "repeat": <number of pageset_repeat>
	       },
	       ...
	      ]
	    "stories" is the result of benchmark_utils.GetBenchmarkStoryNames. Its
	    order should match the order in which the benchmark actually runs its
	    stories, otherwise the sharding algorithm is not effective. "repeat" is 1
	    when the benchmark options have no 'pageset_repeat' entry.
	  """
	  def _Describe(benchmark_class):
	    return {
	        'name': benchmark_class.Name(),
	        'repeat': benchmark_class().options.get('pageset_repeat', 1),
	        'stories': benchmark_utils.GetBenchmarkStoryNames(benchmark_class()),
	    }

	  return [_Describe(benchmark_class) for benchmark_class in benchmarks]


	def _LoadTimingData(args):
	  """Fetch the story timing data of one builder and save it as JSON.

	  The arguments arrive packed in one tuple so that the function can be used
	  directly with multiprocessing.Pool.map.

	  Args:
	    args: a (builder_name, timing_file_path) tuple. The data of the last 5
	      days is requested for |builder_name| and written to |timing_file_path|.
	  """
	  builder_name, timing_file_path = args
	  timing_data = retrieve_story_timing.FetchAverageStortyTimingData(
	      configurations=[builder_name], num_last_days=5)
	  with open(timing_file_path, 'w') as timing_file:
	    json.dump(timing_data, timing_file, indent=4, separators=(',', ': '))
	  print('Finish retrieve story timing data for %s' % repr(builder_name))


	def _GenerateShardMap(
	    builder, num_of_shards, output_path, debug, benchmark):
	  """Generate a sharding map and write it to |output_path| as JSON.

	  Args:
	    builder: the platform (an entry of bot_platforms.ALL_PLATFORMS) whose
	      timing data file and benchmark list are used. May be None: then no
	      timing data is used and the benchmarks are collected from all entries
	      of bot_platforms.ALL_PLATFORMS.
	    num_of_shards: number of shards, passed as `num_shards` to
	      sharding_map_generator.generate_sharding_map.
	    output_path: path of the JSON file to write.
	    debug: forwarded to sharding_map_generator.generate_sharding_map.
	    benchmark: name of the only benchmark to shard. If falsy, all candidate
	      benchmarks are sharded.
	  """
	  timing_data = []
	  if builder:
	    with open(builder.timing_file_path) as f:
	      timing_data = json.load(f)
	    candidates = builder.benchmarks_to_run
	  else:
	    # Without a builder there is no benchmark list to read, and accessing
	    # builder.benchmarks_to_run would raise AttributeError. Fall back to the
	    # benchmarks of every platform, kept once per name and ordered by name so
	    # that the generated map is deterministic.
	    # NOTE(review): assumes each platform exposes benchmarks_to_run, as the
	    # `update` code path implies - confirm against bot_platforms.
	    benchmarks_by_name = {}
	    for platform in bot_platforms.ALL_PLATFORMS:
	      for b in platform.benchmarks_to_run:
	        benchmarks_by_name.setdefault(b.Name(), b)
	    candidates = [
	        benchmarks_by_name[name] for name in sorted(benchmarks_by_name)]
	  benchmarks_to_shard = _GenerateBenchmarksToShardsList(
	      [b for b in candidates if not benchmark or b.Name() == benchmark])
	  sharding_map = sharding_map_generator.generate_sharding_map(
	      benchmarks_to_shard, timing_data, num_shards=num_of_shards,
	      debug=debug)
	  with open(output_path, 'w') as output_file:
	    json.dump(sharding_map, output_file, indent=4, separators=(',', ': '))


	def _UpdateShardsForBuilders(args):
	  """Regenerate the shard map file of every selected builder.

	  Args:
	    args: the namespace parsed for the `update` subcommand. Attributes read:
	      `builders`: names of the platforms to reshard.
	      `regenerate_timing_data`: if true, the timing data file of each
	        selected builder is fetched again, one worker process per builder,
	        before the shard maps are generated.
	      `debug`: forwarded to the shard map generation.
	  """
	  builders = {b for b in bot_platforms.ALL_PLATFORMS if b.name in args.builders}
	  # `--builders` accepts zero names; multiprocessing.Pool(0) raises
	  # ValueError, so skip the refresh when nothing was selected.
	  if args.regenerate_timing_data and builders:
	    print('Update shards timing data. May take a while...')
	    load_timing_args = [(b.name, b.timing_file_path) for b in builders]
	    pool = multiprocessing.Pool(len(load_timing_args))
	    try:
	      pool.map(_LoadTimingData, load_timing_args)
	    finally:
	      # Release the worker processes even when a fetch fails.
	      pool.close()
	      pool.join()

	  for b in builders:
	    _GenerateShardMap(
	        b, b.num_shards, b.shards_map_file_path, args.debug, benchmark=None)
	    print('Updated sharding map for %s' % repr(b.name))


	def _CreateShardMapForBenchmark(args):
	  """Create the shard map for the given benchmark.

	  Args:
	    args(Namespace object): the namespace object for the subparser `create`.
	      The attributes read are:
	        `benchmark`: the name of the benchmark that we want the shard for
	        `shards_num`: the total number of shards that we want to use
	        `output_path`: the output file path for the shard map
	        `timing_data_source`: the name of the platform whose timing data is
	          used; a single string, or None when the flag was not given
	        `debug`: whether debug info is included in the shard map

	  Raises:
	    ValueError: if `timing_data_source` does not match exactly one entry of
	      bot_platforms.ALL_PLATFORMS.
	  """
	  timing_source = args.timing_data_source
	  if timing_source:
	    matching_platforms = [platform for platform in bot_platforms.ALL_PLATFORMS
	                          if platform.name == timing_source]
	    # Single-element unpacking doubles as a check for exactly one match.
	    [builder] = matching_platforms
	  else:
	    builder = None
	  _GenerateShardMap(
	      builder, args.shards_num, args.output_path, args.debug, args.benchmark)


	def main():
	  """Parse the command line and run the handler of the chosen subcommand.

	  Returns:
	    None, which the script entry point turns into exit status 0.
	  """
	  parser = GetParser()
	  options = parser.parse_args()
	  # Under Python 3, argparse accepts a command line without subcommand and
	  # then `func` is never set. Report a usage error (exit status 2) instead
	  # of failing with AttributeError.
	  handler = getattr(options, 'func', None)
	  if handler is None:
	    parser.error('a subcommand is required: update or create')
	  handler(options)

	# Run the tool only when this file is executed as a script; the value
	# returned by main() becomes the process exit status.
	if __name__ == '__main__':
	  sys.exit(main())