blob: f516e316ad311e581e0ab6284486cf54295e0270 [file] [log] [blame]
#!/usr/bin/env python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
Estimates capacity needs for all builders of a given master.
"""
import argparse
import datetime
import json
import os
import subprocess
import sys
import tempfile
import urllib
import urllib2
import numpy
BASE_DIR = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
def get_builds(mastername, buildername, days):
results = []
for day in days:
cursor = ''
while True:
response = urllib2.urlopen(
'https://chrome-build-extract.appspot.com/get_builds'
'?master=%s&builder=%s&day=%s&cursor=%s' % (
urllib.quote(mastername),
urllib.quote(buildername),
urllib.quote(str(day)),
urllib.quote(cursor)))
data = json.load(response)
results.extend(data['builds'])
cursor = data['cursor']
if not cursor:
break
return results
def estimate_capacity(requests):
hourly_buckets = {}
daily_buckets = {}
build_times = []
for request in requests:
build_times.append(request['build_time_s'])
hourly_bucket = request['timestamp'].strftime('%Y-%m-%d-%H')
hourly_buckets.setdefault(hourly_bucket, []).append(request['build_time_s'])
daily_bucket = request['timestamp'].strftime('%Y-%m-%d')
daily_buckets.setdefault(daily_bucket, []).append(request['build_time_s'])
def min_bots(buckets, resolution_s):
result = 0
for times in buckets.itervalues():
total_time = sum(times)
bots = total_time / resolution_s
result = max(result, bots)
return result
return {
'hourly_bots': min_bots(hourly_buckets, 3600),
'daily_bots': min_bots(daily_buckets, 3600 * 24),
'build_times_s': build_times,
}
def estimate_buildbot_capacity(builds):
requests = []
for build in builds:
changes = build['sourceStamp']['changes']
if changes:
assert len(changes) == 1
timestamp = datetime.datetime.utcfromtimestamp(changes[0]['when'])
else:
# Fallback for builds that don't have blamelist/source stamp,
# e.g. win_pgo.
timestamp = datetime.datetime.utcfromtimestamp(build['times'][0])
requests.append({
'build_time_s': build['times'][1] - build['times'][0],
'timestamp': timestamp,
})
return estimate_capacity(requests)
def get_properties(build):
return {p[0]: p[1] for p in build.get('properties', [])}
def chunks(iterable, length):
for i in range(0, len(iterable), length):
yield iterable[i:i + length]
def launch_and_collect(iterable, get_process, collect_result):
# Run processes in chunks so that we don't exceed the process limit.
for chunk in chunks(iterable, 50):
processes = []
try:
for item in chunk:
with tempfile.NamedTemporaryFile(delete=False, prefix='estimate_') as f:
process, metadata = get_process(item, f.name)
processes.append({
'process': process,
'metadata': metadata,
'file': f.name,
})
for p in processes:
rc = p['process'].wait()
if rc != 0:
raise Exception('fail')
with open(p['file']) as f:
data = json.load(f)
collect_result(data, p['metadata'])
finally:
for p in processes:
os.remove(p['file'])
def parse_datetime(date_string):
try:
return datetime.datetime.strptime(date_string, '%Y-%m-%dT%H:%M:%S.%f')
except ValueError:
return datetime.datetime.strptime(date_string, '%Y-%m-%dT%H:%M:%S')
def estimate_swarming_capacity(swarming_py, builds):
"""Estimates swarming capacity needs based on a list of builds.
Returns a dictionary, where keys are swarming pools
and values are capacity estimates for corresponding pool,
as returned by estimate_capacity function.
"""
def get_ids_and_durations():
result = []
def get_process(build, tmp_filename):
properties = get_properties(build)
# TODO(phajdan.jr): Handle case where there are more results than limit.
return subprocess.Popen([
swarming_py, 'query',
'-S', 'chromium-swarm.appspot.com',
'tasks/list?'
'tags=master:%s&tags=buildername:%s&tags=buildnumber:%s' % (
urllib.quote(properties['mastername']),
urllib.quote(properties['buildername']),
urllib.quote(str(properties['buildnumber']))),
'--json', tmp_filename]), None
def collect_result(data, metadata):
# When there are no results swarming.py doesn't include 'items'.
# Task may not have 'duration' e.g. when its state is EXPIRED.
result.extend({
'duration': item['duration'], 'id': item['task_id']}
for item in data.get('items', []) if item and item.get('duration'))
launch_and_collect(builds, get_process, collect_result)
return result
ids_and_durations = get_ids_and_durations()
def get_pools():
result = []
def get_process(id_and_durations, tmp_filename):
# TODO(phajdan.jr): Handle case where there are more results than limit.
return subprocess.Popen([
swarming_py, 'query',
'-S', 'chromium-swarm.appspot.com',
'task/%s/request' % id_and_durations['id'],
'--json', tmp_filename]), id_and_durations
def collect_result(data, id_and_durations):
dimensions = {e['key']: e['value']
for e in data['properties']['dimensions']}
result.append({
'id': id_and_durations['id'],
'total_duration': id_and_durations['duration'],
'dimensions': dimensions,
'created_ts': parse_datetime(data['created_ts']),
})
launch_and_collect(ids_and_durations, get_process, collect_result)
pools = {}
for r in result:
pools.setdefault(frozenset(r['dimensions'].iteritems()), []).append({
'build_time_s': r['total_duration'],
'timestamp': r['created_ts']})
return pools
pools = get_pools()
capacity = {pool : estimate_capacity(requests)
for pool, requests in pools.iteritems()}
return capacity
def main(argv):
parser = argparse.ArgumentParser()
parser.add_argument('master')
parser.add_argument('--days', type=int, default=14)
parser.add_argument('--exclude-by-blamelist')
parser.add_argument('--filter-by-blamelist')
parser.add_argument('--filter-by-builder')
parser.add_argument('--filter-by-patch-project')
parser.add_argument('--print-builds', action='store_true')
parser.add_argument('--swarming-py', help='Path to swarming.py')
args = parser.parse_args(argv)
with tempfile.NamedTemporaryFile() as f:
subprocess.check_call([
os.path.join(BASE_DIR, 'scripts', 'tools', 'runit.py'),
os.path.join(BASE_DIR, 'scripts', 'tools', 'dump_master_cfg.py'),
'masters/%s' % args.master,
f.name])
master_config = json.load(f)
builder_pools = []
for builder in master_config['builders']:
if args.filter_by_builder and builder['name'] != args.filter_by_builder:
continue
slave_set = set(builder['slavenames'])
builddir = builder.get('slavebuilddir', builder['name'])
for pool in builder_pools:
if pool['slave_set'] == slave_set:
pool['builders'].setdefault(builddir, []).append(builder['name'])
break
else:
builder_pools.append({
'slave_set': slave_set,
'builders': {builddir: [builder['name']]},
})
days = []
for i in range(args.days):
days.append(datetime.date.today() - datetime.timedelta(days=(i + 1)))
all_builds = []
cl_ids = set()
for index, pool in enumerate(builder_pools):
print 'Pool #%d:' % (index + 1)
pool_capacity = {
'hourly_bots': 0.0,
'daily_bots': 0.0,
'build_times_s': [],
}
# TODO(phajdan.jr): use multiprocessing pool to speed this up.
for builddir, builders in pool['builders'].iteritems():
print ' builddir "%s":' % builddir
for builder in builders:
raw_builds = get_builds(
args.master.replace('master.', ''), builder, days)
builds = []
for build in raw_builds:
properties = get_properties(build)
if (args.filter_by_patch_project and
properties.get('patch_project') != args.filter_by_patch_project):
continue
blamelist = build.get('blame', [])
if (args.exclude_by_blamelist and
args.exclude_by_blamelist in blamelist):
continue
if (args.filter_by_blamelist and
args.filter_by_blamelist not in blamelist):
continue
if not properties.get('issue') or not properties.get('patchset'):
continue
builds.append(build)
all_builds.append(build)
cl_ids.add('%s:%s' % (properties['issue'], properties['patchset']))
capacity = estimate_buildbot_capacity(builds)
for key in ('hourly_bots', 'daily_bots', 'build_times_s'):
pool_capacity[key] += capacity[key]
if capacity['build_times_s']:
avg_build_time = str(datetime.timedelta(
seconds=int(numpy.mean(capacity['build_times_s']))))
median_build_time = str(datetime.timedelta(
seconds=int(numpy.median(capacity['build_times_s']))))
else:
avg_build_time = 'n/a'
median_build_time = 'n/a'
print (' %-45s %-9s %-9s (%4s/%4s builds) '
'%5.1f %5.1f <%10.1f>' % (
builder,
avg_build_time,
median_build_time,
len(builds),
len(raw_builds),
capacity['daily_bots'],
capacity['hourly_bots'],
sum(capacity['build_times_s'])))
if args.print_builds:
def issue_patchset(build):
properties = get_properties(build)
return (properties.get('issue', -1), properties.get('patchset', -1))
for build in sorted(builds, key=issue_patchset):
properties = get_properties(build)
build_time = build['times'][1] - build['times'][0]
print ' build #%-9s %-25s %11s:%-11s %-9s' % (
build['number'],
properties.get('requester', 'n/a'),
properties.get('issue', 'n/a'),
properties.get('patchset', 'n/a'),
str(datetime.timedelta(seconds=build_time)))
print ' %-45s %s %5.1f %5.1f %5.1f <%10.1f>' % (
'total',
' ' * 40,
pool_capacity['daily_bots'],
pool_capacity['hourly_bots'],
len(pool['slave_set']),
sum(pool_capacity['build_times_s']))
if args.swarming_py:
swarming_capacity = estimate_swarming_capacity(args.swarming_py, all_builds)
if swarming_capacity:
print 'Swarming pools:'
for pool, capacity in swarming_capacity.iteritems():
formatted_pool = ', '.join(sorted(':'.join(d) for d in pool))
argv = [
args.swarming_py, 'bots',
'-S', 'chromium-swarm.appspot.com',
'--bare',
]
for d in pool:
argv.extend(['-d', d[0], d[1]])
swarming_bots = subprocess.check_output(argv)
print ' %-86s %5.1f %5.1f %5.1f <%10.1f>' % (
formatted_pool,
capacity['daily_bots'],
capacity['hourly_bots'],
len(swarming_bots.splitlines()),
sum(capacity['build_times_s']))
print 'Data generated for %d unique CL IDs (issue/patchset pairs)' % len(
cl_ids)
return 0
if __name__ == '__main__':
sys.exit(main(sys.argv[1:]))