| #!/usr/bin/env python |
| # Copyright 2014 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Scans a list of masters and saves information in a build_db.""" |
| |
| from contextlib import closing |
| import base64 |
| import httplib2 |
| import json |
| import logging |
| import optparse |
| import os |
| import sys |
| import time |
| import urllib |
| import zlib |
| |
| from common import chromium_utils |
| from slave import build_scan_db |
| |
| SCRIPTS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), |
| '..', '..') |
| |
| # We need master to be on the path to import auth. |
| sys.path.insert(0, os.path.join(SCRIPTS_DIR, 'master')) |
| from master import auth |
| |
# Buildbot status enum (mirrors buildbot.status.results ordering).
SUCCESS, WARNINGS, FAILURE, SKIPPED, EXCEPTION, RETRY = range(6)

# Retry a failed Milo fetch up to this many times before giving up.
MAX_ATTEMPTS = 4
# NOTE(review): URL_TIMEOUT appears unused in this file — confirm before
# removing.
URL_TIMEOUT = 60

# Sentinel builder name meaning "scan every builder on the master".
BUILDER_WILDCARD = '*'

# Base URL of Milo's pRPC API; endpoint paths are appended to this.
ENDPOINT_ROOT = 'https://luci-milo.appspot.com/prpc/'
# OAuth scope requested when authorizing with service account credentials.
SCOPES = ['https://www.googleapis.com/auth/userinfo.email']
| |
| |
| def _get_from_milo(endpoint, data, milo_creds=None, http=None): |
| headers = { |
| 'Accept': 'application/json', |
| 'Content-Type': 'application/json', |
| 'User-Agent': 'Python-httplib2/2.7 -- build_scan.py', |
| } |
| url = ENDPOINT_ROOT + endpoint |
| if not http: |
| http = httplib2.Http() |
| if milo_creds: |
| creds = auth.create_service_account_credentials(milo_creds, SCOPES) |
| http = creds.authorize(http) |
| creds.refresh(http) |
| logging.info('fetching %s with %s' % (url, data)) |
| |
| attempts = 0 |
| while True: |
| resp, content = http.request(url, 'POST', body=data, headers=headers) |
| if resp.status == 200: |
| # Remove the jsonp header. |
| return json.loads(content[4:]) |
| if attempts > MAX_ATTEMPTS: |
| msg = "Error encountered during URL Fetch: %s" % content |
| logging.error(msg) |
| raise ValueError(msg) |
| |
| attempts += 1 |
| time_to_sleep = 2 ** attempts |
| logging.info( |
| "url fetch encountered %d, sleeping for %d seconds and retrying..." % ( |
| resp.status, time_to_sleep)) |
| time.sleep(time_to_sleep) |
| |
| |
def get_root_json(master_url, milo_creds):
  """Fetches the master's root JSON (builder and build info) via Milo.

  Args:
    master_url: full master URL; the master name is its last path component
        (e.g. https://build.chromium.org/p/chromium.perf -> chromium.perf).
    milo_creds: optional service account credentials path for Milo.

  Returns:
    The decompressed master JSON as a dict.
  """
  master_name = master_url.rstrip('/').split('/')[-1]

  request_body = json.dumps({
      'name': master_name,
      'exclude_deprecated': True,
  })
  response = _get_from_milo(
      'milo.Buildbot/GetCompressedMasterJSON', request_body, milo_creds)
  # The payload is base64-encoded gzip; MAX_WBITS | 16 tells zlib to expect
  # a gzip header.
  raw = zlib.decompress(base64.b64decode(response['data']), zlib.MAX_WBITS | 16)
  return json.loads(raw)
| |
| |
def find_new_builds(master_url, builderlist, root_json, build_db):
  """Given a dict of previously-seen builds, find new builds on each builder.

  Note that we use the 'cachedBuilds' here since it should be faster, and this
  script is meant to be run frequently enough that it shouldn't skip any builds.

  'Frequently enough' means 1 minute in the case of Buildbot or cron, so the
  only way for the scan to be overwhelmed is if > cachedBuilds builds
  complete within 1 minute. As cachedBuilds is scaled per number of slaves per
  builder, the only way for this to really happen is if a build consistently
  takes < 1 minute to complete.

  Args:
    master_url: master URL, used as the key into build_db.masters.
    builderlist: collection of builder names to scan; containing
        BUILDER_WILDCARD means every builder is scanned.
    root_json: master JSON from get_root_json(); must have a 'builders' dict.
    build_db: build_scan_db database object; mutated in place.

  Returns:
    Dict mapping builder name -> build numbers not yet known to be finished.
    (Values are lists in most branches, but a set in the
    seen-builder-without-finished-build branch.)
  """
  new_builds = {}
  # Make sure this master has an entry before reading builders out of it.
  build_db.masters[master_url] = build_db.masters.get(master_url, {})

  # Highest finished build number previously recorded for each builder.
  last_finished_build = {}
  for builder, builds in build_db.masters[master_url].iteritems():
    finished = [int(y[0]) for y in builds.iteritems()
                if y[1].finished]
    if finished:
      last_finished_build[builder] = max(finished)

  for buildername, builder in root_json['builders'].iteritems():
    if (BUILDER_WILDCARD not in builderlist) and (
        buildername not in builderlist):
      logging.debug('ignoring %s:%s because not in builder whitelist',
                    master_url, buildername)
      continue

    # cachedBuilds are the builds in the cache, while currentBuilds are the
    # currently running builds. Thus cachedBuilds can be unfinished or finished,
    # while currentBuilds are always unfinished.
    cached_builds = builder.get('cachedBuilds') or []
    current_builds = builder.get('currentBuilds') or []
    candidate_builds = set(cached_builds + current_builds)
    if buildername in last_finished_build:
      # Anything newer than the last finished build is worth scanning.
      new_builds[buildername] = [
          buildnum for buildnum in candidate_builds
          if buildnum > last_finished_build[buildername]]
    else:
      if buildername in build_db.masters[master_url]:
        # We've seen this builder before, but haven't seen a finished build.
        # Scan finished builds as well as unfinished.
        new_builds[buildername] = candidate_builds
      else:
        # We've never seen this builder before, only scan unfinished builds.

        # We're explicitly only dealing with current builds since we haven't
        # seen this builder before. Thus, the next time a scan is run,
        # only unfinished builds will be in the build_db. This immediately drops
        # us into the section above (builder is in the db, but no finished
        # builds yet.) In this state all the finished builds will be loaded in,
        # firing off an email storm any time the build_db changes or a new
        # builder is added. We set the last finished build here to prevent that.
        finished = set(cached_builds) - set(current_builds)
        if finished:
          build_db.masters[master_url].setdefault(buildername, {})[
              max(finished)] = build_scan_db.gen_build(finished=True)

        new_builds[buildername] = current_builds

  # Log the raw Milo view and the computed deltas for debugging.
  logging.info('milo output for %s:', master_url)
  for builder in sorted(root_json['builders'].keys()):
    data = root_json['builders'][builder]
    logging.info(
        'builder: %s, current builds: %s, cached builds: %s',
        builder,
        data.get('currentBuilds') or [],
        data.get('cachedBuilds') or [],
    )
  logging.info('new builds for %s:', master_url)
  for builder in sorted(new_builds.keys()):
    logging.info('builder: %s, new builds: %s', builder, new_builds[builder])

  return new_builds
| |
| |
def find_new_builds_per_master(masters, build_db, milo_creds):
  """Scans each master for new builds and groups results by master URL.

  Args:
    masters: dict mapping master URL -> builder whitelist.
    build_db: build_scan_db database object; mutated by find_new_builds.
    milo_creds: optional service account credentials path for Milo.

  Returns:
    (builds, master_jsons): new builds per master, and each master's root
    JSON keyed by master URL.
  """
  new_builds_by_master = {}
  jsons_by_master = {}
  for master_url, builderlist in masters.iteritems():
    master_json = get_root_json(master_url, milo_creds)
    jsons_by_master[master_url] = master_json
    new_builds_by_master[master_url] = find_new_builds(
        master_url, builderlist, master_json, build_db)
  return new_builds_by_master, jsons_by_master
| |
| |
def get_build_json(url_tuple):
  """Downloads the json of a specific build.

  Args:
    url_tuple: (master_url, builder, buildnum, milo_creds). Packed into one
        tuple so the function can be used with pool.map_async.

  Returns:
    (build_json, master_url, builder, buildnum).
  """
  master_url, builder, buildnum, milo_creds = url_tuple

  # The master name is the last path component of the master URL, e.g.
  # https://build.chromium.org/p/chromium.perf -> chromium.perf.
  master_name = master_url.rstrip('/').split('/')[-1]

  request_body = json.dumps({
      'master': master_name,
      'builder': builder,
      'build_num': int(buildnum),
      'exclude_deprecated': True,
  })
  response = _get_from_milo(
      'milo.Buildbot/GetBuildbotBuildJSON', request_body, milo_creds)
  build_json = json.loads(base64.b64decode(response['data']))
  return build_json, master_url, builder, buildnum
| |
| |
def get_build_jsons(master_builds, processes, milo_creds):
  """Get all new builds on specified masters.

  Flattens the [master][builder] -> builds mapping into one fetch request
  per build, then queries each build's JSON in a pool of worker processes.
  """
  requests = [
      (master, builder, buildnum, milo_creds)
      for master, builder_dict in master_builds.iteritems()
      for builder, new_builds in builder_dict.iteritems()
      for buildnum in new_builds
  ]

  # Prevent map from hanging on an empty list,
  # see http://bugs.python.org/issue12157.
  if not requests:
    return []

  # The async/get is so that ctrl-c can interrupt the scans.
  # See http://stackoverflow.com/questions/1408356/
  # keyboard-interrupts-with-pythons-multiprocessing-pool
  with chromium_utils.MultiPool(processes) as pool:
    return filter(bool, pool.map_async(get_build_json, requests).get(9999999))
| |
| |
def propagate_build_json_to_db(build_db, builds):
  """Propagates build status changes from build_json to build_db.

  Args:
    build_db: build_scan_db database object; mutated in place.
    builds: iterable of (build_json, master, builder, buildnum) tuples as
        produced by get_build_json.
  """
  for build_json, master, builder, buildnum in builds:
    builder_builds = build_db.masters[master].setdefault(builder, {})
    build = builder_builds.get(buildnum)
    if not build:
      build = build_scan_db.gen_build()

    # Buildbot sets 'results' only once a build has completed.
    if build_json.get('results', None) is not None:
      build = build._replace(finished=True)  # pylint: disable=W0212
    else:
      # Builds can't be marked succeeded unless they are finished.
      build = build._replace(succeeded=False)  # pylint: disable=W0212

    builder_builds[buildnum] = build
| |
| |
def get_options():
  """Parses command-line options and master URLs.

  Returns:
    (options, args): the parsed optparse options and the list of master
    URLs with any trailing slashes stripped.

  Exits via parser.error() if no master URL is given.
  """
  prog_desc = 'Scans for builds and outputs updated builds.'
  usage = '%prog [options] <one or more master urls>'
  parser = optparse.OptionParser(usage=(usage + '\n\n' + prog_desc))
  parser.add_option('--milo-creds',
                    help='Location to service account json credentials for '
                         'accessing Milo.')
  parser.add_option('--build-db', default='build_scan_db.json',
                    help='records the last-seen build for each builder')
  parser.add_option('--clear-build-db', action='store_true',
                    help='reset build_db to be empty')
  parser.add_option('--skip-build-db-update', action='store_true',
                    help='don\'t write to the build_db, overridden by '
                         'clear-db')
  # type='int' so a command-line value is parsed as an integer instead of
  # being left as a string (the default was already an int).
  parser.add_option('--parallelism', default=16, type='int',
                    help='up to this many builds can be queried simultaneously')
  parser.add_option('-v', '--verbose', action='store_true',
                    help='turn on extra debugging information')

  options, args = parser.parse_args()

  if not args:
    parser.error('you need to specify at least one master URL')

  # Normalize so URLs compare consistently regardless of trailing slash.
  args = [url.rstrip('/') for url in args]

  return options, args
| |
| |
def get_updated_builds(masters, build_db, parallelism, milo_creds):
  """Scans masters for new builds, fetches them, and records them in build_db.

  Args:
    masters: dict mapping master URL -> builder whitelist.
    build_db: build_scan_db database object; mutated in place.
    parallelism: number of builds queried simultaneously.
    milo_creds: optional service account credentials path for Milo.

  Returns:
    (master_jsons, build_jsons): per-master root JSON dicts and the list of
    fetched (build_json, master, builder, buildnum) tuples.
  """
  pending, master_jsons = find_new_builds_per_master(
      masters, build_db, milo_creds)
  fetched = get_build_jsons(pending, parallelism, milo_creds)
  propagate_build_json_to_db(build_db, fetched)
  return master_jsons, fetched
| |
| |
def main():
  """Entry point: scans the given masters and prints each updated build.

  Returns:
    0 on success (used as the process exit code).
  """
  options, args = get_options()

  logging.basicConfig(level=logging.DEBUG if options.verbose else logging.INFO)

  # Scan every builder on each requested master. NOTE(review): the wildcard
  # is stored as the bare string '*'; find_new_builds' membership test works
  # because '*' in '*' is True, but a list [BUILDER_WILDCARD] would be
  # clearer — confirm before changing.
  masters = {}
  for m in set(args):
    masters[m] = BUILDER_WILDCARD

  if options.clear_build_db:
    # NOTE(review): this persists an empty db but leaves build_db as a plain
    # dict, while downstream code accesses build_db.masters (an attribute) —
    # verify build_scan_db accepts {} here, or whether a fresh db object
    # should be constructed instead.
    build_db = {}
    build_scan_db.save_build_db(build_db, {}, options.build_db)
  else:
    build_db = build_scan_db.get_build_db(options.build_db)

  _, build_jsons = get_updated_builds(
      masters, build_db, options.parallelism, options.milo_creds)

  # One line per updated build: master:builder:buildnum.
  for _, master_url, builder, buildnum in build_jsons:
    print '%s:%s:%s' % (master_url, builder, buildnum)

  if not options.skip_build_db_update:
    build_scan_db.save_build_db(build_db, {}, options.build_db)

  return 0
| |
| |
| if __name__ == '__main__': |
| sys.exit(main()) |