| #!/usr/bin/env python |
| # Copyright 2016 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Script which gathers and merges the JSON results from multiple |
| swarming shards of a step on the waterfall. |
| |
| This is used to feed in the per-test times of previous runs of tests |
| to the browser_test_runner's sharding algorithm, to improve shard |
| distribution. |
| """ |
| |
| import argparse |
| import json |
| import os |
| import shutil |
| import subprocess |
| import sys |
| import tempfile |
| import urllib |
| import urllib2 |
| |
# Base URL of the Swarming server used by the auth and collect commands.
SWARMING_SERVICE = 'https://chromium-swarm.appspot.com'

# Directory that contains this script.
THIS_DIR = os.path.split(os.path.abspath(__file__))[0]
# Three directory levels above THIS_DIR.
SRC_DIR = os.path.normpath(
    os.path.join(THIS_DIR, os.pardir, os.pardir, os.pardir))
# Location of the swarming client scripts (auth.py, swarming.py) under SRC_DIR.
SWARMING_CLIENT_DIR = os.path.join(SRC_DIR, 'tools', 'swarming_client')
| |
class Swarming:
    """Static helpers wrapping the swarming_client command-line scripts."""

    @staticmethod
    def CheckAuth():
        """Exit the process unless `auth.py check` reports a logged-in user.

        Runs auth.py from SWARMING_CLIENT_DIR against SWARMING_SERVICE. If
        its output does not start with 'user:', prints login instructions
        and calls sys.exit(1).

        Raises:
            subprocess.CalledProcessError: if auth.py exits non-zero.
        """
        # universal_newlines makes check_output return text rather than
        # bytes, so the prefix comparison below is valid on any interpreter.
        output = subprocess.check_output(
            ['python',
             os.path.join(SWARMING_CLIENT_DIR, 'auth.py'),
             'check',
             '--service',
             SWARMING_SERVICE],
            universal_newlines=True)
        if not output.startswith('user:'):
            print('Must run:')
            print(' tools/swarming_client/auth.py login --service ' +
                  SWARMING_SERVICE)
            print('and authenticate with @google.com credentials.')
            sys.exit(1)

    @staticmethod
    def Collect(taskIDs, output_dir, verbose):
        """Run `swarming.py collect` for the given tasks.

        Args:
            taskIDs: sequence of Swarming task ID strings.
            output_dir: directory passed as --task-output-dir.
            verbose: verbosity count. Any non-zero value prints the command
                line; a value above 1 also lets the command's own output
                through instead of discarding it.

        Raises:
            subprocess.CalledProcessError: if the collect command fails.
        """
        cmd = [
            'python',
            os.path.join(SWARMING_CLIENT_DIR, 'swarming.py'),
            'collect',
            '-S',
            SWARMING_SERVICE,
            '--task-output-dir',
            output_dir] + list(taskIDs)
        if verbose:
            print('Collecting Swarming results:')
            print(cmd)
        if verbose > 1:
            # Inherit our stdout so the collect command's output is shown.
            subprocess.check_call(cmd, stderr=subprocess.STDOUT)
        else:
            # Discard the command's output; the `with` block guarantees the
            # devnull handle is closed even if the command fails.
            with open(os.devnull, 'w') as fnull:
                subprocess.check_call(
                    cmd, stdout=fnull, stderr=subprocess.STDOUT)

    @staticmethod
    def ExtractShardTaskIDs(urls):
        """Return the task IDs embedded in Swarming task URLs.

        Args:
            urls: iterable of URLs, each of which must start with
                'https://chromium-swarm.appspot.com/user/task/'.

        Returns:
            A list with the part of each URL that follows that prefix, in
            input order.

        Raises:
            ValueError: if a URL does not start with the expected prefix.
        """
        SWARMING_URL = 'https://chromium-swarm.appspot.com/user/task/'
        task_ids = []
        for u in urls:
            if not u.startswith(SWARMING_URL):
                raise ValueError(
                    'Illegally formatted \'urls\' value %s' % u)
            task_ids.append(u[len(SWARMING_URL):])
        return task_ids
| |
class Waterfall:
    """Fetches build information for one buildbot master from Milo."""

    def __init__(self, waterfall):
        """Remember the master name to use in later requests.

        Args:
            waterfall: master name sent as "masterName", e.g.
                'chromium.gpu.fyi'.
        """
        self._waterfall = waterfall

    def GetJsonForBuild(self, bot, build):
        """Fetch and decode the Milo BuildInfo record for one build.

        Args:
            bot: builder name sent as "builderName".
            build: build number sent as "buildNumber".

        Returns:
            The decoded JSON payload of the response.

        Raises:
            ValueError: if the response lacks the expected second line or
                that line is not valid JSON.
        """
        # Explorable via RPC explorer:
        # https://luci-milo.appspot.com/rpcexplorer/services/milo.BuildInfo/Get

        # The Python docs are wrong. It's fine for this payload to be just
        # a JSON string.
        call_arg = json.dumps({"buildbot": {"masterName": self._waterfall,
                                            "builderName": bot,
                                            "buildNumber": build}})
        headers = {
            "content-type": "application/json",
            "accept": "application/json"
        }
        request = urllib2.Request(
            "https://luci-milo.appspot.com/prpc/milo.BuildInfo/Get",
            call_arg,
            headers)
        conn = urllib2.urlopen(request)
        try:
            result = conn.read()
        finally:
            # Close the connection even when reading the body fails.
            conn.close()
        return self._ParseResponse(result)

    @staticmethod
    def _ParseResponse(result):
        """Decode the JSON payload from a raw response body.

        The body is a two-line string, the first line of which is
        deliberate garbage and the second of which is a JSON payload.
        """
        lines = result.splitlines()
        if len(lines) < 2:
            raise ValueError(
                'Unexpected response; expected a prefix line followed by '
                'a JSON line, got %r' % (result[:200],))
        return json.loads(lines[1])
| |
def JsonLoadStrippingUnicode(file, **kwargs):
    """Load JSON from a file object, ASCII-encoding text where possible.

    Args:
        file: open file-like object handed to json.load.
        **kwargs: forwarded unchanged to json.load.

    Returns:
        The decoded structure with every unicode text string (dict keys
        included) replaced by its ASCII encoding. Strings that cannot be
        encoded as ASCII, and all non-string leaves, are left untouched.
        Lists come back as plain lists; dicts keep their original type.
    """
    # The unicode text type, spelled without naming the builtin directly.
    # NOTE(review): on an interpreter whose text type is `str`, the encode
    # below yields bytes objects -- confirm that is acceptable before
    # relying on this outside Python 2.
    text_type = type(u'')

    def _convert(value):
        if isinstance(value, dict):
            # Rebuild with the same mapping type, converting keys and values.
            return type(value)(
                (_convert(key), _convert(item))
                for key, item in value.items())
        if isinstance(value, list):
            return [_convert(item) for item in value]
        if isinstance(value, text_type):
            try:
                return value.encode('ascii')
            except UnicodeEncodeError:
                # Not representable in ASCII; keep the original text.
                return value
        return value

    return _convert(json.load(file, **kwargs))
| |
def FindStepRecursive(node, step_name):
    """Depth-first search for the first step whose name has a given prefix.

    The format of this JSON-encoded protobuf is defined here:
    https://chromium.googlesource.com/infra/luci/luci-go/+/master/
        common/proto/milo/annotations.proto
    It's easiest to just use the RPC explorer to fetch one and see
    what's desired to extract.

    Args:
        node: dict describing a step; may have 'name' and 'substep' keys.
        step_name: prefix to match against each node's 'name'.

    Returns:
        The first matching node (the node itself is checked before its
        substeps), or None when nothing matches.
    """
    if 'name' in node and node['name'].startswith(step_name):
        return node

    # Each substep entry is a wrapper object whose single "step" field
    # holds the actual node.
    for wrapper in node.get('substep', ()):
        found = FindStepRecursive(wrapper['step'], step_name)
        if found:
            return found
    return None
| |
def Merge(dest, src):
    """Recursively merge the JSON-like value src into dest.

    Lists are concatenated into a new list. Dicts are merged key by key
    into dest, which is modified in place and returned; keys present in
    both are merged recursively. For any other type, src replaces dest.

    Args:
        dest: value accumulated so far.
        src: value to merge in.

    Returns:
        The merged value.

    Raises:
        TypeError: if dest is a list or dict and src is not the same kind.
    """
    if isinstance(dest, list):
        if not isinstance(src, list):
            # %r formatting: concatenating the values onto a str would
            # itself fail before the intended message could be built.
            raise TypeError(
                'Both must be lists: %r and %r' % (dest, src))
        return dest + src

    if isinstance(dest, dict):
        if not isinstance(src, dict):
            raise TypeError(
                'Both must be dicts: %r and %r' % (dest, src))
        for k in src:
            if k in dest:
                dest[k] = Merge(dest[k], src[k])
            else:
                dest[k] = src[k]
        return dest

    return src
| |
| |
def ExtractTestTimes(node, node_name, dest):
    """Collect the mean run time of every test found under node.

    A dict containing a 'times' key is treated as a test entry: the mean
    of its 'times' list is stored in dest under node_name. Any other dict
    is searched recursively through its dict-valued children.

    Args:
        node: dict from the merged results trie.
        node_name: key under which node was found in its parent.
        dest: dict that receives {test name: mean time}; modified in place.
    """
    if 'times' in node:
        times = node['times']
        # An entry with no recorded samples has no mean; skip it instead
        # of dividing by zero.
        if times:
            # float() keeps integer samples from being averaged with
            # truncating division.
            dest[node_name] = float(sum(times)) / len(times)
        return

    # Currently the prefix names in the trie are dropped. Could
    # concatenate them if the naming convention is changed.
    for k in node:
        if isinstance(node[k], dict):
            ExtractTestTimes(node[k], k, dest)
| |
def main():
    """Gather, merge and write the JSON results of one sharded test step.

    Unless --start-from-temp-dir is given, checks Swarming authentication,
    looks up the requested build and step, and collects every shard's
    output into a fresh temporary directory. The per-shard output.json
    files are then merged; the mean test times are written to --output
    and, if requested, the whole merged JSON to --full-output.

    Returns:
        0 on success, 1 if a required input is missing or cannot be found.
    """
    parser = argparse.ArgumentParser(
        description='Gather JSON results from a run of a Swarming test.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help='Enable verbose output (specify multiple times '
                        'for more output)')
    parser.add_argument('--waterfall', type=str, default='chromium.gpu.fyi',
                        help='Which waterfall to examine')
    parser.add_argument('--bot', type=str, default='Linux Release (NVIDIA)',
                        help='Which bot on the waterfall to examine')
    parser.add_argument('--build', default=-1, type=int,
                        help='Which build to fetch (must be specified)')
    parser.add_argument('--step', type=str,
                        default='webgl2_conformance_tests',
                        help='Which step to fetch (treated as a prefix)')
    parser.add_argument('--output', type=str, default='output.json',
                        help='Name of output file; contains only test run '
                        'times')
    parser.add_argument('--full-output', type=str, default='',
                        help='Name of complete output file if desired')
    parser.add_argument('--leak-temp-dir', action='store_true',
                        default=False,
                        help='Deliberately leak temporary directory')
    parser.add_argument('--start-from-temp-dir', type=str, default='',
                        help='Start from temporary directory (for debugging)')

    options = parser.parse_args(sys.argv[1:])

    if options.start_from_temp_dir:
        # Reuse shard output already on disk; nothing is collected.
        tmpdir = options.start_from_temp_dir
        owns_tmpdir = False
        task_ids = None
        shard_dirs = [f for f in os.listdir(tmpdir)
                      if os.path.isdir(os.path.join(tmpdir, f))]
        num_task_ids = len(shard_dirs)
    else:
        Swarming.CheckAuth()

        build = options.build
        if build < 0:
            print("Build number must be specified; check the bot's page")
            return 1

        waterfall = Waterfall(options.waterfall)
        build_json = waterfall.GetJsonForBuild(options.bot, build)

        if options.verbose:
            print('Fetching information from %s, bot %s, build %s' % (
                options.waterfall, options.bot, build))

        step = FindStepRecursive(build_json['step'], options.step)
        if not step:
            print("Unable to find step starting with " + options.step)
            return 1

        # Keep each shard's task link but not its "isolated out" link. A
        # step without links yields no task IDs and is reported below.
        shard_urls = []
        for link in step.get('otherLinks', []):
            label = link['label']
            if (label.startswith('shard #') and
                    not label.endswith('isolated out')):
                shard_urls.append(link['url'])
        task_ids = Swarming.ExtractShardTaskIDs(shard_urls)

        if not task_ids:
            print('Problem gathering the Swarming task IDs for %s' %
                  options.step)
            return 1

        tmpdir = tempfile.mkdtemp()
        owns_tmpdir = True
        num_task_ids = len(task_ids)

    try:
        if task_ids is not None:
            # Collect the results.
            Swarming.Collect(task_ids, tmpdir, options.verbose)

        # Shards' JSON outputs are in sequentially-numbered subdirectories
        # of the output directory.
        merged_json = None
        for i in range(num_task_ids):
            with open(os.path.join(tmpdir, str(i), 'output.json')) as f:
                cur_json = JsonLoadStrippingUnicode(f)
            if not merged_json:
                merged_json = cur_json
            else:
                merged_json = Merge(merged_json, cur_json)

        if merged_json is None:
            print('No shard results found in %s' % tmpdir)
            return 1

        extracted_times = {'times': {}}
        ExtractTestTimes(merged_json, '', extracted_times['times'])

        with open(options.output, 'w') as f:
            json.dump(extracted_times, f, sort_keys=True, indent=2,
                      separators=(',', ': '))

        if options.full_output:
            # The complete merged JSON goes to its own file, not to the
            # times-only output file.
            with open(options.full_output, 'w') as f:
                json.dump(merged_json, f, sort_keys=True, indent=2,
                          separators=(',', ': '))
    finally:
        if options.leak_temp_dir:
            print('Temporary directory: %s' % tmpdir)
        elif owns_tmpdir:
            # Only remove a directory this run created; one supplied via
            # --start-from-temp-dir belongs to the user and is kept.
            shutil.rmtree(tmpdir)

    return 0
| |
| |
# When run as a script, use main()'s return value as the process exit code.
if __name__ == '__main__':
    exit_code = main()
    sys.exit(exit_code)