GoogleGit

blob: c8aa89b1d3e37b5066bd2189da354d5236b4d716 [file] [log] [blame]
  1. #!/usr/bin/env python
  2. # Copyright 2013 The Chromium Authors. All rights reserved.
  3. # Use of this source code is governed by a BSD-style license that can be
  4. # found in the LICENSE file.
  5. """Closes tree if configured masters have failed tree-closing steps.
  6. Given a list of masters, gatekeeper_ng will get a list of the latest builds from
  7. the specified masters. It then checks if any tree-closing steps have failed, and
  8. if so closes the tree and emails appropriate parties. Configuration for which
  9. steps to close and which parties to notify are in a local gatekeeper.json file.
  10. """
  11. from collections import defaultdict
  12. from contextlib import closing, contextmanager
  13. import argparse
  14. import fnmatch
  15. import getpass
  16. import hashlib
  17. import hmac
  18. import itertools
  19. import json
  20. import logging
  21. import operator
  22. import os
  23. import random
  24. import re
  25. import sys
  26. import time
  27. import urllib
  28. import urllib2
  29. from slave import build_scan
  30. from slave import build_scan_db
  31. from slave import gatekeeper_ng_config
  32. DATA_DIR = os.path.dirname(os.path.abspath(__file__))
  33. # Buildbot status enum.
  34. SUCCESS, WARNINGS, FAILURE, SKIPPED, EXCEPTION, RETRY = range(6)
  35. def get_pwd(password_file):
  36. if os.path.isfile(password_file):
  37. return open(password_file, 'r').read().strip()
  38. return getpass.getpass()
  39. def in_glob_list(value, glob_list):
  40. """Returns True if 'value' matches any glob in 'glob_list'.
  41. Args:
  42. value: (str) The value to search for.
  43. glob_list: (list) A list of glob strings to test.
  44. """
  45. return any(fnmatch.fnmatch(value, glob)
  46. for glob in glob_list)
  47. def logging_urlopen(url, *args, **kwargs):
  48. try:
  49. return urllib2.urlopen(url, *args, **kwargs)
  50. except urllib2.HTTPError as e:
  51. logging.debug('error accessing url %s: %s' % (url, e))
  52. raise
def update_status(tree_message, status_url_root, username, password, simulate):
    """Connects to chromium-status and closes the tree."""
    #TODO(xusydoc): append status if status is already closed.
    # NOTE(review): the two branches are asymmetric -- unicode input is
    # *encoded* to utf-8 bytes, while str input is *decoded* to a unicode
    # object. Presumably urllib.urlencode accepts both forms; confirm the
    # asymmetry is intentional.
    if isinstance(tree_message, unicode):
        tree_message = tree_message.encode('utf8')
    elif isinstance(tree_message, str):
        tree_message = tree_message.decode('utf8')
    params = urllib.urlencode({
        'message': tree_message,
        'username': username,
        'password': password
    })
    # Standard urllib doesn't raise an exception on 403, urllib2 does.
    if not simulate:
        status_url = status_url_root + "/status"
        # POST the form data; the response body is irrelevant -- closing()
        # just guarantees the connection is released.
        with closing(logging_urlopen(status_url, params)):
            pass
    else:
        logging.info("Simulate: Setting tree [%s] status: %s",
                     status_url_root, tree_message)
    logging.info('success')
  74. def get_tree_status(status_url_root, username, password):
  75. status_url = status_url_root + "/current?format=json"
  76. with closing(logging_urlopen(status_url)) as f:
  77. data = f.read()
  78. try:
  79. return json.loads(data)
  80. except ValueError:
  81. # Failed due to authentication error?
  82. if 'login' not in data:
  83. raise
  84. # Try using bot password to authenticate.
  85. params = urllib.urlencode({
  86. 'username': username,
  87. 'password': password
  88. })
  89. try:
  90. with closing(logging_urlopen(status_url, params)) as f:
  91. data = f.read()
  92. except urllib2.HTTPError, e:
  93. if e.code == 405:
  94. logging.warn("update your chromium_status app.")
  95. raise
  96. return json.loads(data)
  97. def get_builder_section(gatekeeper_section, builder):
  98. """Returns the applicable gatekeeper config for the builder.
  99. If the builder isn't present or is excluded, return None.
  100. """
  101. if builder in gatekeeper_section:
  102. builder_section = gatekeeper_section[builder]
  103. elif '*' in gatekeeper_section:
  104. builder_section = gatekeeper_section['*']
  105. else:
  106. return None
  107. if not in_glob_list(builder, builder_section.get('excluded_builders', ())):
  108. return builder_section
  109. return None
def check_builds(master_builds, master_jsons, gatekeeper_config):
    """Given a gatekeeper configuration, see which builds have failed.

    Args:
      master_builds: iterable of (build_json, master_url, builder, buildnum)
        tuples describing the builds to examine.
      master_jsons: dict of master_url -> master JSON blob; only the
        'project' entry (buildbotURL, title) is read here.
      gatekeeper_config: dict of master_url -> list of gatekeeper sections.

    Returns:
      A 4-tuple of:
        failed_builds: list of (build_info_dict, master_url, builder,
          buildnum, section_hash) tuples, in increasing build-number order.
        succeeded_builds: list of (master_url, builder, buildnum) tuples,
          in increasing build-number order.
        successful_builder_steps: master_url -> builder -> set of step names
          that succeeded in any examined build.
        current_builds_successful: False if some failing step has not been
          seen to succeed in a newer build of the same builder.
    """
    succeeded_builds = []
    failed_builds = []
    # Sort by buildnumber, highest first.
    sorted_builds = sorted(master_builds, key=lambda x: x[3], reverse=True)
    successful_builder_steps = defaultdict(lambda: defaultdict(set))
    current_builds_successful = True
    for build_json, master_url, builder, buildnum in sorted_builds:
        gatekeeper_sections = gatekeeper_config.get(master_url, [])
        for gatekeeper_section in gatekeeper_sections:
            # The hash identifies this config section so repeated firings of
            # the same section can be deduplicated later (debounce_failures).
            section_hash = gatekeeper_ng_config.gatekeeper_section_hash(
                gatekeeper_section)
            gatekeeper = get_builder_section(
                gatekeeper_section, build_json['builderName'])
            if not gatekeeper:
                # Builder not covered (or excluded) by this section; count it
                # as a success for bookkeeping.
                succeeded_builds.append((master_url, builder, buildnum))
                continue
            steps = build_json['steps']
            # Compute the effective step sets; excluded steps never count.
            excluded_steps = set(gatekeeper.get('excluded_steps', []))
            forgiving = set(gatekeeper.get('forgiving_steps', [])) - excluded_steps
            forgiving_optional = (
                set(gatekeeper.get('forgiving_optional', [])) - excluded_steps)
            # Forgiving steps are also tree-closing; forgiveness only affects
            # whether the blamelist is emailed (see notify_failures).
            closing_steps = (
                set(gatekeeper.get('closing_steps', [])) | forgiving) - excluded_steps
            closing_optional = (
                (set(gatekeeper.get('closing_optional', [])) | forgiving_optional) -
                excluded_steps
            )
            tree_notify = set(gatekeeper.get('tree_notify', []))
            sheriff_classes = set(gatekeeper.get('sheriff_classes', []))
            status_template = gatekeeper.get(
                'status_template', gatekeeper_ng_config.DEFAULTS['status_template'])
            subject_template = gatekeeper.get(
                'subject_template', gatekeeper_ng_config.DEFAULTS[
                    'subject_template'])
            finished = [s for s in steps if s.get('isFinished')]
            close_tree = gatekeeper.get('close_tree', True)
            respect_build_status = gatekeeper.get('respect_build_status', False)
            # We ignore EXCEPTION and RETRY here since those are usually
            # infrastructure-related instead of actual test errors.
            successful_steps = set(s['name'] for s in finished
                                   if s.get('results', [FAILURE])[0] != FAILURE)
            successful_builder_steps[master_url][builder].update(successful_steps)
            finished_steps = set(s['name'] for s in finished)
            # A '*' wildcard expands the optional sets to every finished step.
            if '*' in forgiving_optional:
                forgiving_optional = (finished_steps - excluded_steps)
            if '*' in closing_optional:
                closing_optional = (finished_steps - excluded_steps)
            # Mandatory (closing) steps must succeed; optional steps count
            # only if they actually ran and failed.
            unsatisfied_steps = closing_steps - successful_steps
            failed_steps = finished_steps - successful_steps
            failed_optional_steps = failed_steps & closing_optional
            unsatisfied_steps |= failed_optional_steps
            # Build is not yet finished, don't penalize on unstarted/unfinished steps.
            if build_json.get('results', None) is None:
                unsatisfied_steps &= finished_steps
            # If the entire build failed.
            if (not unsatisfied_steps and 'results' in build_json and
                    build_json['results'] == FAILURE and respect_build_status):
                unsatisfied_steps.add('[overall build status]')
            buildbot_url = master_jsons[master_url]['project']['buildbotURL']
            project_name = master_jsons[master_url]['project']['title']
            if unsatisfied_steps:
                failed_builds.append(({'base_url': buildbot_url,
                                       'build': build_json,
                                       'close_tree': close_tree,
                                       'forgiving_steps': (
                                           forgiving | forgiving_optional),
                                       'project_name': project_name,
                                       'sheriff_classes': sheriff_classes,
                                       'subject_template': subject_template,
                                       'status_template': status_template,
                                       'tree_notify': tree_notify,
                                       'unsatisfied': unsatisfied_steps,
                                      },
                                      master_url,
                                      builder,
                                      buildnum,
                                      section_hash))
                # If there is a failing step that a newer builder hasn't succeeded on,
                # don't open the tree.
                still_failing_steps = (
                    unsatisfied_steps - successful_builder_steps[master_url][builder])
                if still_failing_steps:
                    logging.debug('%s failed on %s, not yet resolved.',
                                  ','.join(still_failing_steps),
                                  generate_build_url(failed_builds[-1][0]))
                    current_builds_successful = False
            else:
                succeeded_builds.append((master_url, builder, buildnum))
    # Builds were processed newest-first; reverse so callers see oldest-first.
    return (list(reversed(failed_builds)), list(reversed(succeeded_builds)),
            successful_builder_steps, current_builds_successful)
  202. def propagate_build_status_back_to_db(failure_tuples, success_tuples, build_db):
  203. """Write back to build_db which finished steps failed or succeeded."""
  204. for _, master_url, builder, buildnum, _ in failure_tuples:
  205. builder_dict = build_db.masters[master_url][builder]
  206. if builder_dict[buildnum].finished:
  207. # pylint: disable=W0212
  208. builder_dict[buildnum] = builder_dict[buildnum]._replace(
  209. succeeded=False)
  210. for master_url, builder, buildnum in success_tuples:
  211. builder_dict = build_db.masters[master_url][builder]
  212. if builder_dict[buildnum].finished:
  213. # pylint: disable=W0212
  214. builder_dict[buildnum] = builder_dict[buildnum]._replace(
  215. succeeded=True)
  216. def get_build_properties(build_json, properties):
  217. """Obtains multiple build_properties from a build.
  218. Sets a property to None if it's not in the build.
  219. """
  220. properties = set(properties)
  221. result = dict.fromkeys(properties) # Populates dict with {key: None}.
  222. for p in build_json.get('properties', []):
  223. if p[0] in properties:
  224. result[p[0]] = p[1]
  225. return result
  226. @contextmanager
  227. def log_section(url, builder, buildnum, section_hash=None):
  228. """Wraps a code block with information about a build it operates on."""
  229. logging.debug('%sbuilders/%s/builds/%d ----', url, builder, buildnum)
  230. if section_hash:
  231. logging.debug(' section hash: %s', section_hash)
  232. yield
  233. logging.debug('----')
  234. COMMIT_POSITION_REGEX = re.compile(r'(.*)@{#(\d+)}')
  235. def parse_commit_position(prop):
  236. """Determine if the revision is a SVN revision or a git commit position.
  237. If the revision is a git commit position, return just the numeric part.
  238. """
  239. if not isinstance(prop, basestring):
  240. return prop
  241. match = COMMIT_POSITION_REGEX.match(prop)
  242. if not match:
  243. return prop
  244. else:
  245. return int(match.group(2))
  246. def convert_revisions_to_positions(property_dict):
  247. """Given a dictionary of revisions, return a dict of parsed revisions."""
  248. result = {}
  249. for k, v in property_dict.iteritems():
  250. result[k] = parse_commit_position(v)
  251. return result
def reject_old_revisions(failed_builds, build_db):
    """Ignore builds which triggered on revisions older than the current.

    triggered_revisions has the format: {'revision': 500,
                                         'got_webkit_revision': 15,
                                        }

    Each key is a buildproperty that was previously triggered on, and each value
    was the value of that key. Note that all keys present in triggered_revisions
    are used for the comparison. Only builds where at least one number is greater
    than and all numbers are greater than or equal are considered 'new' and are
    not rejected by this function. Any change in the set of keys triggers a full
    reset of the recorded data. In the common case, triggered_revisions only has
    one key ('revision') and rejects all builds where revision is less than or
    equal to the last triggered revision.
    """
    triggered_revisions = build_db.aux.get('triggered_revisions', {})
    if not triggered_revisions:
        # There was no previous revision information, so by default keep all
        # failing builds.
        logging.debug('no previous revision tracking information, '
                      'keeping all failures.')
        return failed_builds

    def build_start_time(build):
        """Sorting key that returns a build's build start time.

        By using reversed start time, we sort such that the latest builds come
        first. This gives us a crude approximation of revision order, which
        means we can update triggered_revisions with the highest revision
        first. Note that this isn't perfect, but the likelihood of multiple
        failures occurring in the same minute is low and multi-revision
        sorting is potentially error-prone. An action-log based approach would
        obviate this hack.
        """
        return build['build'].get('times', [None])[0]

    kept_builds = []
    for build in sorted(failed_builds, key=build_start_time, reverse=True):
        builder = build['build']['builderName']
        buildnum = build['build']['number']
        with log_section(build['base_url'], builder, buildnum):
            # get_build_properties will return a dict with all the keys given
            # to it. Since we're giving it triggered_revisions.keys(),
            # revisions is guaranteed to have the same keys as
            # triggered_revisions.
            revisions = convert_revisions_to_positions(get_build_properties(
                build['build'], triggered_revisions.keys()))
            logging.debug('previous revision information: %s',
                          str(triggered_revisions))
            logging.debug('current revision information: %s', str(revisions))
            if any(x is None for x in revisions.itervalues()):
                # The revisions aren't in this build, err on the side of noisy.
                logging.debug('Nones detected in revision tracking information, '
                              'keeping build.')
                triggered_revisions = revisions
                kept_builds.append(build)
                continue
            # Pair up (previous, current) values for every tracked property.
            paired = []
            for k in revisions:
                paired.append((triggered_revisions[k], revisions[k]))
            if all(l <= r for l, r in paired) and any(l < r for l, r in paired):
                # At least one revision is greater and all the others are >=,
                # so let this revision through.
                # TODO(stip): evaluate the greatest revision if we see a stream
                # of failures at once.
                logging.debug('keeping build')
                kept_builds.append(build)
                triggered_revisions = revisions
                continue
            logging.debug('rejecting build')
    # Persist the newest revision info for the next gatekeeper run.
    build_db.aux['triggered_revisions'] = triggered_revisions
    return kept_builds
def debounce_failures(failed_builds, current_builds_successful, build_db):
    """Using trigger information in build_db, make sure we don't double-fire.

    Args:
      failed_builds: list of (build_info_dict, master_url, builder, buildnum,
        section_hash) tuples from check_builds.
      current_builds_successful: True if every failing step later succeeded.
      build_db: the build database; each inspected build has its unsatisfied
        steps recorded under its section_hash.

    Returns:
      The subset of build_info_dicts that represent genuinely new failures.
    """
    @contextmanager
    def save_build_failures(master_url, builder, buildnum, section_hash,
                            unsatisfied):
        # Runs after the 'with' body exits (including exits via 'continue'),
        # so every inspected build gets its unsatisfied steps recorded.
        yield
        build_db.masters[master_url][builder][buildnum].triggered[
            section_hash] = unsatisfied

    if failed_builds and current_builds_successful:
        logging.debug(
            'All failing steps succeeded in later runs, not closing tree.')
        return []
    true_failed_builds = []
    for build, master_url, builder, buildnum, section_hash in failed_builds:
        with log_section(build['base_url'], builder, buildnum, section_hash):
            with save_build_failures(master_url, builder, buildnum, section_hash,
                                     build['unsatisfied']):
                build_db_builder = build_db.masters[master_url][builder]
                # Determine what the current and previous failing steps are.
                prev_triggered = []
                if buildnum-1 in build_db_builder:
                    prev_triggered = build_db_builder[buildnum-1].triggered.get(
                        section_hash, [])
                logging.debug(' previous failing tests: %s', ','.join(
                    sorted(prev_triggered)))
                logging.debug(' current failing tests: %s', ','.join(
                    sorted(build['unsatisfied'])))
                # Skip build if we already fired (or if the failing tests aren't new).
                if section_hash in build_db_builder[buildnum].triggered:
                    logging.debug(' section has already been triggered for this build, '
                                  'skipping...')
                    continue
                new_tests = set(build['unsatisfied']) - set(prev_triggered)
                if not new_tests:
                    logging.debug(' no new steps failed since previous build %d',
                                  buildnum-1)
                    continue
                logging.debug(' new failing steps since build %d: %s', buildnum-1,
                              ','.join(sorted(new_tests)))
                # If we're here it's a legit failing build.
                true_failed_builds.append(build)
                logging.debug(' build steps: %s', ', '.join(
                    s['name'] for s in build['build']['steps']))
                logging.debug(' build complete: %s', bool(
                    build['build'].get('results', None) is not None))
                logging.debug(' set to close tree: %s', build['close_tree'])
                logging.debug(' build failed: %s', bool(build['unsatisfied']))
    return true_failed_builds
  366. def parse_sheriff_file(url):
  367. """Given a sheriff url, download and parse the appropirate sheriff list."""
  368. with closing(logging_urlopen(url)) as f:
  369. line = f.readline()
  370. usernames_matcher_ = re.compile(r'document.write\(\'([\w, ]+)\'\)')
  371. usernames_match = usernames_matcher_.match(line)
  372. sheriffs = set()
  373. if usernames_match:
  374. usernames_str = usernames_match.group(1)
  375. if usernames_str != 'None (channel is sheriff)':
  376. for sheriff in usernames_str.split(', '):
  377. if sheriff.count('@') == 0:
  378. sheriff += '@google.com'
  379. sheriffs.add(sheriff)
  380. return sheriffs
  381. def get_sheriffs(classes, base_url):
  382. """Given a list of sheriff classes, download and combine sheriff emails."""
  383. sheriff_sets = (parse_sheriff_file(base_url % cls) for cls in classes)
  384. return reduce(operator.or_, sheriff_sets, set())
  385. def hash_message(message, url, secret):
  386. utc_now = time.time()
  387. salt = random.getrandbits(32)
  388. hasher = hmac.new(secret, message, hashlib.sha256)
  389. hasher.update(str(utc_now))
  390. hasher.update(str(salt))
  391. client_hash = hasher.hexdigest()
  392. return {'message': message,
  393. 'time': utc_now,
  394. 'salt': salt,
  395. 'url': url,
  396. 'hmac-sha256': client_hash,
  397. }
  398. def submit_email(email_app, build_data, secret, simulate):
  399. """Submit json to a mailer app which sends out the alert email."""
  400. if simulate:
  401. logging.info("Simulate: Sending e-mail via [%s]: %s", email_app, build_data)
  402. return
  403. url = email_app + '/email'
  404. data = hash_message(json.dumps(build_data, sort_keys=True), url, secret)
  405. req = urllib2.Request(url, urllib.urlencode({'json': json.dumps(data)}))
  406. with closing(logging_urlopen(req)) as f:
  407. code = f.getcode()
  408. if code != 200:
  409. response = f.read()
  410. raise Exception('error connecting to email app: code %d %s' % (
  411. code, response))
def open_tree_if_possible(build_db, master_jsons, successful_builder_steps,
                          current_builds_successful, username, password, status_url_root,
                          set_status, emoji, simulate):
    """Re-opens the tree when every known failure has subsequently succeeded.

    Bails out without changing status when: current builds are still failing,
    a previously failed build's steps haven't gone green again, the tree
    isn't currently closed, or the current closure doesn't look like one
    gatekeeper itself made (to avoid overriding human closures).
    """
    if not current_builds_successful:
        logging.debug('Not opening tree because failing steps were detected.')
        return
    previously_failed_builds = []
    for master_url, master in master_jsons.iteritems():
        for builder in master['builders']:
            builder_dict = build_db.masters.get(master_url, {}).get(builder, {})
            for buildnum, build in builder_dict.iteritems():
                if build.finished:
                    if not build.succeeded:
                        if build.triggered:
                            # See crbug.com/389740 for why the 0 is there.
                            failing_steps = set(build.triggered.values()[0])
                        else:
                            failing_steps = set()
                        # Only steps that haven't succeeded in any examined
                        # build keep the tree closed.
                        still_failing_steps = (
                            failing_steps - successful_builder_steps[master_url][builder])
                        if still_failing_steps:
                            previously_failed_builds.append(
                                '%s on %s %s/builders/%s/builds/%d' % (
                                    ','.join(still_failing_steps), builder, master_url,
                                    urllib.quote(builder), buildnum))
    if previously_failed_builds:
        logging.debug(
            'Not opening tree because previous builds weren\'t successful:')
        for build in previously_failed_builds:
            logging.debug(' %s' % build)
        return
    status = get_tree_status(status_url_root, username, password)
    # Don't change the status unless the tree is currently closed.
    if status['general_state'] != 'closed':
        logging.debug('Not opening tree because it is not closed (%s)'
                      % status['general_state'])
        return
    # Don't override human closures.
    closed_tree_key = 'closed_tree-%s' % status_url_root
    last_gatekeeper_closure = build_db.aux.get(closed_tree_key)
    if last_gatekeeper_closure:
        # Line 378 of http://goo.gl/ViyxDW limits status messages to 500 chars. The
        # code replaces the 500th char with a unicode ellipsis, so we effectively
        # have 499 chars to work with.
        status_limit = 499
        if (last_gatekeeper_closure['message'][:status_limit]
                != status['message'][:status_limit]):
            logging.debug(
                'Not opening tree because we didn\'t set the last message: %s vs %s'
                % (last_gatekeeper_closure['message'], status['message']))
            return
    else:
        # Backwards compatability hack.
        if not re.search(r"automatic", status['message'], re.IGNORECASE):
            logging.debug('Not opening tree because \'automatic\' was not found in %s'
                          % status['message'])
            return
    logging.info('All builders are green, opening the tree...')
    tree_status = 'Tree is open (Automatic)'
    if emoji:
        random_emoji = random.choice(emoji)
        # A trailing ')' in the emoji would visually close the
        # '(Automatic: ...)' parenthetical; pad with a space.
        if random_emoji.endswith(')'):
            random_emoji += ' '
        tree_status = 'Tree is open (Automatic: %s)' % random_emoji
    logging.info('Opening tree with message: \'%s\'' % tree_status)
    # Clear our recorded closure so a later human closure isn't mistaken
    # for one of ours.
    build_db.aux[closed_tree_key] = {}
    if set_status:
        update_status(tree_status, status_url_root, username, password, simulate)
    else:
        logging.info('set-status not set, not connecting to chromium-status!')
  482. def generate_build_url(build):
  483. """Creates a URL to reference the build."""
  484. return '%s/builders/%s/builds/%d' % (
  485. build['base_url'].rstrip('/'),
  486. urllib.quote(build['build']['builderName']),
  487. build['build']['number']
  488. )
  489. def get_results_string(result_value):
  490. """Returns a string for a BuildBot result value (SUCCESS, FAILURE, etc.)."""
  491. return {
  492. SUCCESS: 'success',
  493. WARNINGS: 'warnings',
  494. FAILURE: 'failure',
  495. SKIPPED: 'skipped',
  496. EXCEPTION: 'exception',
  497. RETRY: 'retry',
  498. }.get(result_value, 'unknown')
def close_tree_if_necessary(build_db, failed_builds, username, password,
                            status_url_root, set_status, revision_properties,
                            simulate):
    """Given a list of failed builds, close the tree if necessary.

    Only builds whose gatekeeper section set close_tree are considered. The
    status message is rendered from the first closing build's status_template,
    and the message is recorded in build_db.aux so open_tree_if_possible can
    later recognize the closure as gatekeeper's own.
    """
    closing_builds = [b for b in failed_builds if b['close_tree']]
    if not closing_builds:
        logging.info('no tree-closing failures!')
        return
    status = get_tree_status(status_url_root, username, password)
    # Don't change the status unless the tree is currently open.
    if status['general_state'] != 'open':
        return
    logging.info('%d failed builds found, closing the tree...' %
                 len(closing_builds))
    template_build = closing_builds[0]
    # Substitution variables available to status_template.
    template_vars = {
        'blamelist': ','.join(template_build['build']['blame']),
        'build_url': generate_build_url(template_build),
        'builder_name': template_build['build']['builderName'],
        'project_name': template_build['project_name'],
        'unsatisfied': ','.join(template_build['unsatisfied']),
        'result': get_results_string(template_build['build'].get('results')),
    }
    # First populate un-transformed build properties
    revision_props = get_build_properties(template_build['build'],
                                          ['revision', 'got_revision', 'buildnumber',])
    # Second add in transformed specified revision_properties.
    revision_props.update(convert_revisions_to_positions(
        get_build_properties(template_build['build'], revision_properties)))
    template_vars.update(revision_props)
    # Close on first failure seen.
    tree_status = template_build['status_template'] % template_vars
    logging.info('closing the tree with message: \'%s\'' % tree_status)
    if set_status:
        update_status(tree_status, status_url_root, username, password, simulate)
        # Remember the exact message so open_tree_if_possible can tell this
        # closure apart from a human one.
        closed_tree_key = 'closed_tree-%s' % status_url_root
        build_db.aux[closed_tree_key] = {
            'message': tree_status,
        }
    else:
        logging.info('set-status not set, not connecting to chromium-status!')
def notify_failures(failed_builds, sheriff_url, default_from_email,
                    email_app_url, secret, domain, filter_domain,
                    disable_domain_filter, simulate):
    """Emails tree watchers, sheriffs and blamelists about failed builds.

    Builds one email payload per failed build, completes/filters recipient
    addresses by domain (unless disabled), deduplicates identical payloads,
    and submits each to the external mailer app.
    """
    # Email everyone that should be notified.
    emails_to_send = []
    for failed_build in failed_builds:
        waterfall_url = failed_build['base_url'].rstrip('/')
        build_url = generate_build_url(failed_build)
        project_name = failed_build['project_name']
        fromaddr = failed_build['build'].get('fromAddr', default_from_email)
        tree_notify = failed_build['tree_notify']
        # If every unsatisfied step is forgiving, spare the blamelist.
        if failed_build['unsatisfied'] <= failed_build['forgiving_steps']:
            blamelist = set()
        else:
            blamelist = set(failed_build['build']['blame'])
        sheriffs = get_sheriffs(failed_build['sheriff_classes'], sheriff_url)
        watchers = list(tree_notify | blamelist | sheriffs)
        build_data = {
            'build_url': build_url,
            'from_addr': fromaddr,
            'project_name': project_name,
            'subject_template': failed_build['subject_template'],
            'steps': [],
            'unsatisfied': list(failed_build['unsatisfied']),
            'waterfall_url': waterfall_url,
        }
        for field in ['builderName', 'number', 'reason']:
            build_data[field] = failed_build['build'][field]
        # The default value here is 2. In the case of failing on an unfinished
        # build, the build won't have a result yet. As of now, chromium-build
        # treats anything as 'not failure' as warning. Since we can't get into
        # notify_failures without a failure, it makes sense to have the
        # default value be failure (2) here.
        build_data['result'] = failed_build['build'].get('results', 2)
        build_data['blamelist'] = failed_build['build']['blame']
        build_data['changes'] = failed_build['build'].get('sourceStamp', {}).get(
            'changes', [])
        build_data['revisions'] = [x['revision'] for x in build_data['changes']]
        # Summarize each step for the email body.
        for step in failed_build['build']['steps']:
            new_step = {}
            for field in ['text', 'name', 'logs']:
                new_step[field] = step[field]
            new_step['started'] = step.get('isStarted', False)
            new_step['urls'] = step.get('urls', [])
            new_step['results'] = step.get('results', [0, None])[0]
            build_data['steps'].append(new_step)
        if email_app_url and watchers:
            emails_to_send.append((watchers, json.dumps(build_data, sort_keys=True)))
        buildnum = failed_build['build']['number']
        steps = failed_build['unsatisfied']
        builder = failed_build['build']['builderName']
        logging.info(
            'to %s: failure in %s build %s: %s' % (', '.join(watchers),
                                                   builder, buildnum,
                                                   list(steps)))
    if not email_app_url:
        logging.warn('no email_app_url specified, no email sent!')
    # Complete bare usernames with the default domain, then (optionally)
    # drop recipients whose domain isn't allowed.
    filtered_emails_to_send = []
    for email in emails_to_send:
        new_watchers = [x if '@' in x else (x + '@' + domain) for x in email[0]]
        if not disable_domain_filter:
            new_watchers = [x for x in new_watchers if x.split('@')[-1] in
                            filter_domain]
        if new_watchers:
            filtered_emails_to_send.append((new_watchers, email[1]))
    # Deduplicate emails.
    keyfunc = lambda x: x[1]
    for k, g in itertools.groupby(sorted(filtered_emails_to_send, key=keyfunc),
                                  keyfunc):
        # Merge the recipient lists of identical payloads into one send.
        watchers = list(reduce(operator.or_, [set(e[0]) for e in g], set()))
        build_data = json.loads(k)
        build_data['recipients'] = watchers
        submit_email(email_app_url, build_data, secret, simulate)
def simulate_build_failure(build_db, master, builder, *steps):
    """Fabricates a master json and a single failed build for simulation.

    Args:
      build_db: build database; an entry for the simulated build (marked
        finished) is inserted into it.
      master: URL string used as both the master key and buildbotURL.
      builder: name of the simulated builder.
      *steps: names of steps to include; each is marked finished and the
        overall build result is FAILURE.

    Returns:
      ({master: master_json}, (build_tuple,)).
      NOTE(review): presumably this mirrors the shape of the real build-scan
      output so the rest of the pipeline can consume it unchanged -- confirm
      against slave/build_scan.py.
    """
    master_json = {
        'project': {
            'buildbotURL': master,
            'title': 'Simulated Master',
        },
        'builders': [builder],
    }
    build_json = (
        {
            'builderName': builder,
            'number': 0,
            'steps': [{
                'name': s,
                'isFinished': True,
                'text': [
                    'Simulated Build Step',
                ],
                'logs': [],
            } for s in steps],
            'results': FAILURE,
            'reason': 'simulation',
            'blame': ['you'],
        },
        master,
        builder,
        0,
    )
    build_db.masters.setdefault(master, {})
    build_db.masters[master].setdefault(builder, {})
    # Record the simulated build as finished so the debounce/propagation
    # logic treats it like a real completed build.
    build_db.masters[master][builder][0] = build_scan_db.gen_build(finished=True)
    return {master: master_json}, (build_json,)
  645. def get_args(argv):
  646. parser = argparse.ArgumentParser(description='Closes the tree if annotated '
  647. 'builds fail.')
  648. parser.add_argument('--build-db', default='build_db.json',
  649. help='records the last-seen build for each builder')
  650. parser.add_argument('--clear-build-db', action='store_true',
  651. help='reset build_db to be empty')
  652. parser.add_argument('--sync-build-db', action='store_true',
  653. help='don\'t process any builds, but update build_db '
  654. 'to the latest build numbers')
  655. parser.add_argument('--skip-build-db-update', action='store_true',
  656. help='don\' write to the build_db, overridden by sync and'
  657. ' clear db options')
  658. parser.add_argument('--password-file', default='.status_password',
  659. help='password file to update chromium-status')
  660. parser.add_argument('-s', '--set-status', action='store_true',
  661. help='close the tree by connecting to chromium-status')
  662. parser.add_argument('--open-tree', action='store_true',
  663. help='open the tree by connecting to chromium-status')
  664. parser.add_argument('--status-url',
  665. default='https://chromium-status.appspot.com',
  666. help='URL for root of the status app')
  667. parser.add_argument('--track-revisions', action='store_true',
  668. help='only close on increasing revisions')
  669. parser.add_argument('--revision-properties', default='revision',
  670. help='comma-separated list of buildproperties to compare '
  671. 'revision on.')
  672. parser.add_argument('--status-user', default='buildbot@chromium.org',
  673. help='username for the status app')
  674. parser.add_argument('--disable-domain-filter', action='store_true',
  675. help='allow emailing any domain')
  676. parser.add_argument('--filter-domain', default='chromium.org,google.com',
  677. help='only email users in these comma separated domains')
  678. parser.add_argument('--email-domain', default='google.com',
  679. help='default email domain to add to users without one')
  680. parser.add_argument('--sheriff-url',
  681. default='http://build.chromium.org/p/chromium/%s.js',
  682. help='URL pattern for the current sheriff list')
  683. parser.add_argument('--parallelism', default=16,
  684. help='up to this many builds can be queried '
  685. 'simultaneously')
  686. parser.add_argument('--default-from-email',
  687. default='buildbot@chromium.org',
  688. help='default email address to send from')
  689. parser.add_argument('--email-app-url',
  690. default='https://chromium-build.appspot.com/mailer',
  691. help='URL of the application to send email from')
  692. parser.add_argument('--email-app-secret-file',
  693. default='.mailer_password',
  694. help='file containing secret used in email app auth')
  695. parser.add_argument('--no-email-app', action='store_true',
  696. help='don\'t send emails')
  697. parser.add_argument('--json',
  698. default=os.path.join(DATA_DIR, 'gatekeeper.json'),
  699. help='location of gatekeeper configuration file')
  700. parser.add_argument('--emoji',
  701. default=os.path.join(DATA_DIR, 'gatekeeper_emoji.json'),
  702. help='location of gatekeeper configuration file (None to'
  703. 'turn off)')
  704. parser.add_argument('--verify', action='store_true',
  705. help='verify that the gatekeeper config file is correct')
  706. parser.add_argument('--flatten-json', action='store_true',
  707. help='display flattened gatekeeper.json for debugging')
  708. parser.add_argument('--no-hashes', action='store_true',
  709. help='don\'t insert gatekeeper section hashes')
  710. parser.add_argument('-v', '--verbose', action='store_true',
  711. help='turn on extra debugging information')
  712. parser.add_argument('master_url', nargs='*',
  713. help='The master URLs to poll.')
  714. group = parser.add_argument_group(title='Testing')
  715. group.add_argument('--simulate-master', metavar='MASTER',
  716. help='Simulate a build failure. This is the name of the '
  717. 'master on which the failure occurs.')
  718. group.add_argument('--simulate-builder', metavar='BUILDER',
  719. help='The builder to simulate the failure on.')
  720. group.add_argument('--simulate-step', metavar='NAME', default=[],
  721. action='append',
  722. help='The steps to simulate completion.')
  723. args = parser.parse_args(argv)
  724. args.email_app_secret = None
  725. args.password = None
  726. if args.no_hashes and not args.flatten_json:
  727. parser.error('specifying --no-hashes doesn\'t make sense without '
  728. '--flatten-json')
  729. if args.verify or args.flatten_json:
  730. return args
  731. if not args:
  732. parser.error('you need to specify at least one master URL')
  733. if args.no_email_app:
  734. args.email_app_url = None
  735. if args.email_app_url and not args.simulate_master:
  736. if os.path.exists(args.email_app_secret_file):
  737. with open(args.email_app_secret_file) as f:
  738. args.email_app_secret = f.read().strip()
  739. else:
  740. parser.error('Must provide email app auth with %s.' % (
  741. args.email_app_secret_file))
  742. args.filter_domain = args.filter_domain.split(',')
  743. args.master_url = [url.rstrip('/') for url in args.master_url]
  744. return args
def main(argv):
  """Entry point: scan masters for failing builds and update tree status.

  Parses flags, loads the gatekeeper config, scans the requested masters (or
  a simulated failure) for new builds, records results in the build_db, then
  optionally opens/closes the tree and emails about new failures.

  Args:
    argv: command-line arguments (without the program name).

  Returns:
    0 on success; 1 if a requested master is absent from the config.
  """
  args = get_args(argv)

  logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

  gatekeeper_config = gatekeeper_ng_config.load_gatekeeper_config(args.json)

  # --verify: exit successfully once the config has loaded without error.
  if args.verify:
    return 0

  simulate = bool(args.simulate_master)

  # --flatten-json: dump the (optionally hash-injected) config to stdout
  # and exit without polling anything.
  if args.flatten_json:
    if not args.no_hashes:
      gatekeeper_config = gatekeeper_ng_config.inject_hashes(gatekeeper_config)
    gatekeeper_ng_config.flatten_to_json(gatekeeper_config, sys.stdout)
    print
    return 0

  # Only read/prompt for the status-app password if we may actually use it.
  if args.set_status and not simulate:
    args.password = get_pwd(args.password_file)

  # Every master we poll must be covered by the gatekeeper config.
  masters = set(args.master_url)
  if not masters <= set(gatekeeper_config):
    print 'The following masters are not present in the gatekeeper config:'
    for m in masters - set(gatekeeper_config):
      print ' ' + m
    return 1

  # Emoji are cosmetic; a missing or malformed file is non-fatal.
  emoji = []
  if args.emoji != 'None':
    try:
      with open(args.emoji) as f:
        emoji = json.load(f)
    except (IOError, ValueError) as e:
      logging.warning('Could not load emoji file %s: %s', args.emoji, e)

  # Start from a fresh build_db if requested (and persist it immediately);
  # otherwise load the previously saved one.
  if args.clear_build_db:
    build_db = build_scan_db.gen_db()
    build_scan_db.save_build_db(build_db, gatekeeper_config,
                                args.build_db)
  else:
    build_db = build_scan_db.get_build_db(args.build_db)

  if not simulate:
    master_jsons, build_jsons = build_scan.get_updated_builds(
        masters, build_db, args.parallelism)
  else:
    master_jsons, build_jsons = simulate_build_failure(
        build_db, args.simulate_master, args.simulate_builder,
        *args.simulate_step)

  # --sync-build-db: record what we saw and exit without acting on it.
  if args.sync_build_db:
    build_scan_db.save_build_db(build_db, gatekeeper_config,
                                args.build_db)
    return 0

  (failure_tuples, success_tuples, successful_builder_steps,
   current_builds_successful) = check_builds(
       build_jsons, master_jsons, gatekeeper_config)

  # Write failure / success information back to the build_db.
  propagate_build_status_back_to_db(failure_tuples, success_tuples, build_db)

  # opening is an option, mostly to keep the unittests working which
  # assume that any setting of status is negative.
  if args.open_tree:
    open_tree_if_possible(build_db, master_jsons, successful_builder_steps,
                          current_builds_successful, args.status_user,
                          args.password, args.status_url, args.set_status,
                          emoji, simulate)

  # debounce_failures does 3 things:
  # 1. Groups logging by builder
  # 2. Selects out the "build" part from the failure tuple.
  # 3. Rejects builds we've already warned about (and logs).
  new_failures = debounce_failures(failure_tuples,
                                   current_builds_successful, build_db)

  if args.track_revisions:
    # Only close the tree if it's a newer revision than before.
    properties = args.revision_properties.split(',')
    triggered_revisions = build_db.aux.get('triggered_revisions', {})
    # If the tracked property names changed since the last run, the stored
    # revisions are no longer comparable; reset them.
    if not triggered_revisions or (
        sorted(triggered_revisions) != sorted(properties)):
      logging.info('revision properties have changed from %s to %s. '
                   'clearing previous data.', triggered_revisions, properties)
      build_db.aux['triggered_revisions'] = dict.fromkeys(properties)
    new_failures = reject_old_revisions(new_failures, build_db)

  close_tree_if_necessary(build_db, new_failures,
                          args.status_user, args.password,
                          args.status_url, args.set_status,
                          args.revision_properties.split(','),
                          simulate)

  # Persist the build_db even if notify_failures raises, so the failures
  # already handled this run are recorded (debounce relies on the db).
  try:
    notify_failures(new_failures, args.sheriff_url,
                    args.default_from_email, args.email_app_url,
                    args.email_app_secret, args.email_domain,
                    args.filter_domain, args.disable_domain_filter,
                    simulate)
  finally:
    if not args.skip_build_db_update and not simulate:
      build_scan_db.save_build_db(build_db, gatekeeper_config,
                                  args.build_db)

  return 0
# When run as a script, exit with main()'s return code.
if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))