#!/usr/bin/python
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
'''Generate stats of CQ usage.'''

# sudo apt-get install python-pip; sudo pip install requests requests_cache
import argparse
import collections
from datetime import timedelta, datetime
import itertools
import numpy
import operator
import re
import requests_cache
import sys
import time


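# Flush stdout after every write so progress output shows up immediately,
# even when it is piped through tee or a pager.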
class Unbuffered:
  def __init__(self, stream):
    self.stream = stream
  def write(self, data):
    self.stream.write(data)
    self.stream.flush()
  def __getattr__(self, attr):
    return getattr(self.stream, attr)
sys.stdout = Unbuffered(sys.stdout)


CONFIG = {
  'chrome': {
    'target_base_url_regexps': [
      re.compile(r'^svn://svn\.chromium\.org/chrome/trunk/src'),
      # pylint: disable=C0301
      re.compile(r'^https://chromium\.googlesource\.com/chromium/chromium/src(?:\.git)?(?:@[\w\-]+)?$'),
      re.compile(r'^http://src\.chromium\.org/chrome/trunk/src'),
    ],
  },
  'blink': {
    'target_base_url_regexps': [
      re.compile(r'^svn://svn\.chromium\.org/blink/(?:trunk/?)?$'),
      # pylint: disable=C0301
      re.compile(r'^https://chromium\.googlesource\.com/chromium/blink(?:\.git)?(?:@[\w\-]+)?$'),
      re.compile(r'^http://src\.chromium\.org/blink/(?:trunk/?)?$'),
    ],
  },
  'skia': {
    'target_base_url_regexps': [
      re.compile(r'^http://skia\.googlecode\.com/svn/trunk'),
      re.compile(r'^https://skia\.googlecode\.com/svn/trunk'),
    ],
  },
}
config_name = None


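# Build a requests hook that logs each URL and sleeps after any response that
# was not served from the local cache, keeping live traffic to Rietveld below
# the configured rate.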
def make_throttle_hook(timeout=1.0):
  # pylint: disable=W0613
  def hook(response, *args, **kwargs):
    print response.url
    if not getattr(response, 'from_cache', False):
      print 'sleeping'
      time.sleep(timeout)
    return response
  return hook


# It seems reasonable to expect that Rietveld can handle 20 queries per second
# from this script. Ask someone on the infra team before reducing this value.
seconds_between_requests = 0.05

requests_cache.install_cache('cq')
session = requests_cache.CachedSession()
session.hooks = {'response': make_throttle_hook(seconds_between_requests)}


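# Ask Rietveld's search endpoint for one page of recent issues, optionally
# resuming from a pagination cursor and restricted to a modification window.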
def fetch_recent_issues(limit, args, cursor=None):
  search_url = ('https://codereview.chromium.org/search?format=json&'
                'limit=%d') % limit
  if cursor:
    search_url += '&cursor=%s' % cursor
  if args.modified_after:
    search_url += '&modified_after=%s' % args.modified_after
  if args.modified_before:
    search_url += '&modified_before=%s' % args.modified_before
  response = session.get(search_url)
  return response.json()


def get_target_base_url_regexps():
  return CONFIG[config_name]['target_base_url_regexps']


def matches_target_base_url(url):
  for regexp in get_target_base_url_regexps():
    match = regexp.match(url)
    if match:
      return True
  return False


def issue_patch_string(issue_id, patch_id):
  return '%s:%s' % (issue_id, patch_id)


def issue_display_url(issue_id):
  return 'https://codereview.chromium.org/%s' % issue_id


def issue_url(issue_id):
  return 'https://codereview.chromium.org/api/%s?messages=true' % issue_id


def date_from_iso_str(iso_str):
  # Issue timestamps come in one of two formats:
  #  2013-10-17 16:43:04.391480
  #  2013-10-17 16:43:04
  try:
    return datetime.strptime(iso_str, '%Y-%m-%d %H:%M:%S.%f')
  except ValueError:
    return datetime.strptime(iso_str, '%Y-%m-%d %H:%M:%S')


def round_timedelta(delta):
  # We never care about the milliseconds when printing timedeltas:
  return timedelta(seconds=round(delta.total_seconds()))


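# Fetch a single issue (with its messages) and return the parsed JSON together
# with the time the response entered the cache, which stands in for "now" when
# the data comes from the local cache rather than a live request.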
def fetch_issue(issue_id):
  url = issue_url(issue_id)
  this_json = session.get(url).json()
  # pylint: disable=W0212
  cache_key = session.cache._url_to_key(url)
  _, now = session.cache.get_response_and_time(cache_key)
  return this_json, now


def format_line(values, aligns, widths):
  values = map(str, values)
  args = zip(values, aligns, widths)
  return ' '.join([align(value, width)
                   for value, align, width in args])


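# One CQ attempt on a single patchset: which issue and patchset it ran on, how
# it ended, and when it started and finished.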
class CQAttempt(object):
  def __init__(self):
    self.result = None
    self.patch_id = None
    self.issue_id = None
    self.author = None
    self.start_time = None
    self.end_time = None

  @property
  def issue_patch_string(self):
    return issue_patch_string(self.issue_id, self.patch_id)

  @property
  def duration(self):
    return self.end_time - self.start_time


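# State machine that replays an issue's messages in order, pairing each CQ
# start message with whichever later message ended that attempt.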
class CQAnalyzer(object):
  def __init__(self):
    self.state = None
    self.issue = None
    self.issue_id = None
    self.author = None
    self.patch_id = None
    self.start_time = None
    self.attempts = []
    self.manual_commits = set()
    self.no_try_issues = set()
    self.now = None
    # issue_id -> time since CQ start
    self.in_progress = {}

  # pylint: disable=C0301
  start_regexp = re.compile(r'''CQ is trying da patch. Follow status at
https://chromium-status.appspot.com/cq/(?P<author>.*)/(?P<issue_id>\d+)/(?P<patch_id>\d+)''')
  committed_regexp = re.compile(r'Change committed as (?P<revision>\d+)')
  # pylint: disable=C0301
  manual_commit_regexp = re.compile(r'Committed patchset #(?P<patch_id>\d+) manually as r(?P<revision>\d+)')

  def _reset(self):
    self.state = None
    self.author = None
    self.patch_id = None
    self.start_time = None

  def _start(self, patch_id, start_time, author=None):
    if self.state == 'START' and self.patch_id == patch_id:
      # print 'Restarted for patchset %s on %s' % (
      #     patch_id, issue_display_url(self.issue_id))
      self._end('RESTART', start_time)
    self.state = 'START'
    self.patch_id = patch_id
    self.author = author
    assert start_time
    self.start_time = start_time

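  # Close out the current attempt with the given result. An end event without
  # a matching start is reported and otherwise ignored.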
  def _end(self, result, end_time):
    if self.state != 'START':
      print 'ERROR: %s for %s without start??' % (
          result, issue_display_url(self.issue_id))
      self._reset()
      return
    attempt = CQAttempt()
    attempt.result = result
    attempt.patch_id = self.patch_id
    attempt.issue_id = self.issue_id
    attempt.start_time = self.start_time
    attempt.end_time = end_time
    self.attempts.append(attempt)
    self._reset()

  def _start_and_end(self, result, date):
    self._start(result, date)
    self._end(result, date)

  def _manual_commit(self, patch_id):
    self.manual_commits.add(issue_patch_string(self.issue_id, patch_id))

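  # Classify one Rietveld message as a manual commit, a CQ start, a successful
  # CQ commit, one of the known CQ failure messages, or unknown.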
  def process_message(self, message):
    text = message['text']
    date = date_from_iso_str(message['date'])

    match = self.manual_commit_regexp.match(text)
    if match:
      if self.start_time:
        self._end('MANUAL_COMMIT',
                  date_from_iso_str(self.issue['modified']))
      patch_number = int(match.group('patch_id'))
      # gcl dcommit talks in terms of patch numbers (1, 2, ...); we need to
      # map those to patchset ids:
      if len(self.issue['patchsets']) > (patch_number-1):
        patch_id = self.issue['patchsets'][patch_number-1]
      else:
        print '%s missing patch number %d' % (
            self.issue_id, patch_number)
        print self.issue['patchsets']
        print 'issue, now = fetch_issue(\'%s\')' % self.issue_id
        patch_id = self.issue['patchsets'][-1]
      self._manual_commit(patch_id)
      return

    if message['sender'] != 'commit-bot@chromium.org':
      return

    match = self.start_regexp.match(text)
    if match:
      self._start(match.group('patch_id'), date, match.group('author'))
      return
    match = self.committed_regexp.match(text)
    if match:
      self._end('SUCCESS', date)
      return

    cq_end_messages = {
        'TRY': 'Retried try job',
        'APPLY': 'Failed to apply patch',
        'APPLY2': 'Failed to apply the patch',
        'COMPILE': 'Sorry for I got bad news for ya',
        'DESCRIPTION_CHANGED': ('Commit queue rejected this change because '
                                'the description'),
        # This is too conservative.
        'REVIEWERS_CHANGED': 'List of reviewers changed.',
        # User caused.
        'PATCH_CHANGED': 'Commit queue failed due to new patchset.',
        # FAILED_TO_TRIGGER is a very serious failure, unclear why it
        # happens!
        'FAILED_TO_TRIGGER': 'Failed to trigger a try job on',
        # BINARY_FILE is just a bug in the CQ.
        'BINARY_FILE': 'Can\'t process patch for file',
        'BINARY_FILE2': 'Failed to request the patch to try',
        # Unclear why UPDATE_STEP happens. Likely broken bots, shouldn't
        # fail patches!
        'UPDATE_STEP': 'Step "update" is always a major failure.',
        'BERSERK': 'The commit queue went berserk',
        'INTERNAL_ERROR': 'Commit queue had an internal error.',
    }
    for result, message_text in cq_end_messages.items():
      if text.startswith(message_text):
        self._end(result, date)
        return

    cq_fail_to_start_messages = {
        'FAILED_PATCHSET': 'Failed to get patchset properties',
        'NO_COMMENTS': 'No comments yet.',
        'NO_LGTM': 'No LGTM from a valid reviewer yet.',
    }
    for result, message_text in cq_fail_to_start_messages.items():
      if text.startswith(message_text):
        self._start_and_end(result, date)
        return

    print 'ERROR: %s has unknown message: %s' % (
        issue_display_url(self.issue_id), text)
    self._end('UNKNOWN', date)

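  # Begin analyzing an issue, recording the fetch time as "now" since the
  # issue data may have come from the local cache.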
  def start_issue(self, issue, now):
    # We may be dealing with cached data, so cache our concept of "now"
    self.now = now
    self.issue = issue
    self.issue_id = issue['issue']

  def end_issue(self, issue):
    issue_id = issue['issue']
    assert issue_id == self.issue_id
    if self.start_time:
      if self.issue['commit']:
        print '%s in progress? %s' % (
            round_timedelta(self.now - self.start_time),
            issue_display_url(self.issue_id))
        self._end('IN_PROGRESS', self.now)
      else:
        # There are various reasons why the commit box may have
        # been unchecked, including that the issue may be closed now.
        # If those are not from a user, then that's a bug in rietveld
        # and out of scope for this script. For now we're just ignoring
        # cases where the box was unchecked. We could log an _end()
        # event, but which time would we use? last_modified has at
        # best a weak correlation with the issue close or CQ checkbox.
        self._reset()
    # _reset() shouldn't be needed, but just in case.
    self._reset()
    self.issue_id = None
    self.issue = None

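  # Run the analyzer over one issue: remember NOTRY issues, then feed every
  # message through process_message.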
  def process_issue(self, issue, now):
    issue_id = issue['issue']

    if re.search('^NOTRY=[tT][rR][uU][eE]$', issue['description'], re.M):
      self.no_try_issues.add(issue_id)

    self.start_issue(issue, now)
    for message in issue['messages']:
      self.process_message(message)
      # print '%(date)s: %(text)s' % message
    self.end_issue(issue)

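  # Print attempts grouped by result, with a count plus the mean and median
  # duration for each result.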
  def print_attempts_table(self):
    result_getter = operator.attrgetter('result')
    attempts = sorted(self.attempts, key=result_getter)

    col_aligns = (str.rjust, str.ljust, str.rjust, str.rjust)
    col_widths = (5, 20, 16, 16)
    col_headers = ('count', 'result', 'mean duration', 'median duration')
    print
    print ('CQ Attempts by result (there can be more than '
           'one attempt on the same patch)')
    print format_line(col_headers, col_aligns, col_widths)
    print '-' * (sum(col_widths) + len(col_widths) - 1)
    for result, group_itr in itertools.groupby(attempts, result_getter):
      group = list(group_itr)
      durations = map(operator.attrgetter('duration'), group)
      mean = sum(durations, timedelta()) / len(group)
      durations_seconds = map(timedelta.total_seconds, durations)
      median_seconds = numpy.median(durations_seconds)
      median = timedelta(seconds=round(median_seconds))
      values = (len(group), result, round_timedelta(mean), median)
      print format_line(values, col_aligns, col_widths)
    print


def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--base-url-stats', action='store_true',
                      help='print base_url matching stats')
  parser.add_argument('--chrome', action='store_true')
  parser.add_argument('--clear', action='store_true',
                      help='clear the cache before starting')
  parser.add_argument('--count', type=int, default=None,
                      help='number of issues to fetch')
  parser.add_argument('--modified-after', default=None,
                      help=('search for issues modified after '
                            'this date YYYY-MM-DD'))
  parser.add_argument('--modified-before', default=None,
                      help=('search for issues modified before '
                            'this date YYYY-MM-DD'))
  parser.add_argument('--progress', help='Show progress', action='store_true')
  parser.add_argument('--skia', action='store_true')
  args = parser.parse_args()

  # Rietveld allows up to 1000 results per search query, but it is not clear
  # whether larger pages are any easier on the server, so use 100 to be safe.
  per_request_search_limit = 100
  if args.count:
    per_request_search_limit = min(int(args.count/2), 100)
  # args.count, when given, caps how many matching issues we analyze.
  target_count = args.count
  if args.clear:
    print 'Clearing cache...'
    session.cache.clear()
  global config_name
  if args.chrome:
    config_name = 'chrome'
  elif args.skia:
    config_name = 'skia'
  else:
    config_name = 'blink'
  print 'Fetching and analyzing most recent issues...'
  analyzer = CQAnalyzer()

  total_fetch_count = 0
  issue_count = 0
  cursor = None
  url_counts = collections.Counter()
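  # Page through search results until Rietveld runs out of issues or we hit
  # the requested count, analyzing every issue whose base_url matches the
  # selected project.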
  while True:
    search_results = fetch_recent_issues(per_request_search_limit, args,
                                         cursor)
    if len(search_results['results']) == 0:
      break
    cursor = search_results['cursor']
    total_fetch_count += per_request_search_limit

    if args.base_url_stats:
      url_counts.update([result['base_url']
                         for result in search_results['results']])

    for result in search_results['results']:
      if not matches_target_base_url(result['base_url']):
        continue
      issue_count += 1
      if args.progress and (issue_count % 100 == 0):
        print issue_count,
        if target_count:
          print target_count
      issue, now = fetch_issue(result['issue'])
      analyzer.process_issue(issue, now)
    if target_count and issue_count >= target_count:
      break

  if args.base_url_stats:
    print
    for url, count in url_counts.most_common():
      matches_target = matches_target_base_url(url)
      print '%s %s %s' % (matches_target, count, url)

  print
  print 'Found %s issues (in the %s most recent issues).' % (
      issue_count, total_fetch_count)
  cq_issues = set([attempt.issue_id for attempt in analyzer.attempts])
  cq_issue_count = len(cq_issues)
  cq_patches = set([attempt.issue_patch_string
                    for attempt in analyzer.attempts])
  cq_patch_count = len(cq_patches)
  print ('%s issues (%d%%) (%d patches) tried the CQ, '
         'resulting in %d CQ attempts.') % (
      cq_issue_count, 100 * cq_issue_count / issue_count, cq_patch_count,
      len(analyzer.attempts))

  no_try_issues = len(cq_issues & analyzer.no_try_issues)
  gave_up_patches = len(cq_patches & analyzer.manual_commits)
  print ('%s of those (%d%%) were committed manually (gave up). '
         '%s (%d%%) were marked NOTRY.') % (
      gave_up_patches, 100 * gave_up_patches / cq_issue_count,
      no_try_issues, 100 * no_try_issues / cq_issue_count)

  analyzer.print_attempts_table()

  identifier_getter = operator.attrgetter('issue_patch_string')
  attempts = sorted(analyzer.attempts, key=identifier_getter)
  extra_user_clicks = 0
  eventually_success_durations = []
  eventually_success_attempts = []

  # pylint: disable=W0612
  for identifier, group_itr in itertools.groupby(attempts, identifier_getter):
    group = list(group_itr)
    if group[0].issue_id in analyzer.no_try_issues:
      continue
    results = map(operator.attrgetter('result'), group)
    # print '%s(%s): %s %s' % (identifier, len(group), total_time, results)
    if 'SUCCESS' in results:
      # 'RESTART' events may not require a user re-click?
      user_initiated = filter(lambda result: result != 'RESTART', results)
      extra_user_clicks += len(user_initiated) - 1
      durations = map(operator.attrgetter('duration'), group)
      eventually_success_durations.append(
          sum(durations, timedelta()).total_seconds())
      eventually_success_attempts.append(len(group))

  print ('%s CQ starts (%d%%) were due to a previous false negative '
         '(CQ landed a patch it previously rejected)') % (
      extra_user_clicks, 100 * extra_user_clicks / len(analyzer.attempts))

  print 'Percentiles for patches which eventually land:'
  for i in [10, 25, 50, 75, 90, 95, 99]:
    eventually_success_durations_p = round(numpy.percentile(
        eventually_success_durations, i)/(60*60), 3)
    eventually_success_attempts_p = numpy.percentile(
        eventually_success_attempts, i)
    print '%d: %.3f hrs, %.3f attempts' % (
        i, eventually_success_durations_p, eventually_success_attempts_p)


if __name__ == '__main__':
  sys.exit(main())