blob: 9fa3ed880fb7f69388a34ee1a176ef580470b0e3 [file] [log] [blame]
#!/usr/bin/env python
# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import argparse
import subprocess
import os
from os import path
from datetime import date, timedelta
from collections import namedtuple, defaultdict, Counter
# Record for a single commit read from `git log`: its hash, the author's email
# address, the commit date string and the set of directories it touched.
Commit = namedtuple('Commit', 'hash author commit_date dirs')
# Runs git against the repository at options.repo_path and returns whatever
# the command wrote to stdout. cmd_args is the list of arguments appended after
# `git --git-dir <repo_path>/.git`. subprocess.check_output raises
# CalledProcessError when git exits with a non-zero status.
def _RunGitCommand(options, cmd_args):
    git_dir = os.path.join(options.repo_path, '.git')
    git_prefix = ['git', '--git-dir', git_dir]
    return subprocess.check_output(git_prefix + cmd_args)
# Returns True if the author looks like a human chromium developer: the address
# contains '@chromium.org' and does not contain 'roller' anywhere. The bot
# check is deliberately naive.
def _IsValidAuthor(author):
    is_chromium_address = '@chromium.org' in author
    looks_like_roller = 'roller' in author
    return is_chromium_address and not looks_like_roller
# Yields one Commit for every commit header found in `output`, the text
# produced by the git log command that processAllCommits builds. A header is a
# line starting with a NUL character followed by "hash,author,date"; every
# other non-empty line up to the next header is a path changed by that commit,
# and its directory name is added to the commit's `dirs` set.
def _ParseCommits(output):
    current_commit = None
    for line in output.split('\n'):
        if line.startswith('\x00'):
            if current_commit is not None:
                yield current_commit
            # The hash never contains a comma and neither does the date, so
            # peel them off the two ends; whatever is left is the author.
            commit_hash, _, remainder = line[1:].partition(',')
            author, _, commit_date = remainder.rpartition(',')
            current_commit = Commit(hash=commit_hash, author=author,
                                    commit_date=commit_date, dirs=set())
        elif line and current_commit is not None:
            current_commit.dirs.add(os.path.dirname(line))
    # The last commit is not followed by a separator, so it has to be emitted
    # here or it would be silently dropped.
    if current_commit is not None:
        yield current_commit


# Reads the history of the last options.days_ago days (optionally limited to
# options.subdirectory) and counts, per directory, how many commits each valid
# author made that changed a file directly inside that directory.
#
# Returns a defaultdict(Counter) shaped directory -> author -> num_commits.
# Directories are relative to the repository root; files located in the root
# itself (directory '') are not counted. Commits whose author is rejected by
# _IsValidAuthor are ignored. Raises subprocess.CalledProcessError if git
# fails.
def processAllCommits(options):
    date_limit = date.today() - timedelta(days=int(options.days_ago))
    # Start every header with a NUL byte (%x00). Paths cannot contain NUL, so
    # a header can never be mistaken for a file name, even when a commit lists
    # no files (for example a merge) and is followed directly by the next
    # header.
    format_string = '%x00%h,%ae,%cI'
    cmd_args = [
        'log',
        '--since', date_limit.isoformat(),
        '--name-only',
        '--pretty=format:%s' % format_string,
    ]
    # The path limiter has to be the last argument.
    if options.subdirectory:
        cmd_args += ['--', options.subdirectory]
    output = _RunGitCommand(options, cmd_args)
    if not isinstance(output, str):
        # Python 3 returns bytes from subprocess; Python 2 already gives str.
        output = output.decode('utf-8', 'replace')

    directory_authors = defaultdict(Counter)
    for commit in _ParseCommits(output):
        if not _IsValidAuthor(commit.author):
            continue
        for directory in commit.dirs:
            if directory:
                directory_authors[directory][commit.author] += 1
    return directory_authors
# Returns the list of owners that apply to repo_subdir, a directory path
# relative to the repository root. OWNERS files are read from that directory
# and from each of its ancestors up to and including the repository root
# (options.repo_path), nearest directory first; duplicates are kept.
#
# The parsing is naive: after removing '#' comments, every line that still
# contains '@chromium.org' is returned whole. File imports and other
# directives are not interpreted.
def _GetOwners(options, repo_subdir):
    owners = []
    current_dir = repo_subdir
    while True:
        owners_path = os.path.join(options.repo_path, current_dir, 'OWNERS')
        if os.path.isfile(owners_path):
            with open(owners_path) as f:
                for line in f:
                    # Drop comments so that "user@chromium.org  # note" yields
                    # just the address and comment-only lines are skipped.
                    entry = line.split('#', 1)[0].strip()
                    if '@chromium.org' in entry:
                        owners.append(entry)
        parent_dir = path.dirname(current_dir)
        # '' is the repository root, the last directory to inspect. The second
        # test stops the walk if repo_subdir was absolute, because the
        # filesystem root is its own dirname and would never become ''.
        if current_dir == '' or parent_dir == current_dir:
            break
        current_dir = parent_dir
    return owners
# Returns the total number of commits recorded for `directory`, i.e. the sum
# of its per-author counts. The entry is looked up by indexing, so a
# defaultdict gains an empty entry when the directory was not present.
def _CountDirectoryCommits(directory_authors, directory):
    author_counts = directory_authors[directory]
    return sum(author_counts.values())
# Merges the commits of every immediate subdirectory of `parent` (as found on
# disk under options.repo_path) into the entry for `parent`, then removes each
# subdirectory entry that has fewer than options.dir_commit_limit commits.
# directory_authors is modified in place and is expected to create missing
# entries on indexing (a defaultdict(Counter)); nothing is returned.
#
# NOTE(review): subdirectories that stay above the limit are still added to
# the parent, so calling this twice for the same parent counts them twice.
# Confirm whether that is intended.
def _GroupToParentDirectory(options, directory_authors, parent):
    parent_path = path.join(options.repo_path, parent)
    for entry in os.listdir(parent_path):
        if not path.isdir(path.join(parent_path, entry)):
            continue
        entry_dir = path.join(parent, entry)
        # Use get() so that untouched subdirectories do not get a throwaway
        # empty entry created just to be inspected.
        child_counts = directory_authors.get(entry_dir)
        if not child_counts:
            # Nothing recorded for this child; drop a leftover empty entry.
            directory_authors.pop(entry_dir, None)
            continue
        directory_authors[parent].update(child_counts)
        if sum(child_counts.values()) < options.dir_commit_limit:
            del directory_authors[entry_dir]
# Performs one merging pass over directory_authors, modifying it in place:
#   * entries whose path is not an existing directory under options.repo_path
#     are removed;
#   * every directory with at least one but fewer than options.dir_commit_limit
#     commits is merged into its parent via _GroupToParentDirectory.
# Returns True if anything was merged, so callers can repeat the pass until it
# returns False.
def mergeDirectories(options, directory_authors):
    changed = False
    # Iterate over a snapshot of the keys: entries are removed and added while
    # looping, which is an error on a live dictionary view.
    for directory in list(directory_authors):
        if directory not in directory_authors:
            # Already merged into its parent earlier in this pass.
            continue
        # Require a directory, not just an existing path: a key that now names
        # a file could never be merged away, because _GroupToParentDirectory
        # only visits subdirectories, and the pass would repeat forever.
        if not path.isdir(path.join(options.repo_path, directory)):
            directory_authors.pop(directory, None)
            continue
        if directory == '':
            # The repository root has no parent. Merging it "into itself"
            # would re-add its children on every pass and never terminate.
            continue
        num_commits = _CountDirectoryCommits(directory_authors, directory)
        if 0 < num_commits < options.dir_commit_limit:
            parent = os.path.dirname(directory)
            _GroupToParentDirectory(options, directory_authors, parent)
            changed = True
    return changed
# Builds the set of authors that must not be suggested for repo_subdir: the
# comma separated names given in options.ignore_authors (whitespace around each
# name is stripped) plus everything _GetOwners returns for that directory.
def _GetIgnoredAuthors(options, repo_subdir):
    ignored_authors = set()
    if options.ignore_authors:
        for name in options.ignore_authors.split(','):
            ignored_authors.add(name.strip())
    ignored_authors.update(_GetOwners(options, repo_subdir))
    return ignored_authors
# Prints, for each directory with at least options.dir_commit_limit commits
# (in sorted directory order), a header line followed by up to
# options.max_suggestions suggested owners and a blank line. An author is
# suggested when they have at least options.author_cl_limit commits in the
# directory and are not in the set returned by _GetIgnoredAuthors; suggestions
# are listed most active first.
def outputSuggestions(options, directory_authors):
    # sorted() copies the keys first, and print is called with one
    # pre-formatted string so the output is the same on Python 2 and 3.
    for directory in sorted(directory_authors):
        authors = directory_authors[directory]
        commit_count = _CountDirectoryCommits(directory_authors, directory)
        if commit_count < options.dir_commit_limit:
            continue
        ignored_authors = _GetIgnoredAuthors(options, directory)
        suggestions = [(author, author_commits)
                       for author, author_commits in authors.most_common()
                       if author not in ignored_authors
                       and author_commits >= options.author_cl_limit]
        print('%s: %d commits in the last %d days' %
              (directory, commit_count, options.days_ago))
        # A separate name keeps the per-author count from shadowing the
        # directory total above.
        for author, author_commits in suggestions[:options.max_suggestions]:
            print('%s %d' % (author, author_commits))
        print('')
# Runs the whole tool for already parsed options: collects the per-directory
# commit counts, repeats the merging pass until it reports no more changes,
# then prints the suggestions.
def do(options):
    directory_authors = processAllCommits(options)
    keep_merging = True
    while keep_merging:
        keep_merging = mergeDirectories(options, directory_authors)
    outputSuggestions(options, directory_authors)
# Parses the command line and runs the tool. argv is the list of arguments to
# parse; when it is None argparse reads sys.argv[1:], which is what the script
# entry point relies on. argparse exits with status 2 on invalid arguments.
def main(argv=None):
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('repo_path')
    # The numeric options need type=int: without it a value given on the
    # command line arrives as a string and breaks the date arithmetic, the
    # comparisons against commit counts and the slicing done later on.
    parser.add_argument('--days-ago', type=int, default=365,
                        help='Number of days of history to search through.')
    parser.add_argument('--subdirectory', help='Limit to this subdirectory')
    parser.add_argument('--ignore-authors',
                        help='Ignore this comma separated list of authors')
    parser.add_argument('--max-suggestions', type=int, default=5,
                        help='Maximum number of suggested authors per'
                        ' directory.')
    parser.add_argument('--author-cl-limit', type=int, default=10,
                        help='Do not suggest authors who have commited less'
                        ' than this to the directory.')
    parser.add_argument('--dir-commit-limit', type=int, default=100,
                        help='Merge directories with less than this number of'
                        ' commits into their parent directory.')
    options = parser.parse_args(argv)
    do(options)
# Run the tool only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()