| #!/usr/bin/env python |
| # Copyright 2018 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| import argparse |
| import subprocess |
| import os |
| from os import path |
| from datetime import date, timedelta |
| from collections import namedtuple, defaultdict, Counter |
| |
# One entry of the git log: the abbreviated hash, author e-mail and commit date
# taken from its header line, plus the set of directories its files live in.
Commit = namedtuple('Commit', 'hash author commit_date dirs')
| |
# Runs git with |cmd_args| (a list) against the repository at
# options.repo_path, whose .git directory is handed to git via --git-dir, and
# returns what the command wrote to standard output.
# subprocess.check_output raises CalledProcessError if git exits with a
# non-zero status.
def _RunGitCommand(options, cmd_args):
  git_dir = os.path.join(options.repo_path, '.git')
  git_prefix = ['git', '--git-dir', git_dir]
  return subprocess.check_output(git_prefix + cmd_args)
| |
| |
# Returns True when |author| looks like a human chromium developer: the address
# must contain '@chromium.org' and must not contain 'roller'. This is a naive
# substring check, so any address containing 'roller' is treated as a bot.
def _IsValidAuthor(author):
  if '@chromium.org' not in author:
    return False
  return 'roller' not in author
| |
| |
# Adds one commit to the tally of |commit|'s author for every directory the
# commit touched. Commits whose author is rejected by _IsValidAuthor are
# ignored, as are files that sit directly in the repository root (their
# directory name is the empty string).
def _RecordCommit(directory_authors, commit):
  if not _IsValidAuthor(commit.author):
    return
  for directory in commit.dirs:
    if directory == '':
      continue
    directory_authors[directory][commit.author] += 1


# Reads the git log of the last options.days_ago days (optionally restricted to
# options.subdirectory) and returns a nested dictionary
# directory -> author -> num_commits, implemented as a defaultdict of Counters.
#
# The log is requested as one "hash,author-email,date" header line per commit
# followed by the names of the files it changed; a blank line ends the file
# list of a commit.
# NOTE(review): a commit that lists no files (e.g. a merge) is presumably not
# followed by a blank line, which this parser cannot detect - confirm against
# real git output if the repository has such commits.
def processAllCommits(options):
  date_limit = date.today() - timedelta(days=options.days_ago)
  format_string = "%h,%ae,%cI"
  cmd_args = [
    'log',
    '--since', date_limit.isoformat(),
    '--name-only',
    '--pretty=format:%s' % format_string,
  ]

  # The path limiter has to be the last argument.
  if options.subdirectory:
    cmd_args += ['--', options.subdirectory]

  output = _RunGitCommand(options, cmd_args)
  # subprocess returns bytes on Python 3; on Python 2 the output already is a
  # str and is left untouched.
  if not isinstance(output, str):
    output = output.decode('utf-8', 'replace')

  directory_authors = defaultdict(Counter)
  current_commit = None
  for line in output.splitlines():
    if line == '':
      # A blank line closes the commit being read. Blank lines seen while no
      # commit is open are skipped instead of being parsed as a header.
      if current_commit is not None:
        _RecordCommit(directory_authors, current_commit)
        current_commit = None
    elif current_commit is None:
      commit_hash, author, commit_date = line.split(',')
      current_commit = Commit(hash=commit_hash, author=author,
                              commit_date=commit_date, dirs=set())
    else:
      current_commit.dirs.add(os.path.dirname(line))

  # "format:" only separates entries, so the last commit of the log is not
  # followed by a blank line and has to be recorded here.
  if current_commit is not None:
    _RecordCommit(directory_authors, current_commit)
  return directory_authors
| |
| |
# Returns the list of owners for |repo_subdir| (a path relative to
# options.repo_path) by reading the OWNERS file of that directory and of each
# of its ancestors, nearest directory first. An entry is the stripped text of
# any line that contains '@chromium.org'. The parsing is naive: file imports
# are not followed and lines are not otherwise interpreted.
#
# The walk follows os.path.dirname upwards until the path is empty (relative
# repo_path) or stops changing (absolute repo_path, where the dirname of the
# filesystem root is the root itself). Without the second check an absolute
# repo_path never terminates.
def _GetOwners(options, repo_subdir):
  owners = []
  directory_path = os.path.join(options.repo_path, repo_subdir)
  while directory_path != '':
    owners_path = os.path.join(directory_path, 'OWNERS')
    if os.path.isfile(owners_path):
      with open(owners_path) as f:
        owners.extend(line.strip() for line in f
                      if '@chromium.org' in line)
    parent_path = path.dirname(directory_path)
    if parent_path == directory_path:
      # Reached the filesystem root; there is nothing further up.
      break
    directory_path = parent_path
  return owners
| |
| |
# Returns the total number of commits recorded for |directory|, i.e. the sum of
# the per-author counts stored under that key. The entry is looked up by
# indexing, so with a defaultdict a missing directory is created (empty) and
# counts as 0.
def _CountDirectoryCommits(directory_authors, directory):
  per_author = directory_authors[directory]
  total = 0
  for count in per_author.values():
    total += count
  return total
| |
| |
# Folds the commits of every subdirectory that exists on disk below |parent|
# into |parent|'s own tally, then drops the entry of each subdirectory whose
# commit count is below options.dir_commit_limit. Subdirectories at or above
# the limit keep their own entry as well. |directory_authors| is modified in
# place.
# NOTE(review): Counter.update adds counts, so calling this again for the same
# parent adds the subdirectories that were kept a second time - confirm whether
# that is intended.
def _GroupToParentDirectory(options, directory_authors, parent):
  parent_path = path.join(options.repo_path, parent)
  child_names = [name for name in os.listdir(parent_path)
                 if path.isdir(os.path.join(parent_path, name))]

  for name in child_names:
    child_dir = path.join(parent, name)
    directory_authors[parent].update(directory_authors[child_dir])
    child_commits = _CountDirectoryCommits(directory_authors, child_dir)
    if child_commits >= options.dir_commit_limit:
      continue
    directory_authors.pop(child_dir)
| |
| |
# Merges directories with fewer than options.dir_commit_limit commits into
# their parent directory. Entries whose path is no longer a directory on disk
# are removed. |directory_authors| is changed in place.
#
# Returns True if at least one directory was merged, in which case the caller
# is expected to call this again, and False once nothing is left to merge.
def mergeDirectories(options, directory_authors):
  changed = False
  # Iterate over a snapshot of the keys: entries are added and removed while
  # looping, which a live dictionary view does not allow on Python 3.
  for directory in list(directory_authors):
    if directory not in directory_authors:
      # Already merged into its parent earlier in this pass.
      continue
    # isdir (rather than exists) matches the check _GroupToParentDirectory uses
    # when it removes merged entries; an entry that is a plain file today would
    # never be removed by it and would be reported as "changed" forever.
    if not path.isdir(path.join(options.repo_path, directory)):
      del directory_authors[directory]
      continue
    num_commits = _CountDirectoryCommits(directory_authors, directory)
    if num_commits == 0 or num_commits >= options.dir_commit_limit:
      continue
    parent = os.path.dirname(directory)
    if parent == directory:
      # The repository root ('') is its own parent. Merging it into itself
      # changes nothing, so it must not count as progress or the caller's loop
      # never ends.
      continue
    _GroupToParentDirectory(options, directory_authors, parent)
    changed = True
  return changed
| |
| |
# Returns the set of authors that must not be suggested for |repo_subdir|: the
# names from the comma separated options.ignore_authors (each stripped of
# surrounding whitespace), if any, plus everything _GetOwners reports for the
# directory.
def _GetIgnoredAuthors(options, repo_subdir):
  ignored_authors = set()
  if options.ignore_authors:
    for name in options.ignore_authors.split(','):
      ignored_authors.add(name.strip())
  ignored_authors.update(_GetOwners(options, repo_subdir))
  return ignored_authors
| |
| |
# Prints, for every directory with at least options.dir_commit_limit commits
# (in sorted directory order), a header line with its commit count followed by
# up to options.max_suggestions suggested owners and a blank line.
#
# An author is suggested when they are not in the set returned by
# _GetIgnoredAuthors and have at least options.author_cl_limit commits in the
# directory; suggestions are listed most active first, one "author count" pair
# per line.
#
# Each print call receives a single pre-formatted string, so the function runs
# and prints the same text on both Python 2 and Python 3.
def outputSuggestions(options, directory_authors):
  for directory in sorted(directory_authors):
    authors = directory_authors[directory]
    commit_count = _CountDirectoryCommits(directory_authors, directory)
    if commit_count < options.dir_commit_limit:
      continue
    ignored_authors = _GetIgnoredAuthors(options, directory)
    suggestions = [(author, author_commits)
                   for author, author_commits in authors.most_common()
                   if author not in ignored_authors
                   and author_commits >= options.author_cl_limit]
    print("%s: %d commits in the last %d days" %
          (directory, commit_count, options.days_ago))
    for author, author_commits in suggestions[:options.max_suggestions]:
      print("%s %s" % (author, author_commits))
    # print("") rather than print(): the latter prints "()" on Python 2.
    print("")
| |
| |
# Runs the whole tool for already parsed |options|: collect the per-directory
# commit counts, keep merging small directories into their parents until a pass
# reports no change, then print the suggestions.
def do(options):
  directory_authors = processAllCommits(options)
  while True:
    if not mergeDirectories(options, directory_authors):
      break
  outputSuggestions(options, directory_authors)
| |
| |
# Parses the command line and runs the tool on the given repository.
def main():
  parser = argparse.ArgumentParser(
      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  parser.add_argument('repo_path')
  # The numeric options are declared with type=int. Without it argparse passes
  # values given on the command line through as strings, which breaks the date
  # arithmetic, the commit-count comparisons and the slicing done with them.
  parser.add_argument('--days-ago', type=int, default=365,
                      help='Number of days of history to search through.')
  parser.add_argument('--subdirectory', help='Limit to this subdirectory')
  parser.add_argument('--ignore-authors',
                      help='Ignore this comma separated list of authors')
  parser.add_argument('--max-suggestions', type=int, default=5,
                      help='Maximum number of suggested authors per'
                      ' directory.')
  parser.add_argument('--author-cl-limit', type=int, default=10,
                      help='Do not suggest authors who have commited less'
                      ' than this to the directory.')
  parser.add_argument('--dir-commit-limit', type=int, default=100,
                      help='Merge directories with less than this number of'
                      ' commits into their parent directory.')
  options = parser.parse_args()
  do(options)
| |
| |
# Run the tool only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
  main()