blob: 5d4b5b8f3b97e443bd1d16e8febb6c0908b087da [file] [log] [blame]
#!/usr/bin/env python3
# Copyright 2025 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import re
import os
from collections import defaultdict
from datetime import datetime
from filters import avoid_commit
# Verbose regular expression matching one per-file statistics line of a commit
# description (presumably produced by `git log --stat=500`), for example:
# ios/chrome/browser/tab_switcher/ui_bundled/tab_grid/tab_grid_coordinator.mm | 4 ++--
# or, when a stat width of 500 is still too narrow and the leading part of the
# path has been abbreviated to '...':
# .../browser/ui/tab_switcher/tab_grid/grid/tab_groups/tab_groups_constants.mm | 2 +
# The pattern contains insignificant whitespace and inline comments, so it must
# be used together with the re.VERBOSE flag.
# NOTE(review): directory segments are matched with `\w+` and the file name
# with `\w+(\.\w+)?`, so paths containing '-', '+' or more than one '.' in a
# segment do not match and such lines are ignored - confirm this is intended.
FILE_CHANGE_REGEX = r"""
^ \s* # Start of the line with one or more spaces.
(ios|\.{3}) # Root folder ('ios' or '...').
\/ # Path separator.
(\w+\/)* # Zero or more directory segments.
(\w+ # Filename.
(\.\w+)? # Optional extension.
)
\s* \| .* \s* # Pipe separator, e.g., " | 10 ".
[0-9]+ # Number of changed lines.
\s [+-]+ # +/- indicators.
$ # End of the line.
"""
class Commit:
    """A class to represent a single Git commit.

    Parses a raw commit description string to extract key information such as
    author, reviewers, date, changed files, and the primary modified folder.
    """

    def __init__(self,
                 commit_description: str,
                 skip_tests: bool = True) -> None:
        """Initializes a Commit object from a raw commit description.

        When `avoid_commit` rejects the description, or when no file change
        line is recognised, the object keeps its default (empty) values for
        the fields that were not parsed.

        Args:
            commit_description: A string containing the full text of a Git
                commit, including hash, author, date, and file statistics.
                The first line is stored verbatim as the commit hash.
            skip_tests: If True, changes whose path contains a component
                named exactly 'test' will be ignored.
        """
        # Username (part of the e-mail before '@') of the commit author.
        self.author = ''
        # Usernames collected from the 'Reviewed-by:' lines, in order.
        self.reviewers = []
        # Maps the directory of each changed file to its changed line count.
        self.files_stats = defaultdict(int)
        # Commit date as a naive datetime; datetime.min until parsed.
        self.date = datetime.min
        # Deepest path holding more than 50% of the changed lines, if any.
        self.modified_path = ''
        # Sum of the changed line counts of all the files taken into account.
        self.total_change = 0
        # First line of the commit description.
        self.hash = ''
        self.skip_tests = skip_tests

        if avoid_commit(commit_description):
            return

        lines = commit_description.split('\n')
        self.hash = lines[0]
        for line in lines[1:]:
            self.analyse_line(line)

        # Without any counted change there is no primary folder to compute.
        if self.total_change == 0:
            return
        self.determine_modified_folder()

    def all_informations(self) -> tuple:
        """Returns all extracted commit information.

        Returns:
            A tuple containing the author, a list of reviewers, a dictionary
            mapping directories to changed line counts, the primary modified
            path, the commit date, and the commit hash.
        """
        return (self.author, self.reviewers, self.files_stats,
                self.modified_path, self.date, self.hash)

    def extend_paths(self) -> dict[str, int]:
        """Expands the changed paths to include all parent directories.

        Aggregates the line changes recorded in `files_stats` into every
        ancestor directory, providing a view of changes at every level of the
        directory tree.

        Returns:
            A dictionary where keys are directory paths and values are the
            sum of line changes within that directory and its subdirectories,
            ordered from the deepest paths to the shallowest ones.
        """
        all_paths = defaultdict(int, self.files_stats)
        for path, change_count in self.files_stats.items():
            current = path
            while True:
                parent = os.path.dirname(current)
                # Stop at the top of a relative path ('') and at a root such
                # as '/', whose dirname is itself and would loop forever.
                if not parent or parent == current:
                    break
                all_paths[parent] += change_count
                current = parent

        # Order by depth (number of '/' separated components), deepest first.
        # reversed(sorted(...)) is kept on purpose: it also reverses the
        # relative order of the paths having the same depth.
        by_depth = sorted(all_paths.items(),
                          key=lambda item: item[0].count('/'))
        return dict(reversed(by_depth))

    def determine_modified_folder(self) -> None:
        """Identifies the primary folder modified in the commit.

        Sets the `modified_path` instance variable to the deepest path that
        contains more than 50% of the total line changes for the commit.
        `modified_path` is left untouched when no path qualifies or when no
        change has been counted.
        """
        if self.total_change == 0:
            # Nothing to compare against; also avoids a division by zero.
            return
        for path, change_count in self.extend_paths().items():
            percentage = change_count * 100 / self.total_change
            if percentage > 50:
                self.modified_path = path
                return

    def extract_username_from_line(self, line: str) -> str:
        """Extracts a username from a commit metadata line.

        The e-mail address is expected to be the last whitespace separated
        token of the line, optionally wrapped in angle brackets.

        Args:
            line: A string from the commit description, e.g., "Author: ..."
                or "Reviewed-by: ...".

        Returns:
            The extracted username (the part of the email before the '@'),
            or an empty string if the line contains no token at all.
        """
        tokens = line.split()
        if not tokens:
            return ''
        email = tokens[-1]
        # Only strip the surrounding '<' and '>' when they are really there,
        # so that a bare address is not truncated.
        if email.startswith('<') and email.endswith('>'):
            email = email[1:-1]
        return email.split('@')[0]

    def analyse_line(self, line: str) -> None:
        """Parses a single line of a commit description.

        Updates the instance variables (author, reviewers, date, files_stats,
        total_change) based on the content of the line. Lines that are neither
        metadata nor matched by FILE_CHANGE_REGEX are ignored.

        Args:
            line: A single line from the commit description text.

        Raises:
            ValueError: If a 'Date:' line does not follow the
                '%a %b %d %H:%M:%S %Y' format once its last token is removed.
        """
        if line.startswith('Author:'):
            self.author = self.extract_username_from_line(line)
            return

        if 'Reviewed-by:' in line:
            self.reviewers.append(self.extract_username_from_line(line))
            return

        if line.startswith('Date:'):
            # Drop the leading 'Date:' label and the last token (presumably
            # the UTC offset), which leaves a naive timestamp to parse.
            timestamp = ' '.join(line.split()[1:-1])
            self.date = datetime.strptime(timestamp, '%a %b %d %H:%M:%S %Y')
            return

        if not re.match(FILE_CHANGE_REGEX, line, re.VERBOSE):
            return

        # A matching line looks like "<path> | <count> <+/- marks>".
        fields = line.split()
        path = fields[0]
        # Paths in the description use '/' on every platform, so splitting on
        # os.path.sep would never find the 'test' component on Windows.
        if self.skip_tests and 'test' in path.split('/'):
            return

        change_count = int(fields[-2])
        self.total_change += change_count
        # Statistics are aggregated per directory, not per file.
        self.files_stats[os.path.dirname(path)] += change_count