blob: 36bb69d153e65eb5b89f0f5b94c0be45815aa92b [file] [log] [blame]
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""SCM-specific utility classes."""
from collections import defaultdict
import os
import platform
import re
from typing import Mapping, List
import gclient_utils
import subprocess2
# TODO: Should fix these warnings.
# pylint: disable=line-too-long
# constants used to identify the tree state of a directory.
VERSIONED_NO = 0
VERSIONED_DIR = 1
VERSIONED_SUBMODULE = 2
def determine_scm(root):
"""Similar to upload.py's version but much simpler.
Returns 'git' or None.
"""
if os.path.isdir(os.path.join(root, '.git')):
return 'git'
try:
subprocess2.check_call(['git', 'rev-parse', '--show-cdup'],
stdout=subprocess2.DEVNULL,
stderr=subprocess2.DEVNULL,
cwd=root)
return 'git'
except (OSError, subprocess2.CalledProcessError):
return None
class GIT(object):
current_version = None
rev_parse_cache = {}
# Maps cwd -> {config key, [config values]}
# This cache speeds up all `git config ...` operations by only running a
# single subcommand, which can greatly accelerate things like
# git-map-branches.
_CONFIG_CACHE: Mapping[str, Mapping[str, List[str]]] = {}
@staticmethod
def _load_config(cwd: str) -> Mapping[str, List[str]]:
"""Loads git config for the given cwd.
The calls to this method are cached in-memory for performance. The
config is only reloaded on cache misses.
Args:
cwd: path to fetch `git config` for.
Returns:
A dict mapping git config keys to a list of its values.
"""
if cwd not in GIT._CONFIG_CACHE:
try:
rawConfig = GIT.Capture(['config', '--list', '-z'],
cwd=cwd,
strip_out=False)
except subprocess2.CalledProcessError:
return {}
cfg = defaultdict(list)
# Splitting by '\x00' gets an additional empty string at the end.
for line in rawConfig.split('\x00')[:-1]:
key, value = map(str.strip, line.split('\n', 1))
cfg[key].append(value)
GIT._CONFIG_CACHE[cwd] = cfg
return GIT._CONFIG_CACHE[cwd]
@staticmethod
def _clear_config(cwd: str) -> None:
GIT._CONFIG_CACHE.pop(cwd, None)
@staticmethod
def ApplyEnvVars(kwargs):
env = kwargs.pop('env', None) or os.environ.copy()
# Don't prompt for passwords; just fail quickly and noisily.
# By default, git will use an interactive terminal prompt when a
# username/ password is needed. That shouldn't happen in the chromium
# workflow, and if it does, then gclient may hide the prompt in the
# midst of a flood of terminal spew. The only indication that something
# has gone wrong will be when gclient hangs unresponsively. Instead, we
# disable the password prompt and simply allow git to fail noisily. The
# error message produced by git will be copied to gclient's output.
env.setdefault('GIT_ASKPASS', 'true')
env.setdefault('SSH_ASKPASS', 'true')
# 'cat' is a magical git string that disables pagers on all platforms.
env.setdefault('GIT_PAGER', 'cat')
return env
@staticmethod
def Capture(args, cwd=None, strip_out=True, **kwargs):
env = GIT.ApplyEnvVars(kwargs)
output = subprocess2.check_output(['git'] + args,
cwd=cwd,
stderr=subprocess2.PIPE,
env=env,
**kwargs)
output = output.decode('utf-8', 'replace')
return output.strip() if strip_out else output
@staticmethod
def CaptureStatus(cwd,
upstream_branch,
end_commit=None,
ignore_submodules=True):
# type: (str, str, Optional[str]) -> Sequence[Tuple[str, str]]
"""Returns git status.
Returns an array of (status, file) tuples."""
if end_commit is None:
end_commit = ''
if upstream_branch is None:
upstream_branch = GIT.GetUpstreamBranch(cwd)
if upstream_branch is None:
raise gclient_utils.Error('Cannot determine upstream branch')
command = [
'-c', 'core.quotePath=false', 'diff', '--name-status',
'--no-renames'
]
if ignore_submodules:
command.append('--ignore-submodules=all')
command.extend(['-r', '%s...%s' % (upstream_branch, end_commit)])
status = GIT.Capture(command, cwd)
results = []
if status:
for statusline in status.splitlines():
# 3-way merges can cause the status can be 'MMM' instead of 'M'.
# This can happen when the user has 2 local branches and he
# diffs between these 2 branches instead diffing to upstream.
m = re.match(r'^(\w)+\t(.+)$', statusline)
if not m:
raise gclient_utils.Error(
'status currently unsupported: %s' % statusline)
# Only grab the first letter.
results.append(('%s ' % m.group(1)[0], m.group(2)))
return results
@staticmethod
def GetConfig(cwd, key, default=None):
values = GIT._load_config(cwd).get(key, None)
if not values:
return default
return values[-1]
@staticmethod
def GetConfigBool(cwd, key):
return GIT.GetConfig(cwd, key) == 'true'
@staticmethod
def GetConfigList(cwd, key):
return GIT._load_config(cwd).get(key, [])
@staticmethod
def YieldConfigRegexp(cwd, pattern):
"""Yields (key, value) pairs for any config keys matching `pattern`."""
p = re.compile(pattern)
for name, values in GIT._load_config(cwd).items():
if p.match(name):
for value in values:
yield name, value
@staticmethod
def GetBranchConfig(cwd, branch, key, default=None):
assert branch, 'A branch must be given'
key = 'branch.%s.%s' % (branch, key)
return GIT.GetConfig(cwd, key, default)
@staticmethod
def SetConfig(cwd,
key,
value=None,
*,
value_pattern=None,
modify_all=False,
scope='local'):
"""Sets or unsets one or more config values.
Args:
cwd: path to fetch `git config` for.
key: The specific config key to affect.
value: The value to set. If this is None, `key` will be unset.
value_pattern: For use with `all=True`, allows further filtering of
the set or unset operation based on the currently configured
value. Ignored for `all=False`.
modify_all: If True, this will change a set operation to
`--replace-all`, and will change an unset operation to
`--unset-all`.
scope: By default this is the local scope, but could be `system`,
`global`, or `worktree`, depending on which config scope you
want to affect.
"""
GIT._clear_config(cwd)
args = ['config', f'--{scope}']
if value == None:
args.extend(['--unset' + ('-all' if modify_all else ''), key])
else:
if modify_all:
args.append('--replace-all')
args.extend([key, value])
if modify_all and value_pattern:
args.append(value_pattern)
GIT.Capture(args, cwd=cwd)
@staticmethod
def SetBranchConfig(cwd, branch, key, value=None):
assert branch, 'A branch must be given'
key = 'branch.%s.%s' % (branch, key)
GIT.SetConfig(cwd, key, value)
@staticmethod
def ShortBranchName(branch):
"""Converts a name like 'refs/heads/foo' to just 'foo'."""
return branch.replace('refs/heads/', '')
@staticmethod
def GetBranchRef(cwd):
"""Returns the full branch reference, e.g. 'refs/heads/main'."""
try:
return GIT.Capture(['symbolic-ref', 'HEAD'], cwd=cwd)
except subprocess2.CalledProcessError:
return None
@staticmethod
def GetRemoteHeadRef(cwd, url, remote):
"""Returns the full default remote branch reference, e.g.
'refs/remotes/origin/main'."""
if os.path.exists(cwd):
try:
# Try using local git copy first
ref = 'refs/remotes/%s/HEAD' % remote
ref = GIT.Capture(['symbolic-ref', ref], cwd=cwd)
if not ref.endswith('master'):
return ref
# Check if there are changes in the default branch for this
# particular repository.
GIT.Capture(['remote', 'set-head', '-a', remote], cwd=cwd)
return GIT.Capture(['symbolic-ref', ref], cwd=cwd)
except subprocess2.CalledProcessError:
pass
try:
# Fetch information from git server
resp = GIT.Capture(['ls-remote', '--symref', url, 'HEAD'])
regex = r'^ref: (.*)\tHEAD$'
for line in resp.split('\n'):
m = re.match(regex, line)
if m:
return ''.join(GIT.RefToRemoteRef(m.group(1), remote))
except subprocess2.CalledProcessError:
pass
# Return default branch
return 'refs/remotes/%s/main' % remote
@staticmethod
def GetBranch(cwd):
"""Returns the short branch name, e.g. 'main'."""
branchref = GIT.GetBranchRef(cwd)
if branchref:
return GIT.ShortBranchName(branchref)
return None
@staticmethod
def GetRemoteBranches(cwd):
return GIT.Capture(['branch', '-r'], cwd=cwd).split()
@staticmethod
def FetchUpstreamTuple(cwd, branch=None):
"""Returns a tuple containing remote and remote ref,
e.g. 'origin', 'refs/heads/main'
"""
try:
branch = branch or GIT.GetBranch(cwd)
except subprocess2.CalledProcessError:
pass
if branch:
upstream_branch = GIT.GetBranchConfig(cwd, branch, 'merge')
if upstream_branch:
remote = GIT.GetBranchConfig(cwd, branch, 'remote', '.')
return remote, upstream_branch
upstream_branch = GIT.GetConfig(cwd, 'rietveld.upstream-branch')
if upstream_branch:
remote = GIT.GetConfig(cwd, 'rietveld.upstream-remote', '.')
return remote, upstream_branch
# Else, try to guess the origin remote.
remote_branches = GIT.GetRemoteBranches(cwd)
if 'origin/main' in remote_branches:
# Fall back on origin/main if it exits.
return 'origin', 'refs/heads/main'
if 'origin/master' in remote_branches:
# Fall back on origin/master if it exits.
return 'origin', 'refs/heads/master'
return None, None
@staticmethod
def RefToRemoteRef(ref, remote):
"""Convert a checkout ref to the equivalent remote ref.
Returns:
A tuple of the remote ref's (common prefix, unique suffix), or None if it
doesn't appear to refer to a remote ref (e.g. it's a commit hash).
"""
# TODO(mmoss): This is just a brute-force mapping based of the expected
# git config. It's a bit better than the even more brute-force
# replace('heads', ...), but could still be smarter (like maybe actually
# using values gleaned from the git config).
m = re.match('^(refs/(remotes/)?)?branch-heads/', ref or '')
if m:
return ('refs/remotes/branch-heads/', ref.replace(m.group(0), ''))
m = re.match('^((refs/)?remotes/)?%s/|(refs/)?heads/' % remote, ref
or '')
if m:
return ('refs/remotes/%s/' % remote, ref.replace(m.group(0), ''))
return None
@staticmethod
def RemoteRefToRef(ref, remote):
assert remote, 'A remote must be given'
if not ref or not ref.startswith('refs/'):
return None
if not ref.startswith('refs/remotes/'):
return ref
if ref.startswith('refs/remotes/branch-heads/'):
return 'refs' + ref[len('refs/remotes'):]
if ref.startswith('refs/remotes/%s/' % remote):
return 'refs/heads' + ref[len('refs/remotes/%s' % remote):]
return None
@staticmethod
def GetUpstreamBranch(cwd):
"""Gets the current branch's upstream branch."""
remote, upstream_branch = GIT.FetchUpstreamTuple(cwd)
if remote != '.' and upstream_branch:
remote_ref = GIT.RefToRemoteRef(upstream_branch, remote)
if remote_ref:
upstream_branch = ''.join(remote_ref)
return upstream_branch
@staticmethod
def IsAncestor(maybe_ancestor, ref, cwd=None):
# type: (string, string, Optional[string]) -> bool
"""Verifies if |maybe_ancestor| is an ancestor of |ref|."""
try:
GIT.Capture(['merge-base', '--is-ancestor', maybe_ancestor, ref],
cwd=cwd)
return True
except subprocess2.CalledProcessError:
return False
@staticmethod
def GetOldContents(cwd, filename, branch=None):
if not branch:
branch = GIT.GetUpstreamBranch(cwd)
if platform.system() == 'Windows':
# git show <sha>:<path> wants a posix path.
filename = filename.replace('\\', '/')
command = ['show', '%s:%s' % (branch, filename)]
try:
return GIT.Capture(command, cwd=cwd, strip_out=False)
except subprocess2.CalledProcessError:
return ''
@staticmethod
def GenerateDiff(cwd,
branch=None,
branch_head='HEAD',
full_move=False,
files=None):
"""Diffs against the upstream branch or optionally another branch.
full_move means that move or copy operations should completely recreate the
files, usually in the prospect to apply the patch for a try job."""
if not branch:
branch = GIT.GetUpstreamBranch(cwd)
command = [
'-c', 'core.quotePath=false', 'diff', '-p', '--no-color',
'--no-prefix', '--no-ext-diff', branch + "..." + branch_head
]
if full_move:
command.append('--no-renames')
else:
command.append('-C')
# TODO(maruel): --binary support.
if files:
command.append('--')
command.extend(files)
diff = GIT.Capture(command, cwd=cwd, strip_out=False).splitlines(True)
for i in range(len(diff)):
# In the case of added files, replace /dev/null with the path to the
# file being added.
if diff[i].startswith('--- /dev/null'):
diff[i] = '--- %s' % diff[i + 1][4:]
return ''.join(diff)
@staticmethod
def GetAllFiles(cwd):
"""Returns the list of all files under revision control."""
command = ['-c', 'core.quotePath=false', 'ls-files', '--', '.']
return GIT.Capture(command, cwd=cwd).splitlines(False)
@staticmethod
def GetSubmoduleCommits(cwd, submodules):
# type: (string, List[string]) => Mapping[string][string]
"""Returns a mapping of staged or committed new commits for submodules."""
if not submodules:
return {}
result = subprocess2.check_output(['git', 'ls-files', '-s', '--'] +
submodules,
cwd=cwd).decode('utf-8')
commit_hashes = {}
for r in result.splitlines():
# ['<mode>', '<commit_hash>', '<stage_number>', '<path>'].
record = r.strip().split(maxsplit=3) # path can contain spaces.
assert record[0] == '160000', 'file is not a gitlink: %s' % record
commit_hashes[record[3]] = record[1]
return commit_hashes
@staticmethod
def GetCheckoutRoot(cwd):
"""Returns the top level directory of a git checkout as an absolute path.
"""
root = GIT.Capture(['rev-parse', '--show-cdup'], cwd=cwd)
return os.path.abspath(os.path.join(cwd, root))
@staticmethod
def IsInsideWorkTree(cwd):
try:
return GIT.Capture(['rev-parse', '--is-inside-work-tree'], cwd=cwd)
except (OSError, subprocess2.CalledProcessError):
return False
@staticmethod
def IsVersioned(cwd, relative_dir):
# type: (str, str) -> int
"""Checks whether the given |relative_dir| is part of cwd's repo."""
output = GIT.Capture(['ls-tree', 'HEAD', '--', relative_dir], cwd=cwd)
if not output:
return VERSIONED_NO
if output.startswith('160000'):
return VERSIONED_SUBMODULE
return VERSIONED_DIR
@staticmethod
def ListSubmodules(repo_root):
# type: (str) -> Collection[str]
"""Returns the list of submodule paths for the given repo.
Path separators will be adjusted for the current OS.
"""
if not os.path.exists(os.path.join(repo_root, '.gitmodules')):
return []
config_output = GIT.Capture(
['config', '--file', '.gitmodules', '--get-regexp', 'path'],
cwd=repo_root)
return [
line.split()[-1].replace('/', os.path.sep)
for line in config_output.splitlines()
]
@staticmethod
def CleanupDir(cwd, relative_dir):
"""Cleans up untracked file inside |relative_dir|."""
return bool(GIT.Capture(['clean', '-df', relative_dir], cwd=cwd))
@staticmethod
def ResolveCommit(cwd, rev):
cache_key = None
# We do this instead of rev-parse --verify rev^{commit}, since on
# Windows git can be either an executable or batch script, each of which
# requires escaping the caret (^) a different way.
if gclient_utils.IsFullGitSha(rev):
# Only cache full SHAs
cache_key = hash(cwd + rev)
if val := GIT.rev_parse_cache.get(cache_key):
return val
# git-rev parse --verify FULL_GIT_SHA always succeeds, even if we
# don't have FULL_GIT_SHA locally. Removing the last character
# forces git to check if FULL_GIT_SHA refers to an object in the
# local database.
rev = rev[:-1]
res = GIT.Capture(['rev-parse', '--quiet', '--verify', rev], cwd=cwd)
if cache_key:
# We don't expect concurrent execution, so we don't lock anything.
GIT.rev_parse_cache[cache_key] = res
return res
@staticmethod
def IsValidRevision(cwd, rev, sha_only=False):
"""Verifies the revision is a proper git revision.
sha_only: Fail unless rev is a sha hash.
"""
try:
sha = GIT.ResolveCommit(cwd, rev)
except subprocess2.CalledProcessError:
return None
if sha_only:
return sha == rev.lower()
return True