blob: 580d102637cbcd9c891edbf5e2c8ad65c4bceff1 [file] [log] [blame]
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import collections
import errno
import logging
import os
import shutil
import signal
import subprocess
import sys
import tempfile
import threading
import time
import urlparse
from infra.libs.git2.commit import Commit
from infra.libs.git2.ref import Ref
from infra.libs.git2.util import CalledProcessError, INVALID
LOGGER = logging.getLogger(__name__)
class Repo(object):
"""Represents a remote git repo.
Manages the (bare) on-disk mirror of the remote repo.
'refs/*config', # e.g. refs/meta/config, refs/infra/config
'refs/*config/main', # e.g. refs/gsubtreed-config/main
def __init__(self, url, whitelist_refs=DEFAULT_REFS):
"""Creates a new Repo for the given git repo `url`.
url (str) - The URL of the repo to manage. Can be any URL scheme that git
supports (including `file://`).
whitelist_refs (iterable[str]) - The refs to include in this mirror. By
default this includes known config refs, heads, branch-heads, notes and
tags. This list excludes gerrit CL metadata which has on the order of a
~million extra refs (as of 2018Q3). Unfortunately there's no simple way
to BLACKLIST refs as of git 2.18.
self.dry_run = False
self.repos_dir = None
self.netrc_file = None
self._url = url
self._repo_path = None
self._commit_cache = collections.OrderedDict()
self._log = LOGGER.getChild('Repo')
self._queued_refs = {}
self._whitelist_refs = sorted(whitelist_refs)
def __hash__(self):
return hash((self._url, self._repo_path))
def __getitem__(self, ref):
"""Get a Ref attached to this Repo."""
return Ref(self, ref)
# Accessors
# pylint: disable=W0212
url = property(lambda self: self._url)
repo_path = property(lambda self: self._repo_path)
def reify(self, share_from=None, timeout=None, partial_clone=False):
"""Ensures the local mirror of this Repo exists.
share_from - Either a Repo, or a path to a git repo (on disk). This will
cause objects/info/alternates to be set up to point to the
other repo for objects.
timeout - How long to wait for fetch operation to finish before killing
it, sec.
partial_clone - if True, only history will be fetched, but no file
objects. See also
Can not be used with share_from.
Default: False.
assert self.repos_dir is not None
assert not partial_clone or share_from is None, (
'partial_clone can\'t be used with share_from')
if not os.path.exists(self.repos_dir):
try:'making repos dir: %s', self.repos_dir)
except OSError as e: # pragma: no cover
if e.errno != errno.EEXIST:
parsed = urlparse.urlparse(self.url)
norm_url = parsed.netloc + parsed.path
if norm_url.endswith('.git'):
norm_url = norm_url[:-len('.git')]
folder = norm_url.replace('-', '--').replace('/', '-').lower()
rpath = os.path.abspath(os.path.join(self.repos_dir, folder))
share_objects = None
if share_from:
if isinstance(share_from, Repo):
assert share_from.repo_path, 'share_from target must be reify()\'d'
share_from = share_from.repo_path
share_objects = os.path.join(share_from, 'objects')
def ensure_config(path):
if self.netrc_file:
home = os.path.expanduser('~')
fake_home = os.path.join(path, 'fake_home')
except OSError as e: # pragma: no cover
if e.errno != errno.EEXIST:
if os.path.exists(os.path.join(home, '.gitconfig')):
os.path.join(home, '.gitconfig'),
os.path.join(fake_home, '.gitconfig'))
shutil.copy(self.netrc_file, os.path.join(fake_home, '.netrc'))
os.chmod(os.path.join(fake_home, '.netrc'), 0600)
if share_objects:
altfile = os.path.join(path, 'objects', 'info', 'alternates')
except OSError as e:
if e.errno != errno.EEXIST:
raise # pragma: no cover
add_entry = not os.path.exists(altfile)
if not add_entry:
with open(altfile, 'r') as f:
add_entry = share_objects in f.readlines()
if add_entry:
with open(altfile, 'a') as f:
print >> f, share_objects
# First config resets all fetch refs
first = True
for pattern in self._whitelist_refs:*[
'--replace-all' if first else '--add',
'remote.origin.fetch', '+%s:%s' % (pattern, pattern)
], cwd=path)
first = False
if not os.path.isdir(rpath):
self._log.debug('initializing %r -> %r', self, rpath)
tmp_path = tempfile.mkdtemp(dir=self.repos_dir)'init', '--bare', os.path.basename(tmp_path), cwd=self.repos_dir)
'remote', 'add', '--mirror=fetch', 'origin', self.url, cwd=tmp_path)
if partial_clone:'config', 'extensions.partialclone', 'origin', cwd=tmp_path)'config', 'core.repositoryformatversion', '1', cwd=tmp_path)'config', 'core.partialclonefilter', 'blob:none', cwd=tmp_path)
'fetch', stdout=sys.stdout, stderr=sys.stderr,
cwd=tmp_path, silent=True, timeout=timeout)
os.rename(tmp_path, os.path.join(self.repos_dir, folder))
self._log.debug('%r already initialized', self)
self._repo_path = rpath
# Representation
def __repr__(self):
return 'Repo({_url!r})'.format(**self.__dict__)
# Methods
def get_commit(self, hsh):
"""Creates a new ``Commit`` object for this ``Repo``.
Uses a very basic LRU cache for commit objects, keeping up to
``MAX_CACHE_SIZE`` before eviction. This cuts down on the number of
redundant git commands by > 50%, and allows expensive cached_property's to
remain for the life of the process.
If the ``Commit`` does not exist in this ``Repo``, return INVALID and do
not cache the result.
if hsh in self._commit_cache:
self._log.debug('Hit %s', hsh)
r = self._commit_cache.pop(hsh)
self._log.debug('Miss %s', hsh)
if len(self._commit_cache) >= self.MAX_CACHE_SIZE:
r = Commit(self, hsh)
if is INVALID:
return INVALID
self._commit_cache[hsh] = r
return r
def refglob(self, *globstrings):
"""Yield every Ref in this repo which matches a ``globstring`` according to
the rules of git-for-each-ref.
Defaults to all refs.
refs ='for-each-ref', '--format=%(refname)', *globstrings)
for ref in refs.splitlines():
yield self[ref]
def run(self, *args, **kwargs):
args (list): argv tokens. 'git' is always argv[0]
Keyword Args:
indata (str): data to feed to communicate()
ok_ret (set): valid return codes. Defaults to {0}.
timeout (number): How long to wait for process to finish before killing
it, sec.
kwargs: passed through to subprocess.Popen()
assert args, args
if args[0] == 'push' and self.dry_run:
self._log.warn('DRY-RUN: Would have pushed %r', args[1:])
silent = kwargs.pop('silent', False)
if args[0] in ('fetch', 'push') and not silent:
log_func =
log_func = self._log.debug
if 'cwd' not in kwargs:
assert self._repo_path is not None
kwargs.setdefault('cwd', self._repo_path)
kwargs.setdefault('stderr', subprocess.PIPE)
kwargs.setdefault('stdout', subprocess.PIPE)
indata = kwargs.pop('indata', None)
if indata:
assert 'stdin' not in kwargs
kwargs['stdin'] = subprocess.PIPE
# Point git to a fake HOME with custom .netrc. An alternative is to use
# credential.helper == 'store --file=...', but git always tries to use
# HOME/.netrc before credential helper. So faking home is necessary anyway.
if self.netrc_file:
fake_home = os.path.abspath(os.path.join(kwargs['cwd'], 'fake_home'))
if os.path.exists(fake_home):
env = kwargs.get('env', os.environ).copy()
env['HOME'] = fake_home
kwargs['env'] = env
# Git spawns subprocesses, we want to be able to kill them all.
assert 'preexec_fn' not in kwargs
if sys.platform != 'win32': # pragma: no cover
kwargs['preexec_fn'] = os.setpgrp
ok_ret = kwargs.pop('ok_ret', {0})
timeout = kwargs.pop('timeout', None)
cmd = ('git',) + args
log_func('Running %r', cmd)
started = time.time()
process = subprocess.Popen(cmd, **kwargs)
def kill_proc():
'Terminating stuck process %d, %d sec timeout exceeded',, timeout)
if sys.platform == 'win32': # pragma: no cover
assert os.getpgid( ==
os.killpg(, signal.SIGTERM)
except OSError as e: # pragma: no cover
if e.errno != errno.ESRCH:
LOGGER.exception('Unexpected exception')
except Exception: # pragma: no cover
LOGGER.exception('Unexpected exception')
killer = threading.Timer(timeout, kill_proc) if timeout else None
if killer:
output, errout = process.communicate(indata)
retcode = process.poll()
if killer:
dt = time.time() - started
if dt > 1: # pragma: no cover
log_func('Finished in %.1f sec', dt)
if retcode not in ok_ret:
raise CalledProcessError(retcode, cmd, output, errout)
if errout:
return output
def intern(self, data, typ='blob'):
'hash-object', '-w', '-t', typ, '--stdin', indata=str(data)).strip()
def fetch(self, timeout=None):
"""Update all local repo state to match remote.
Clears queue_fast_forward entries.
timeout (number): kill the process after this many seconds.
self._queued_refs = {}
LOGGER.debug('fetching %r', self)'fetch', stdout=sys.stdout, stderr=sys.stderr, timeout=timeout)
def fast_forward_push(self, refs_and_commits,
include_err=False, timeout=None):
"""Push commits to refs on the remote, and also update refs' local copies.
Refs names are specified as refs on remote, i.e. push to
``Ref('refs/heads/master')`` would update 'refs/heads/master' on remote
(and on local repo mirror), no ref name translation is happening.
Optionally captures the stderror output of the push command and returns
it as a string.
Returns the stdout of the push command, or the stdout+stderr of the push
command if ``include_err`` is specified.
refs_and_commits: dict {Ref object -> Commit to push to the ref}.
include_err: a boolean indicating to capture and return the push output.
timeout: how long to wait for push to complete before aborting, in sec.
if not refs_and_commits:
refspec = self.make_refspec(refs_and_commits)
kwargs = {'stderr': sys.stdout} if include_err else {}
kwargs['timeout'] = timeout
output ='push', 'origin', *refspec, **kwargs)
for r, c in refs_and_commits.iteritems():
return output
def make_refspec(self, refs_and_commits):
"""Returns the rendered list of refspec entries for a git-push command line.
refs_and_commits: dict {Ref object -> Commit to push to the ref}.
assert all(r.repo is self for r in refs_and_commits)
return [
'%s:%s' % (c.hsh, r.ref)
for r, c in sorted(
refs_and_commits.iteritems(), key=lambda pair: pair[0].ref)
def queue_fast_forward(self, refs_and_commits):
"""Update local refs, and keep track of which refs are updated.
Push all queued refs to remote when push_queued_fast_forwards() is called.
Refs names are specified the same as fast_forward_push()
refs_and_commits: dict {Ref object -> Commit to push to the ref}.
if not refs_and_commits:
assert all(r.repo is self for r in refs_and_commits)
for r, c in refs_and_commits.iteritems():
LOGGER.debug('Queuing %r -> %r', r.ref, c.hsh)
self._queued_refs[r] = c
def push_queued_fast_forwards(self, include_err=False, timeout=None):
"""Push refs->commits enqueued with queue_fast_forward."""
queued = self._queued_refs
self._queued_refs = {}
return self.fast_forward_push(
queued, include_err=include_err, timeout=timeout)
def notes(self, obj, ref=None):
"""Get git-notes content for this object.
See also: ``Commit.notes``.
obj (str|Ref): the hash of some object or a Ref.
ref (str|Ref): a string ref for git-notes or a Ref.
The notes content as a string, or None if there was no note for this object.
obj = obj.ref if isinstance(obj, Ref) else obj
ref = ref.ref if isinstance(ref, Ref) else ref
args = ['notes'] + (['--ref', ref] if ref else []) + ['show', obj]
except CalledProcessError:
return None