| # Copyright 2017 The LUCI Authors. All rights reserved. |
| # Use of this source code is governed under the Apache License, Version 2.0 |
| # that can be found in the LICENSE file. |
| |
| import errno |
| import logging |
| import os |
| import re |
| import sys |
| |
| from collections import namedtuple |
| |
| from gevent import subprocess |
| |
| from . import gitattr_checker |
| from . import simple_cfg |
| from .exceptions import GitFetchError, UnresolvedRefspec |
| |
| |
LOGGER = logging.getLogger(__name__)


# CommitMetadata describes a single git commit. Its fields are:
#   revision (str): the revision of this commit (i.e. hash)
#   author_email (str|None): the email of the author of this commit
#   commit_timestamp (int): the unix commit timestamp for this commit
#   message_lines (tuple(str)): the message of this commit
#   spec (SimpleRecipesCfg): the parsed infra/config/recipes.cfg file or None.
#   roll_candidate (bool): if this commit contains changes which are known to
#     affect the behavior of the recipes (i.e. modifications within recipe_path
#     and/or modifications to recipes.cfg)
CommitMetadata = namedtuple('_CommitMetadata', [
    'revision',
    'author_email',
    'commit_timestamp',
    'message_lines',
    'spec',
    'roll_candidate',
])
| |
| |
class Backend:
  """Abstract interface to a recipe repository.

  Concrete subclasses implement the public overrides (fetch, checkout,
  cat_file, ls_files) and the private `_*_impl` hooks. The shared public
  methods (commit_metadata, resolve_refspec, updates, ...) are implemented
  here on top of those hooks and should not be overridden.
  """

  def __init__(self, checkout_dir, repo_url):
    """
    Args:
      checkout_dir (str): native absolute path to local directory that this
        Backend will manage.
      repo_url (str|None): url to remote repository that this Backend will
        connect to. If None, then the repo will be assumed to exist at
        checkout_dir and all write operations will be disabled.
    """
    self.checkout_dir = checkout_dir
    self.repo_url = repo_url

  ### shared public implementations, do not override

  # This is a simple mapping of
  #   repo_url -> git_revision -> commit_metadata()
  # It only holds cache entries for git commits (e.g. sha1 hashes).
  # When a Backend has no repo_url, its checkout_dir is used as the outer key
  # instead. The dict lives on the class, so it is shared by every Backend
  # instance in the process.
  _GIT_METADATA_CACHE = {}

  # This matches git commit hashes (exactly 40 hex digits).
  _COMMIT_RE = re.compile(r'^[a-fA-F0-9]{40}$')

  def commit_metadata(self, refspec):
    """Cached version of _commit_metadata_impl.

    The refspec will be resolved if it's not absolute.

    Returns (CommitMetadata).
    """
    revision = self.resolve_refspec(refspec)
    key = self.repo_url
    if key is None:
      key = self.checkout_dir
    cache = self._GIT_METADATA_CACHE.setdefault(key, {})
    if revision not in cache:
      # Only successful lookups are cached; if the impl raises, nothing is
      # stored and a later call will try again.
      cache[revision] = self._commit_metadata_impl(revision)
    return cache[revision]

  @classmethod
  def is_resolved_revision(cls, revision):
    """Returns a truthy value iff |revision| looks like a full commit hash.

    The return value is the regex match object (or None); callers should only
    rely on its truthiness.
    """
    return cls._COMMIT_RE.match(revision)

  @classmethod
  def assert_resolved(cls, revision):
    """Raises UnresolvedRefspec if |revision| is not a full commit hash."""
    if not cls.is_resolved_revision(revision):
      raise UnresolvedRefspec('unresolved refspec %r' % revision)

  def resolve_refspec(self, refspec):
    """Returns the commit hash for |refspec|.

    If |refspec| already is a full commit hash it is returned unchanged,
    otherwise it's resolved with _resolve_refspec_impl.
    """
    if self.is_resolved_revision(refspec):
      return refspec
    return self._resolve_refspec_impl(refspec)

  def updates(self, refspec, revision):
    """Returns a list of revisions contained in |refspec| starting after
    |revision|.

    Args:
      refspec (str) - an unresolved git refspec, e.g. 'refs/heads/main'.
      revision (str) - a resolved git commit hash.

    Returns list(CommitMetadata) - The commit metadata in the range
      (revision,refspec].

    Raises UnresolvedRefspec if |revision| is not a full commit hash.
    """
    assert not self.is_resolved_revision(refspec)
    self.assert_resolved(revision)
    return self._updates_impl(refspec, revision)

  ### direct overrides. These are public methods which must be overridden.

  def fetch(self, refspec):
    """Does a fetch for the provided refspec (e.g. get all data from remote), if
    this backend supports it. Otherwise does nothing.

    Args:
      refspec (str) - a git refspec which is resolvable on the
        remote git repo, e.g. 'refs/heads/main', 'deadbeef...face', etc.
    """
    raise NotImplementedError()

  def checkout(self, refspec, revision=None):
    """Checks out |revision| (or the current value of |refspec|) into
    |checkout_dir|.

    Args:
      refspec (str) - a git refspec which is resolvable on the
        remote git repo, e.g. 'refs/heads/main', etc. This will be used to
        fetch if the local git repo doesn't have `revision`.
      revision (str|None) - The git revision to checkout. If None, `refspec`
        is resolved to its current revision and that is checked out.
    """
    # TODO(iannucci): Alter the contract for this method so that it only checks
    # out the files referred to according to the rules that the bundle
    # subcommand uses.
    raise NotImplementedError()

  def cat_file(self, revision, file_path):
    """Returns the contents of the given |file_path| in |revision|.

    Args:
      revision (str) - The revision to cat the file from.
      file_path (str) - The git path for the file (from the root of the repo).

    Returns the file contents as a str.
    """
    raise NotImplementedError()

  def ls_files(self, *args):
    """Returns the stdout from `git ls-files *args` in this repo.

    Args:
      args (List[str]) - Additional arguments to pass to ls_files.

    Returns the stdout of the command.
    """
    raise NotImplementedError()


  ### private overrides. Override these in the implementations, but don't call
  ### externally.

  def _updates_impl(self, refspec, revision):
    """Returns the commits contained in |refspec| which come after |revision|.

    This is the implementation hook for `updates`, which validates its
    arguments and then passes them through positionally, in this order.

    Args:
      refspec (str) - an unresolved git refspec, e.g. 'refs/heads/main'.
      revision (str) - a resolved git commit hash; excluded from the result.

    Returns list(CommitMetadata) - The commit metadata in the range
      (revision,refspec].
    """
    raise NotImplementedError()

  def _resolve_refspec_impl(self, refspec):
    """Resolves the refspec to its current REMOTE value.

    This must resolve to the remote value even when using a local clone (i.e.
    GitBackend).

    Args:
      refspec (str) - a git refspec which is resolvable on the
        remote git repo, e.g. 'refs/heads/main', 'deadbeef...face', etc.

    Returns (str) - The git commit for the given refspec.
    """
    raise NotImplementedError()

  def _commit_metadata_impl(self, revision):
    """Returns CommitMetadata for commit |revision|."""
    raise NotImplementedError()
| |
| |
class GitBackend(Backend):
  """GitBackend uses a local git checkout.

  All git commands are run against `checkout_dir`. When `repo_url` is set it
  is the remote used for fetching and for resolving refspecs; when it's None
  refspecs are resolved against the local repo and `fetch` is unavailable.
  """

  # On Windows (and cygwin) git is invoked through the `git.bat` wrapper.
  if sys.platform.startswith(('win', 'cygwin')):
    GIT_BINARY = 'git.bat'
  else:
    GIT_BINARY = 'git'

  def __init__(self, *args, **kwargs):
    """Takes the same arguments as Backend.__init__."""
    super(GitBackend, self).__init__(*args, **kwargs)
    # Set once `git init` has been run successfully by this instance.
    self._did_ensure = False
    # refspec -> resolved commit hash. Only populated when repo_url is set.
    self._resolved_refspecs = {}
    self._gitattr_checker = gitattr_checker.AttrChecker(self.checkout_dir)

  def _git(self, *args):
    """Runs a git command.

    The command is run with `-C checkout_dir` so that it operates on the
    managed checkout, and with detached-HEAD advice disabled.

    Args:
      *args (str) - The list of command arguments to pass to git.

    Returns (str) - the stdout of the command.

    Raises GitFetchError on failure.
    """
    if self.GIT_BINARY.endswith('.bat'):
      # On the esteemed Windows Operating System, '^' is an escape character.
      # Since .bat files are running cmd.exe under the hood, they interpret this
      # escape character. We need to ultimately get a single ^, so we need two
      # ^'s for when we invoke the .bat, and each of those needs to be escaped
      # when the bat ultimately invokes the git.exe binary. This leaves us with
      # a total of 4x the ^'s that we originally wanted. Hooray.
      args = [a.replace('^', '^^^^') for a in args]

    cmd = [
        self.GIT_BINARY,
        '-c', 'advice.detachedHead=false',  # to avoid spamming logs
        '-C', self.checkout_dir,
    ] + list(args)

    try:
      return self._execute(*cmd)
    except subprocess.CalledProcessError as e:
      raise GitFetchError('%r failed: %s: %s' % (cmd, e, e.output))

  def _execute(self, *args):
    """Runs a raw command. Separate so it's easily mockable.

    Args:
      *args (str) - the full command line, starting with the executable.

    Returns (str) - the command's stdout, decoded as utf-8.

    Raises subprocess.CalledProcessError if the command exits non-zero. The
    exception's `output` is a str holding whatever the command printed to
    stdout and/or stderr.
    """
    LOGGER.info('Running: %s', args)

    process = subprocess.Popen(
        args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    raw_stdout, raw_stderr = process.communicate()
    retcode = process.poll()
    if retcode:
      # The pipes yield bytes. Decode before formatting so that the error text
      # is readable rather than a b'...' repr, and decode leniently so that
      # undecodable output can't mask the real failure.
      stdout = raw_stdout.decode('utf-8', 'replace')
      stderr = raw_stderr.decode('utf-8', 'replace')
      if stdout and stderr:
        failure_output = 'STDOUT\n%s\nSTDERR\n%s' % (stdout, stderr)
      else:
        failure_output = stdout or stderr
      raise subprocess.CalledProcessError(retcode, args, failure_output)
    return raw_stdout.decode('utf-8')

  def _ensure_local_repo_exists(self):
    """Ensures that self.checkout_dir is a valid git repository.

    Safe to call multiple times. If this is successful, the GitBackend will not
    try to re-initialize the checkout_dir again.

    Raises GitFetchError if it detected that checkout_dir is likely not a valid
    git repo.
    """
    if self._did_ensure:
      return
    if not os.path.isdir(os.path.join(self.checkout_dir, '.git')):
      try:
        # Note that it's safe to re-init an existing git repo. This should allow
        # us to switch between GitBackend and other Backends.
        self._execute(self.GIT_BINARY, 'init', self.checkout_dir)
      except subprocess.CalledProcessError as e:
        raise GitFetchError('Git "init" failed: %s' % e)
      self._did_ensure = True

  def _has_rev(self, revision):
    """Returns True iff the on-disk repo has the given revision.

    Raises UnresolvedRefspec if |revision| is not a full commit hash.
    """
    self.assert_resolved(revision)
    try:
      # Use commit_metadata since it's cached and we're likely to call it
      # shortly after _has_rev anyway.
      self.commit_metadata(revision)
      return True
    except GitFetchError:
      return False


  ### Backend implementations

  def fetch(self, refspec):
    """Fetches from `repo_url` into the local repo.

    Args:
      refspec (str) - the refspec to fetch. If this is a full commit hash it
        is not passed to git, and a plain `git fetch <repo_url>` is done
        instead.

    Raises:
      ValueError if this backend has no `repo_url`.
      GitFetchError if a git command fails.
    """
    if self.repo_url is None:
      raise ValueError('cannot call GitBackend.fetch without a `repo_url`')
    self._ensure_local_repo_exists()

    args = ['fetch', self.repo_url]
    if not self.is_resolved_revision(refspec):
      args.append(refspec)

    LOGGER.info('fetching %s', self.repo_url)
    self._git(*args)

  def checkout(self, refspec, revision=None):
    """Makes the working tree in `checkout_dir` match |revision|.

    Args:
      refspec (str) - a git refspec; fetched if the local repo doesn't have
        `revision` yet.
      revision (str|None) - the commit to check out. If falsy, `refspec` is
        resolved and the result is used.

    Raises GitFetchError if a git command fails.
    """
    if not revision:
      revision = self.resolve_refspec(refspec)

    LOGGER.info('Checking out %r in %s (%s)',
                revision, self.checkout_dir, self.repo_url)
    self._ensure_local_repo_exists()

    if not self._has_rev(revision):
      self.fetch(refspec)

    # reset touches index.lock which is problematic when multiple processes are
    # accessing the recipes at the same time. To alleviate this, we do a quick
    # diff first; it exits non-zero (surfacing here as GitFetchError) unless the
    # working tree already matches `revision`, in which case no reset is needed.
    try:
      self._git('diff', '--quiet', revision)
    except GitFetchError:
      # At this point there are two possibilities:
      #   1) We have multiple processes attempting to manipulate this checkout.
      #      1a) They're trying to check out the same version (ok)
      #      1b) They're trying to check out different versions (undefined)
      #   2) This checkout had a killed manipulation attempt and index.lock is
      #      stale (we're the only process manipulating it now).
      #
      # In 1a and 2, removing the lockfile is correct. In 1b it means that
      # recipes.cfg is mutating while we're doing the fetch, so this is
      # definitely undefined-behavior territory.
      #
      # To unblock 1a and 2, removing the lockfile should be safe, though it may
      # result in other errors on Windows.
      #
      # In any event, it shouldn't make the situation worse.
      index_lock = os.path.join(self.checkout_dir, '.git', 'index.lock')
      try:
        os.remove(index_lock)
      except OSError as exc:
        # A missing lockfile is the normal case; anything else is worth noting.
        if exc.errno != errno.ENOENT:
          LOGGER.warning('failed to remove %r, reset will fail: %s',
                         index_lock, exc)
      self._git('reset', '-q', '--hard', revision)

  def cat_file(self, revision, file_path):
    """Returns the contents (str) of |file_path| as of commit |revision|.

    Raises:
      UnresolvedRefspec if |revision| is not a full commit hash.
      GitFetchError if git fails (e.g. the path doesn't exist in |revision|).
    """
    self.assert_resolved(revision)
    return self._git('cat-file', 'blob', '%s:%s' % (revision, file_path))

  def ls_files(self, *args):
    """Returns the stdout (str) of `git ls-files *args` in the checkout."""
    return self._git('ls-files', *args)

  def _updates_impl(self, refspec, revision):
    """Returns the CommitMetadata for commits in the range (revision,refspec].

    |refspec| is always re-resolved here, and is fetched if the commit it
    points at isn't available locally. Commits are returned oldest first, in
    topological order.
    """
    other_revision = self._resolve_refspec_impl(refspec)
    if not self._has_rev(other_revision):
      self.fetch(refspec)
    args = [
        'rev-list',
        '--reverse',
        '--topo-order',
        '%s..%s' % (revision, other_revision),
    ]
    # rev-list prints one bare commit hash per line; splitting on whitespace
    # drops blank entries and tolerates CRLF line endings.
    return [self.commit_metadata(rev) for rev in self._git(*args).split()]

  def _resolve_refspec_impl(self, refspec):
    """Resolves |refspec| to a commit hash using `git ls-remote`.

    The lookup is done against `repo_url`, or against the local repo ('.') if
    there is no `repo_url`.

    Raises:
      GitFetchError if git fails.
      KeyError if ls-remote does not report a ref named exactly |refspec|.
    """
    self._ensure_local_repo_exists()
    rslt = self._resolved_refspecs.get(refspec)
    if rslt is None:
      # Can return e.g.
      #
      #   b4a1b1365895c5962fb3654aff61290be2a492ed	HEAD
      #   39bbb4e3749b0a9ebc6cb36d8b679b147e4ed270	refs/remotes/origin/HEAD
      #
      # So we need the 'splitlines' bit too.
      source = self.repo_url if self.repo_url is not None else '.'
      mapping = {}
      for line in self._git('ls-remote', source, refspec).splitlines():
        csum, ref = line.split()
        mapping[ref] = csum
      if refspec not in mapping:
        raise KeyError(
            'refspec %r not found in `git ls-remote %s` output (got refs: %r)'
            % (refspec, source, sorted(mapping)))
      rslt = mapping[refspec]
      assert self.is_resolved_revision(rslt), repr(rslt)
      # Cache the refspec so that the candidate algorithm isn't repeatedly doing
      # network traffic for the same ref. If there's no repo URL, this is either
      # a local override and doesn't require network traffic or it's a fake
      # backend for tests, which would intentionally be mutating the remote
      # state.
      if self.repo_url is not None:
        self._resolved_refspecs[refspec] = rslt
    return rslt

  def _commit_metadata_impl(self, revision):
    """Builds the CommitMetadata for |revision| from the local repo.

    Raises:
      UnresolvedRefspec if |revision| is not a full commit hash.
      GitFetchError if git fails (e.g. the commit isn't available locally).
    """
    self.assert_resolved(revision)

    # show
    #   %`author Email`
    #   %`newline`
    #   %`commit time`
    #   %`newline`
    #   %`Body`
    meta = self._git(
        'show', '-s', '--format=%aE%n%ct%n%B', revision,
    ).rstrip('\n').splitlines()

    # A commit with a missing (GitFetchError) or unparsable (ValueError)
    # recipes.cfg simply has no spec.
    try:
      spec = simple_cfg.SimpleRecipesCfg.from_json_string(
          self.cat_file(revision, simple_cfg.RECIPES_CFG_LOCATION_REL))
    except (GitFetchError, ValueError):
      spec = None

    # check diff to see if it touches anything interesting.
    changed_files = set(self._git(
        'diff-tree', '-r', '--no-commit-id', '--name-only', '%s^!' % revision,
    ).splitlines())

    # NOTE: with no spec (or an empty recipes_path) the prefix is '', which
    # every path starts with, so any changed file marks the commit as
    # interesting.
    recipes_path = spec.recipes_path if spec else ''

    has_interesting_changes = (
        simple_cfg.RECIPES_CFG_LOCATION_REL in changed_files or
        any(f.startswith(recipes_path) for f in changed_files) or
        any(f.split('/')[-1] == '.gitattributes' for f in changed_files) or
        self._gitattr_checker.check_files(revision, changed_files))

    return CommitMetadata(revision, meta[0],
                          int(meta[1]), tuple(meta[2:]),
                          spec, has_interesting_changes)