| # Copyright 2017 The LUCI Authors. All rights reserved. |
| # Use of this source code is governed under the Apache License, Version 2.0 |
| # that can be found in the LICENSE file. |
| |
| import collections |
| import collections.abc |
| import functools |
| import logging |
| import os |
| import sys |
| import time |
| |
| from ..fetch import GitBackend |
| |
| from .commit_list import CommitList |
| from .roll_candidate import RollCandidate |
| |
| LOGGER = logging.getLogger(__name__) |
| |
| |
| class _Config(collections.abc.Mapping): |
| """An immutable mapping type storing the revisions to pin repos to. |
| |
| Instances are hashable and in contrast to FrozenDict, the equality |
| comparison ignores iteration order. |
| |
| Instances do not necessarily represent a complete config. |
| """ |
| |
| def __init__(self, revisions_by_repo): |
| self._revisions_by_repo = dict(revisions_by_repo) |
| # Calculate the hash immediately so that we know all the items are |
| # hashable too. |
| self._hash = hash(tuple(sorted(self._revisions_by_repo.items()))) |
| |
| def __hash__(self): |
| return self._hash |
| |
| def __getitem__(self, key): |
| return self._revisions_by_repo[key] |
| |
| def __iter__(self): |
| return iter(self._revisions_by_repo) |
| |
| def __len__(self): |
| return len(self._revisions_by_repo) |
| |
| def __str__(self): |
| return '{}({})'.format(type(self).__name__, self._revisions_by_repo) |
| |
| |
def memoize(f):
  """Decorator that can be applied to a method to memoize the results.

  Args:
    f - The function to decorate with memoization. The first argument of
      the function should be self - the instance of the class. The
      function can take an arbitrary amount of additional positional
      arguments. All additional arguments must be hashable.

  Returns:
    A function wrapping `f`. For a given instance, `f` will be executed
    only once for a given set of input arguments.

  The cache is stored on the instance (under an attribute derived from
  the wrapped function's name) rather than in the decorator's closure.
  A closure-level cache would be shared by every instance of the class,
  so a second instance could observe stale results computed from the
  first instance's state, and cached values would never be released.
  """
  cache_attr = '_memoize_cache_' + f.__name__

  @functools.wraps(f)
  def cached(self, *args):
    # Lazily create the per-instance cache; this also avoids requiring
    # classes to cooperate in __init__.
    cache = getattr(self, cache_attr, None)
    if cache is None:
      cache = {}
      setattr(self, cache_attr, cache)
    if args not in cache:
      cache[args] = f(self, *args)
    return cache[args]

  return cached
| |
| |
class _ConfigFinder:
  """Finds consistent dependency configs rooted at a single starting pin.

  A "config" is a mapping from repo name to revision. A config is
  consistent when each pinned commit's dependency spec agrees with the
  revisions the config assigns to its deps (checked via the CommitList
  is_compatible/compatible_commits methods).
  """

  def __init__(self, commit_lists_by_repo, new_repo_commit_list_getter):
    # dict(str, CommitList) - commits for each known repo. New entries may
    # be added over time via the getter below.
    self._commit_lists_by_repo = commit_lists_by_repo
    # Callable (repo_name, dep) -> CommitList, used to obtain commits for
    # a repo that first appears as a new dependency of some commit.
    self._new_repo_commit_list_getter = new_repo_commit_list_getter

  @memoize
  def find_configs(self, repo, revision, repos_to_pin):
    """Find configs for candidate commits.

    Args:
      repo (str) - The repo to perform the starting pin on.
      revision (str) - The commit hash value to use for the starting pin.
      repos_to_pin (set(str)) - The repos that need to be pinned by the
        resulting configs. Additional repos may be pinned if a commit
        adds new dependencies.

    Returns:
      A set of configs where each config is a mapping from the repo name
      to the revision to be used for the repo. The config will contain
      entries for all of the provided top level repos and all of their
      dependencies, which may contain new repos. The configs are
      guaranteed to be consistent but may roll some repos backwards.
    """
    commit = self._commit_lists_by_repo[repo].lookup(revision)
    config = {}
    # Copy: _pin mutates repos_to_pin in place, and the caller's set (and
    # the memoization key) must not be modified.
    repos_to_pin = set(repos_to_pin)
    configs = set()
    if self._pin(config, repo, commit, repos_to_pin):
      configs.update(
          self._find_configs_impl(_Config(config), frozenset(repos_to_pin)))
    else:
      LOGGER.info('No consistent configs could be created by pinning %s to %s',
                  repo, revision)
    return configs

  # This is cached so that if there are multiple top level repos, when a config
  # is chosen that only moves one of them, the configs that were computed for
  # moving the other pin don't need to be recomputed (assuming no repo becomes a
  # top level repo due to a dependency edge being removed). This requires that
  # the arguments are hashable (_Config and frozenset). This also requires that
  # the implementation not use cursors into the commit lists as that would
  # invalidate results when the cursors are advanced.
  @memoize
  def _find_configs_impl(self, config, repos_to_pin):
    """Recursively extends a partial config until nothing is left to pin.

    Args:
      config (_Config) - A partial, consistent config.
      repos_to_pin (frozenset(str)) - Repos not yet pinned by `config`.

    Returns:
      An iterable (list or set) of complete, consistent _Config objects
      that extend `config`.
    """
    if not repos_to_pin:
      return [config]

    # Pick an arbitrary unpinned repo and branch on each of its commits
    # that is compatible with the partial config so far.
    repo = next(iter(repos_to_pin))

    configs = set()
    for commit in self._commit_lists_by_repo[repo].compatible_commits(config):
      # Work on copies: _pin mutates both, and each candidate commit must
      # start from the same partial state.
      pinned = dict(config)
      to_pin = set(repos_to_pin)
      if self._pin(pinned, repo, commit, to_pin):
        configs.update(
            self._find_configs_impl(_Config(pinned), frozenset(to_pin)))

    return configs

  def _pin(self, config, repo, commit, repos_to_pin):
    """Pins `repo` to `commit` in `config`, recursively pinning new deps.

    Mutates `config` and `repos_to_pin` in place.

    Returns:
      True if the pin (and all transitively-required pins for new
      dependencies) yields a consistent config; False otherwise. On
      False, `config` may have been partially updated and should be
      discarded by the caller.
    """
    config[repo] = commit.revision
    new_pins_by_repo = {}
    for dep_repo, dep in commit.spec.deps.items():
      # Deps already pinned (or scheduled to be pinned) are handled by the
      # normal search; only brand-new deps are pinned recursively here.
      if dep_repo not in repos_to_pin and dep_repo not in config:
        new_pins_by_repo[dep_repo] = dep
        config[dep_repo] = dep.revision
    repos_to_pin.difference_update(config)

    for dep_repo, dep in new_pins_by_repo.items():
      clist = self._new_repo_commit_list_getter(dep_repo, dep)
      if not clist.is_compatible(dep.revision, config):
        return False
      dep_commit = clist.lookup(dep.revision)
      if not self._pin(config, dep_repo, dep_commit, repos_to_pin):
        return False

    return True
| |
| |
| def _score(commit_lists_by_repo, config, current_config, top_level_repos): |
| backwards_rolls = 0 |
| new_deps = 0 |
| movement = 0 |
| timestamp = 0 |
| |
| for repo, revision in config.items(): |
| clist = commit_lists_by_repo[repo] |
| if repo not in current_config: |
| new_deps += 1 |
| movement += clist.dist(revision) |
| else: |
| dist = clist.dist(current_config[repo], revision) |
| # If it's moving backwards, it doesn't matter how far, just increment the |
| # count of backwards rolls, which are compared before the movement |
| if dist is None: |
| backwards_rolls += 1 |
| else: |
| movement += dist |
| |
| timestamp = max(commit_lists_by_repo[repo].lookup(revision).commit_timestamp |
| for repo, revision in config.items() |
| if repo in top_level_repos) |
| |
| return backwards_rolls, new_deps, movement, timestamp |
| |
| |
| class _CandidateCallback: |
| |
| def __init__(self): |
| self._accepted = False |
| |
| @property |
| def accepted(self): |
| return self._accepted |
| |
| def accept(self): |
| self._accepted = True |
| |
| |
def _get_roll_candidates_impl(recipe_deps, commit_lists_by_repo):
  """Generator for configs to try rolling.

  All yielded configs will be consistent; configs are produced by
  creating partial configs based on varying a single top-level pin and
  successively updating the config with the commits for each other
  top-level pin that are consistent with the partial config.

  If the incoming config is consistent, all yielded configs will involve
  at least one "interesting" commit being rolled, where "interesting"
  means "the commit modifies one or more recipe related files", and is
  defined by CommitMetadata.roll_candidate. If the incoming config is
  not consistent, then some configs may be initially yielded that do not
  roll any "interesting" commits.

  Configs are yielded in increasing order of the commit movement across
  all repos, with new deps and backwards rolls being considered larger
  movement than advancing an existing pin any amount.

  As a tiebreaker, the commit with the lowest commit timestamp will be
  picked. So if two repos cause the same amount of global commit
  movement, the "older" of the two commits will roll first. Clearly this
  is best-effort as there's no meaningful enforcement of timestamps
  between different repos, but it's usually close enough to reality to
  be helpful. This is done to reflect a realistic commit ordering; it
  doesn't make sense for two independent dependencies to move such that
  a very large time gap develops between them (assuming the timestamps
  are sensible). All that said, the precise timestamp value is not
  necessary for the correctness of the algorithm, since its only
  involvement is a tiebreaker between otherwise valid choices.

  Args:
    recipe_deps (RecipeDeps) - The recipe deps object.
    commit_lists_by_repo (dict(str, CommitList)) - Mapping from repo
      name to the commits for the repo. This will be updated as new
      repos are added.

  Returns:
    A generator that yields pairs of config objects. The config objects
    are mappings from repo name to the revision for the repo. The
    configs will include all repos that should be set in recipes.cfg,
    not just those that have changed. The first element is the currently
    accepted config and the second is the candidate config. The caller
    should send a boolean value to the generator indicating whether the
    config was accepted or not, which will affect how subsequent configs
    are generated. Any value sent before the first yield will be
    ignored.
  """

  # Cache of backends for new repos, the backend caches resolved refspecs, so
  # this will prevent repeated network traffic for the same repo
  new_backends_by_repo = {}

  # Local function so that it can use the value of current_config
  def get_new_repo_commit_list(repo, dep):
    # Once a repo is incorporated into the current config, we will no
    # longer re-fetch it
    assert repo not in current_config, '{} is in current config: {}'.format(
        repo, current_config)
    if repo in commit_lists_by_repo:
      clist = commit_lists_by_repo[repo]
      try:
        clist.lookup(dep.revision)
      # Narrowed from a bare `except:`, which would also have swallowed
      # KeyboardInterrupt/SystemExit. If the already-fetched commit list
      # doesn't contain this revision, fall through and check the repo out.
      except Exception:
        pass
      else:
        return clist

    backend = new_backends_by_repo.get(repo)
    if backend is None:
      # We check it out in the `.recipe_deps` folder. This is a slight
      # abstraction leak, but adding this to RecipeDeps just for autoroller
      # seemed like a worse alternative.
      dep_path = os.path.join(recipe_deps.recipe_deps_path, repo)
      backend = GitBackend(dep_path, dep.url)
      # Remember the backend. Previously it was never stored, so the cache
      # above was permanently empty and every call for the same repo paid
      # for a fresh GitBackend and checkout.
      new_backends_by_repo[repo] = backend
    # Always check out the requested revision: different calls for the same
    # repo may name different revisions. The backend caches resolved
    # refspecs, so repeated checkouts avoid redundant network traffic.
    backend.checkout(dep.branch, dep.revision)

    clist = CommitList.from_backend(dep, backend)
    commit_lists_by_repo[repo] = clist
    return clist

  config_finder = _ConfigFinder(commit_lists_by_repo, get_new_repo_commit_list)

  # Tracks the commits we make candidates from
  cursors_by_repo = {
      repo: clist.cursor() for repo, clist in commit_lists_by_repo.items()
  }

  # Tracks the currently accepted config for the purposes of comparison/scoring,
  # determining which repos are top level (and therefore do not have their
  # revisions explicitly set by other repos) and determining which repos must be
  # pinned by new configs
  current_config = _Config({
      repo: cursor.current.revision
      for repo, cursor in cursors_by_repo.items()
  })
  top_level_repos = None
  yielded_configs = set([current_config])

  while True:
    if top_level_repos is None:
      # A repo is top level iff no currently-pinned commit lists it as a
      # dependency.
      top_level_repos = set(current_config)
      for repo, revision in current_config.items():
        commit = commit_lists_by_repo[repo].lookup(revision)
        top_level_repos.difference_update(commit.spec.deps)
      top_level_repos = frozenset(top_level_repos)

    candidate_configs = set()
    for repo, cursor in cursors_by_repo.items():
      if repo not in top_level_repos:
        continue
      commit = cursor.current
      candidate_configs.update(
          config_finder.find_configs(repo, commit.revision, current_config))
    candidate_configs.difference_update(yielded_configs)

    # Because all yielded configs have been removed, at this point, one of the
    # following conditions is true for each of the candidate configs:
    # 1. The config has never been produced by the config finder before; the
    #    score must be computed for the first time
    # 2. The config has been produced by the config finder in a previous round
    #    but a lower-scoring config in the round was accepted; the score must
    #    be re-computed because the current config has changed
    # So there's no opportunity for caching the scores.
    key_fn = (lambda c: _score(commit_lists_by_repo, c, current_config,
                               top_level_repos))
    candidate_configs = sorted(candidate_configs, key=key_fn)

    for candidate_config in candidate_configs:
      yielded_configs.add(candidate_config)
      accepted = yield current_config, candidate_config
      if accepted:
        new_revisions = candidate_config
        current_config = candidate_config
        # Setting top_level_repos to None will cause it to be recomputed at the
        # start of the next round in case a change removed a dependency edge
        # creating a new top level repo
        top_level_repos = None
        break
    else:
      new_revisions = {}
      # Only advance the top level repos, the revisions of the top level repos
      # determine the revisions of other repos. The cursors for the other repos
      # will be updated when configs are accepted.
      for repo in top_level_repos:
        next_candidate = cursors_by_repo[repo].next_roll_candidate
        if next_candidate:
          new_revisions[repo] = next_candidate.revision

    # There are no more candidates
    if not new_revisions:
      return

    for repo, revision in new_revisions.items():
      cursor = cursors_by_repo.get(repo)
      if cursor is None:
        # A repo newly introduced as a dependency; start tracking it.
        cursor = commit_lists_by_repo[repo].cursor()
        cursors_by_repo[repo] = cursor
      cursor.advance_to(revision)
| |
| |
def _report_commit_counts(commit_lists_by_repo):
  """Prints per-repo pending commit counts to stderr and logs a summary.

  Repos whose commit list holds only a single commit (nothing beyond the
  current pin) are omitted from the report.
  """
  pending = {}
  for repo, commits in commit_lists_by_repo.items():
    # A list of length 1 means there is nothing new beyond the current pin.
    if len(commits) > 1:
      pending[repo] = len(commits) - 1

  for repo, count in pending.items():
    print('  %s: %d commits' % (repo, count), file=sys.stderr)

  if LOGGER.isEnabledFor(logging.INFO):
    LOGGER.info('analyzing %d commits across %d repos', sum(pending.values()),
                len(pending))
  # NOTE(review): the report above goes to stderr, but stdout is flushed here;
  # presumably this keeps interleaved stdout output ordered - confirm whether
  # sys.stderr.flush() was intended instead.
  sys.stdout.flush()
| |
| |
| def _describe_candidate_config(current_config, candidate_config, |
| commit_lists_by_repo): |
| config_description = [] |
| for repo, revision in candidate_config.items(): |
| if repo not in current_config: |
| prev_revision = '*not set*' |
| dist_description = 'new' |
| else: |
| prev_revision = current_config[repo] |
| dist = commit_lists_by_repo[repo].dist(prev_revision, revision) |
| if dist == 0: |
| continue |
| if dist is None: |
| dist_description = 'backwards' |
| else: |
| dist_description = '{} commits'.format(dist) |
| config_description.append(' {}: {} -> {} ({})'.format( |
| repo, prev_revision, revision, dist_description)) |
| return '\n'.join(config_description) |
| |
| |
def get_roll_candidates(recipe_deps):
  """Returns a list of RollCandidate objects.

  Prints diagnostic information to stderr.

  Args:
    * recipe_deps (RecipeDeps)

  Returns:
    good_candidates (list(RollCandidate)) - The list of valid roll candidates in
      least-changed to most-changed order.
    bad_candidates (list(RollCandidate)) - The list of invalid (but considered)
      roll candidates.
    repos (dict(repo_name, CommitList)) - A repos dictionary suitable for
      invoking RollCandidate.changelist().
  """
  # With -O overrides, repos don't have a usable backend to fetch commits from.
  if not all(repo.backend for repo in recipe_deps.repos.values()):
    raise ValueError('get_roll_candidates does not work with -O overrides.')

  start = time.time()

  print('finding roll candidates... ', file=sys.stderr)
  # Build a CommitList for every dependency repo; the main repo itself is
  # excluded since it's not rolled.
  commit_lists_by_repo = {
      repo_name:
      CommitList.from_backend(recipe_deps.main_repo.simple_cfg.deps[repo_name],
                              repo.backend)
      for repo_name, repo in recipe_deps.repos.items()
      if repo_name != recipe_deps.main_repo_id
  }

  _report_commit_counts(commit_lists_by_repo)

  current_pb = recipe_deps.main_repo.recipes_cfg_pb2

  good_candidates = []
  bad_candidates = []

  candidates_generator = _get_roll_candidates_impl(recipe_deps,
                                                   commit_lists_by_repo)

  # The initial value of accepted does not matter, the generator only
  # uses the sent value after yielding
  accepted = None
  # 1-based counter of candidate configs examined; used only for logging.
  i = 0
  while True:
    try:
      # Drive the generator manually (rather than with a for-loop) so that
      # the accept/reject decision can be sent back into it.
      current_config, candidate_config = candidates_generator.send(accepted)
    except StopIteration:
      break
    i += 1
    accepted = False

    if LOGGER.isEnabledFor(logging.INFO):
      config_description = _describe_candidate_config(current_config,
                                                      candidate_config,
                                                      commit_lists_by_repo)
      LOGGER.info("Checking config #%s\n%s", i, config_description)

    # TODO(iannucci): rationalize what happens if there's a conflict in e.g.
    # branch/url.
    # NOTE(review): current_pb is mutated in place on every iteration; this
    # assumes RollCandidate copies the proto when constructed - confirm.
    for repo, revision in candidate_config.items():
      if repo not in current_pb.deps:
        clist = commit_lists_by_repo[repo]
        current_pb.deps[repo].url = clist.url
        current_pb.deps[repo].branch = clist.branch
      current_pb.deps[repo].revision = revision

    # A candidate is "bad" if any pin moves backwards relative to the current
    # config; CommitList.dist returns None for backwards movement.
    backwards_roll = False
    for repo, revision in current_config.items():
      clist = commit_lists_by_repo[repo]
      if clist.dist(revision, candidate_config[repo]) is None:
        backwards_roll = True
        break
    if backwards_roll:
      LOGGER.info('rejecting config #%s due to backwards roll', i)
      bad_candidates.append(RollCandidate(current_pb))
    else:
      LOGGER.info('config #%s accepted', i)
      good_candidates.append(RollCandidate(current_pb))
      # Signal that the config is accepted so that the _impl function will start
      # a new round using this config as the current config
      accepted = True

  LOGGER.info("terminating: no more candidates")

  print(
      'found %d/%d good/bad candidates in %0.2f seconds' %
      (len(good_candidates), len(bad_candidates), time.time() - start),
      file=sys.stderr)
  # NOTE(review): the messages above go to stderr but stdout is flushed here;
  # confirm whether sys.stderr.flush() was intended.
  sys.stdout.flush()
  return good_candidates, bad_candidates, commit_lists_by_repo