blob: 847f950826cc3f7d8a54e173f6b6ed95c1b9e9fd [file] [log] [blame]
# Copyright 2017 The LUCI Authors. All rights reserved.
# Use of this source code is governed under the Apache License, Version 2.0
# that can be found in the LICENSE file.
import sys
import time
from recipe_engine import package
from .commit_list import CommitList
from .roll_candidate import RollCandidate
def get_commitlists(deps):
"""Returns {'project_id': CommitList} for every repo this recipe repo depends
on.
Args:
context (PackageContext) - The local context for where the repos live, etc.
deps (dict(project_id, RepoSpec)) - The dependencies to grab.
Returns dict(project_id, CommitList) where each project_id is one of the repos
mentioned in the deps.
"""
return {
project_id: CommitList.from_repo_spec(repo_spec)
for project_id, repo_spec in deps.iteritems()
}
def find_best_rev(repos):
"""Returns the project_id of the best repo to roll.
"Best" is determined by "is an interesting commit and moves the least amount
of commits, globally".
"Interesting" means "the commit modifies one or more recipe related files",
and is defined by CommitMetadata.roll_candidate.
There are two ways that rolling a repo can move commits:
1) As dependencies. Rolling repo A that depends on (B, C) will take
a penalty for each commit that B and C need to move in order to be
compatible with the new A revision.
2) As dependees. Rolling repo A which is depended on by (B, C) will take
a penalty for each commit that B and C need to move in order to be
compatible with the new A revision.
Each repo is analyzed with these two rules and a score is computed. The score
may be negative if one of the other repos (e.g. B or C) needs A to catch up
(say that B already rolled forward, and now it requires that its dependency
A needs to roll forward too).
As a tiebreaker, the commit with the lowest commit timestamp will be picked.
So if two repos cause the same amount of global commit movement, the "older"
of the two commits will roll first. Clearly this is best-effort as there's no
meaningful enforcement of timestamps between different repos, but it's usually
close enough to reality to be helpful. This is done to reflect a realistic
commit ordering; it doesn't make sense for two independent dependencies to
move such that a very large time gap develops between them (assuming the
timestamps are sensible). All that said, the precise timestamp value is not
necessary for the correctness of the algorithm, since it's only involvement is
a tiebreaker between otherwise valid choices.
The repo with the lowest score wins and is returned. Note that rolling this
repo does NOT guarantee a consistent dependency graph. This is OK, as it means
that the outer loop will just call this function multiple times to roll each
best repo until there's a consistent graph.
Args:
repos (dict(project_id, CommitList)) - The repos to analyze. This function
will only read from the dict and CommitLists (it will not modify them).
Returns (str) - The project_id of the repo to advance next.
"""
# The project_ids of all the repos that can move
repo_set = set(repos)
best_project_id = None
best_score = () # (# commits moved, timestamp)
for project_id, clist in repos.iteritems():
assert isinstance(clist, CommitList)
candidate, movement_score = clist.next_roll_candidate
if not candidate:
continue
unaccounted_repos = set(repo_set)
# first, determine if rolling this repo will force other repos to move.
for d_pid, dep in candidate.spec.deps.iteritems():
unaccounted_repos.discard(d_pid)
movement_score += repos[d_pid].dist_to(dep.revision)
# Next, see if any unaccounted_repos depend on this repo.
for pid in unaccounted_repos:
movement_score += repos[pid].dist_compatible_with(pid, candidate.revision)
score = (movement_score, candidate.commit_timestamp)
if not best_score or score < best_score:
best_score = score
best_project_id = project_id
return best_project_id
def is_consistent(spec_pb, repos):
for pid in spec_pb.deps:
for d_pid, dep in repos[pid].current.spec.deps.iteritems():
if dep.revision != spec_pb.deps[d_pid].revision:
return False
return True
def _get_roll_candidates_impl(context, package_spec, repos):
current_pb = package_spec.spec_pb
ret_good = []
ret_bad = []
while True:
best_project_id = find_best_rev(repos)
if best_project_id is None:
# end when there's no best rev to roll
return ret_good, ret_bad
rev = repos[best_project_id].advance()
if not rev:
return ret_good, ret_bad
for d_pid, dep in sorted(rev.spec.deps.items()):
if not repos[d_pid].advance_to(dep.revision):
return ret_good, ret_bad
# TODO(iannucci): rationalize what happens if there's a conflict in e.g.
# branch/url.
# First, copy all revisions from clists to current_pb. Note that this will
# accumulate new repos! Because we don't have a quick way to evaluate
# implicit vs. explicit repo dependencies, we can't algorithmically remove
# them currently.
#
# However, we expect this to be a pretty uncommon case, and removing them by
# hand should be sufficient for now.
#
# If someone really wants to make it automatic, they'll need to make a quick
# way to scan all DEPS within the current repo to see which packages are
# actually used. If you have this information, then removing a repo here
# would be easy (if we don't directly depend on it and none of our
# immediate dependencies list it, then we can remove it).
for pid, clist in repos.iteritems():
current_pb.deps[pid].revision = clist.current.revision
# Next, copy ONE instance of any new repos
for pid in current_pb.deps:
for d_pid, dep in repos[pid].current.spec.deps.iteritems():
if d_pid not in current_pb.deps:
current_pb.deps[d_pid].CopyFrom(dep)
if not is_consistent(current_pb, repos):
ret_bad.append(RollCandidate(current_pb))
else:
ret_good.append(RollCandidate(current_pb))
# See if this roll introduced any new dependencies we need to worry about
# going forward.
new_repos = set(current_pb.deps) - set(repos)
if new_repos:
# Going forward we have to account for the new repos. current_pb has all
# the old and new repos already.
package_spec = package.PackageSpec.from_package_pb(context, current_pb)
package_spec.ensure_up_to_date(context)
# Add any newly discovered repos to our repos set. We don't replace
# repos because we want to keep the metadata for all already-rolled
# revisions.
repos.update(get_commitlists({
pid: repo_spec
for pid, repo_spec in package_spec.deps.iteritems()
if pid in new_repos
}))
def get_roll_candidates(context, package_spec):
"""Returns a list of RollCandidate objects.
Args:
context (PackageContext)
package_spec (PackageSpec)
Returns:
good_candidates (list(RollCandidate)) - The list of valid roll candidates in
least-changed to most-changed order.
bad_candidates (list(RollCandidate)) - The list of invalid (but considered)
roll candidates.
repos (dict(project_id, CommitList)) - A repos dictionary suitable for
invoking RollCandidate.changelist().
"""
start = time.time()
# not on py3 so we can't use print(..., flush=True) :(
sys.stdout.write('finding roll candidates... ')
sys.stdout.flush()
repos = get_commitlists(package_spec.deps)
ret_good, ret_bad = _get_roll_candidates_impl(context, package_spec, repos)
print('found %d/%d good/bad candidates in %0.2f seconds' % (
len(ret_good), len(ret_bad), time.time()-start))
sys.stdout.flush()
return ret_good, ret_bad, repos