// blob: b1f45f859b9e58b143a8758d659b1ddcb918cbb8 [file] [log] [blame]
// Copyright 2020 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package cv.internal.changelist;
option go_package = "go.chromium.org/luci/cv/internal/changelist";
import "google/protobuf/timestamp.proto";
import "go.chromium.org/luci/common/proto/gerrit/gerrit.proto";
// Snapshot captures the CL info which CV observed at a certain time.
//
// Inside a CL entity, it holds the latest Gerrit data known to CV.
// Inside a RunCL entity, it holds the data pertaining to a fixed patchset.
message Snapshot {
  // Next tag: 12.
  //
  // NOTE(review): tags 9 and 10 are not used by any field visible here; if
  // they belonged to removed fields, declare them `reserved` -- confirm.

  // Timestamp reported by the external system.
  // CV uses it to decide whether the external system must be re-queried.
  google.protobuf.Timestamp external_update_time = 1;

  // The LUCI project in whose context this snapshot was saved.
  //
  // A CL isn't a resource of CV, so CV can't infer whether a specific LUCI
  // project has access to a CL w/o re-querying Gerrit, which effectively
  // means recomputing the snapshot.
  string luci_project = 2;

  // Dependencies of the CL after resolution.
  repeated Dep deps = 3;

  // Incremental number of the latest patchset (aka revision).
  int32 patchset = 4;

  // The smallest, and hence the earliest, patchset which is code-wise
  // equivalent to the latest one.
  //
  // See gerrit.EquivalentPatchsetRange function for details.
  //
  // CV tracks this to determine which prior tryjobs can be re-used and
  // which can be canceled.
  int32 min_equivalent_patchset = 5;

  // When non-empty, lists problems encountered while ingesting the CL into
  // CV which ought to be communicated back to the user.
  repeated CLError errors = 6;

  // Outdated establishes conditions for refreshing Snapshot after CV
  // mutations.
  message Outdated {
    // TODO(tandrii): support gating refresh on having refresher start no
    // earlier than some point in time, e.g. EVersion of this CL entity.

    // TODO(tandrii): support gating refresh on having minimum
    // external_update_time or equivalent.
  }

  // When set, the Snapshot must be considered likely outdated due to recent
  // CV mutations.
  //
  // In particular, Project Manager does not act on the CLs with .Outdated
  // set.
  Outdated outdated = 7;

  // Ordered list of key-value pairs which may later be interpreted by CV
  // guts.
  //
  // For example,
  //   [("No-Tree-Checks", "True"), ("NOTRY", "TRUE")].
  //
  // In case of Gerrit CLs, these are extracted from CL descriptions.
  // The Git-Footer-Style keys are normalized, and the values have leading
  // and trailing whitespace stripped.
  repeated StringPair metadata = 8;

  // Data specific to the kind of the CL.
  oneof kind {
    Gerrit gerrit = 11;
  }
}
// DepKind tells how strictly a dependency must precede its dependent CL.
enum DepKind {
  // The kind was not specified.
  DEP_KIND_UNSPECIFIED = 0;

  // The dep MUST be patched in / submitted before the dependent CL.
  HARD = 1;

  // The dep SHOULD be patched in / submitted before the dependent CL,
  // but this is not mandatory.
  SOFT = 2;
}
// Dep is a dependency of a CL, referenced by its internal CV ID.
message Dep {
  // Internal CV ID of the CL which is the dependency.
  int64 clid = 1;

  // How strictly the dependency must precede the dependent CL.
  DepKind kind = 2;
}
// Gerrit is the Gerrit-specific part of a CL Snapshot.
message Gerrit {
  // Gerrit host.
  string host = 5;

  // Subset of ChangeInfo; only the fields listed below are kept.
  //
  // NOTE: keep this list in sync with RemoveUnusedGerritInfo() function.
  //  * number
  //  * owner
  //      * id
  //      * email (may be not set)
  //  * project
  //  * ref
  //  * status
  //  * current_revision
  //  * revisions
  //      * kind
  //      * number
  //      * ref
  //      * created
  //      * commit (for current_revision only)
  //          * message (current CL description)
  //  * labels
  //      * optional
  //      * all (only if vote != 0)
  //          * user
  //              * id
  //              * email (may be not set)
  //          * value
  //  * messages
  //      * id
  //      * date
  //      * message
  //      * author
  //          * id
  //      * realauthor
  //          * id
  //  * updated
  //  * created
  gerrit.ChangeInfo info = 1;

  // Names of the files touched in the current revision.
  //
  // Derived from gerrit.ListFilesResponse, see
  // https://gerrit-review.googlesource.com/Documentation/rest-api-changes.html#list-files.
  repeated string files = 2;

  // Git dependencies of the current revision.
  repeated GerritGitDep git_deps = 3;

  // Free-form dependencies. Currently, sourced from CQ-Depend footers.
  // In the future, this may be derived from Gerrit hashtags, topics, or
  // other mechanisms.
  repeated GerritSoftDep soft_deps = 4;
}
// GerritGitDep is a dependency of one Gerrit CL discovered by following the
// Git child->parent chain.
message GerritGitDep {
  // Host is omitted because it's always the same as that of the CL.

  // Gerrit change number.
  int64 change = 1;

  // Set iff this dep is an immediate parent of the Gerrit CL.
  //
  // An immediate dep must be submitted before its child.
  // Non-immediate CLs don't necessarily have to be submitted before.
  // For example, for a chain
  //   <base> <- A1 <- B1 <- C1 <- D1
  // D1's deps are [A,B,C] but only C is immediate, and 1 stands for
  // patchset. Developer may then swap B,C without re-uploading D (say, to
  // avoid patchset churn), resulting in a new logical chain:
  //   <base> <- A1 <- C2 <- B2
  //               \
  //                <- B1 <- C1 <- D1
  //
  // In this case, Gerrit's related changes for D1 will still return
  // A1,B1,C1, which CV interprets as: C must be landed before D, while B
  // and A should be landed before D.
  bool immediate = 2;
}
// GerritSoftDep identifies a Gerrit CL by its host and change number.
message GerritSoftDep {
  // Host of the Gerrit instance.
  string host = 1;

  // Number of the Gerrit change.
  int64 change = 2;
}
// ApplicableConfig keeps track of configs applicable to a CL.
//
// It is computed from the known set of LUCI project configs, whose versions
// CV updates independently, so ApplicableConfig is also eventually
// consistent.
//
// Normally, there is exactly 1 applicable config: 1 project with 1 config
// group. If the CL is no longer watched by CV, there are 0 applicable
// configs.
//
// Sometimes, there can be 2+ applicable configs. This happens if either:
//  * eventual consistency: responsibility for the CL is moved from one LUCI
//    project to another. There is no way to do this atomically, so the CL
//    may temporarily end up with 0 or 2 projects watching it, before
//    settling on just 1.
//  * misconfiguration: two projects or 2 different ConfigGroups within the
//    same project watch the same CL.
// In either case, CV refuses to guess and will abstain from processing such
// CLs, but storing the list is very useful for CV debugging and potentially
// for better diagnostic messages to CV users and LUCI project owners.
message ApplicableConfig {
  // NOTE(review): tag 1 is not used by any field of ApplicableConfig visible
  // here; if it belonged to a removed field, declare it `reserved` --
  // confirm.

  message Project {
    // Name of the LUCI project.
    string name = 1;

    // IDs of the specific ConfigGroups. See
    // cv/internal/config.ConfigGroupID.
    //
    // The referenced version may no longer be available to datastore,
    // commonly happening if the CL wasn't active for a long time.
    repeated string config_group_ids = 2;
  }

  // Projects whose configs are applicable to the CL.
  repeated Project projects = 2;
}
// Access records which LUCI project can or can't see a CL.
//
// If a LUCI project has Access, it means both:
// (1) the project can read details of the CL (via Git/Gerrit ACLs);
// (2) the project is the only LUCI project watching this CL in CV
// (via the CV config).
// Note: there can still be several applicable ConfigGroups of the same
// project (see ApplicableConfig).
//
// In practice, .Access is set in 4 cases:
//
// (a) `CQ-Depend: host:number` Gerrit CL footers allow users to specify
// arbitrary dependencies, which typically happens due to typos,
// but malicious actors can try to get CL details of restricted projects.
// Either way, CV must not be a confused deputy here and must keep track
// which project can see what.
//
// (b) due to recent re-configuration of one or more LUCI projects, either
// in CV config and/or in Gerrit ACLs, the previously watched & readable CL
// becomes unwatched and/or unreadable.
//
// (c) a previously existing CL was deleted (e.g. by its owner or Gerrit
// administrators).
//
// (d) eventual consistency of Gerrit masquerading as HTTP 404 on a stale
// replica, while a quorum of replicas thinks the CL actually exists and the
// specific LUCI project has access to it.
//
// Unfortunately, (d) isn't easy to distinguish from (b) and (c), so CV resorts
// to tracking time since CL became invisible -- the longer, the more likely it
// is (b) or (c).
//
// Furthermore, in case of (a), iff CV knows nothing about specific Gerrit CL
// identified as `CQ-Depend: host:change`, CV in general can't determine which
// LUCI project is allowed to watch this CL *before* fetching Gerrit project
// (repo) and target ref.
//
//
// NOTE on CV as confused deputy.
//
// CV works with multiple LUCI projects. As of this writing (June 2021),
// unfortunately, CV doesn't verify that Gerrit repos watched by a LUCI project
// are in fact owned by that LUCI project. Thus, nothing prevents one LUCI
// project from starting to watch repos de-facto owned by another LUCI project.
// This in turn brings 2 problems:
//
// (1) Denial of service: unsolved.
// Mitigation: CV will refuse to work with CLs which are watched by more
// than 1 project. Since CV will communicate this by posting a message to
// the affected CL, this should be noticed and fixed quickly.
//
// (2) Information leaks: solved.
// Each LUCI project MUST use project-scoped service account (PSSA)
// (migration is under way, see https://crbug.com/824492).
// CV uses this account for all interaction with Gerrit on behalf of a
// specific LUCI project. Corresponding Gerrit repos:
// * SHOULD limit read access to its own PSSA + developers,
// * MUST limit Submit rights to its own PSSA and possibly developers.
//
// For example,
// * `infra` project has all its Gerrit CLs public and doesn't care about
// information leaks. All other LUCI projects can read its CLs, as well
// as the whole Internet.
// * `infra-internal` project protects its Gerrit CLs, making them visible
// to `infra-internal-scoped@...` account only.
// When CV queries Gerrit on `infra-internal` behalf, CV uses
// `infra-internal-scoped` account and can fetch the data.
// * Suppose malicious actor compromised `infra` repo, and placed a new CV
// config there to start watching CLs of the `infra-internal` project
// as well as super/secret/repo, which wasn't watched by any CV before.
// * Unfortunately, CV can't currently object to the new config.
// * However, when querying Gerrit on `infra` behalf, CV uses
// `infra-scoped@...` account, which presumably won't be configured with
// read access to either infra-internal or super/secret/repo.
// * So, corresponding CLs will have .Access entry recording that
// `infra` has no access to them.
// * NOTE: CLs of infra-internal will also have .ApplicableConfig with two
// projects there, which will prevent normal operation of
// `infra-internal` CV but will not cause any leaks.
message Access {
  // Project records the access state of a single LUCI project.
  message Project {
    // Deprecated. Use no_access_time instead.
    //
    // Marked with the `deprecated` option so that generated code and tooling
    // flag remaining uses. It must still be consulted while no_access_time
    // is unset, see the TODO on no_access_time.
    bool no_access = 1 [deprecated = true];

    // The time when this was last re-confirmed.
    google.protobuf.Timestamp update_time = 2;

    // The time after which CV should consider lack of access stable.
    //
    // TODO(crbug/1216630): may be unset until backfill is done,
    // in which case use `no_access` field.
    google.protobuf.Timestamp no_access_time = 3;
  }

  // Access records per project.
  // NOTE(review): the key is presumably the LUCI project name -- confirm.
  map<string, Project> by_project = 1;

  // TODO(tandrii): per-project ApplicableConfig here.
}
// CLError encapsulates all kinds of CL errors. Each of them ultimately
// results in purging of the CL while the reason is communicated to the
// relevant users.
//
// The primary goal of CLError is to carry through CV guts enough
// information to generate a clear user-friendly error message.
message CLError {
  // Next tag is 10.
  oneof kind {
    bool owner_lacks_email = 1;
    WatchedByManyConfigGroups watched_by_many_config_groups = 2;
    WatchedByManyProjects watched_by_many_projects = 8;
    InvalidDeps invalid_deps = 3;
    string unsupported_mode = 4;
    bool self_cq_depend = 5;
    string corrupt_gerrit_metadata = 6;
    ReusedTrigger reused_trigger = 7;
    // Set to true when the footer "Commit: false" is present.
    bool commit_blocked = 9;
  }

  message WatchedByManyConfigGroups {
    // Names of the config groups, without the LUCI project prefix.
    repeated string config_groups = 1;
  }

  message WatchedByManyProjects {
    repeated string projects = 1;
  }

  message InvalidDeps {
    // Deps which are not watched by the same LUCI project as the dependent.
    repeated Dep unwatched = 1;

    // Deps which are watched by the same LUCI project, but by a different
    // config group.
    repeated Dep wrong_config_group = 2;

    // Not yet submitted deps of a full run in non-combinable mode.
    repeated Dep single_full_deps = 3;

    // Not yet CQ-ed deps of a Run in combinable mode.
    repeated Dep combinable_untriggered = 4;

    // CQ-ed deps of a different mode.
    repeated Dep combinable_mismatched_mode = 5;

    // NOTE(review): `actual` vs `max_allowed` presumably count the
    // non-submitted deps found vs. the most CV supports -- confirm.
    message TooMany {
      int32 actual = 1;
      int32 max_allowed = 2;
    }

    // There are more non-submitted deps than is supported by CV.
    TooMany too_many = 6;
  }

  // ReusedTrigger means that a CL trigger (e.g. CQ+1 vote) has already
  // resulted in a CQ Run which was finalized.
  //
  // Two known cases when this happens with a Gerrit CL:
  //  1. A user uploads a CL on ref A, then votes CQ+1.
  //     Before the Dry Run completes, the CL is moved to ref B, while
  //     preserving the CQ+1 vote.
  //     The old Run is finalized, but the new Run has the exact same
  //     trigger, which in CQDaemon-compatible mode means the new Run's ID
  //     is exactly the same as the old one, so CV can't create a new Run.
  //     TODO(crbug/1223349): after CQDaemon is deleted, the Run ID
  //     generation scheme can take into account the ref of a CL, and this
  //     use case can be allowed.
  //  2. The same as above but instead of moving CL between refs, abandon
  //     and restore the CL.
  message ReusedTrigger {
    // ID of the finalized Run.
    string run = 1;
  }
}
// CLUpdatedEvent pins a CL ID to the latest EVersion known for that CL.
message CLUpdatedEvent {
  // ID of the CL.
  int64 clid = 1;

  // Latest known EVersion of the CL.
  int64 eversion = 2;
}
// CLUpdatedEvents batches several CLUpdatedEvent messages together.
message CLUpdatedEvents {
  // The batched events.
  repeated CLUpdatedEvent events = 1;
}
// StringPair is a key-value pair in which both parts are strings.
message StringPair {
  // The key of the pair.
  string key = 1;

  // The value of the pair.
  string value = 2;
}