# Source blob: 7b13e7ab93b89db768e9aae6c0eabf7f4ca6cbc5
# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Returns a sequence of new innocent CLs which failed pre-cq.
Queries CIDB for sanity Pre-CQ build failures.
"""
from __future__ import print_function
from __future__ import absolute_import
from __future__ import division
import datetime
import itertools
from chromite.lib import constants
from chromite.lib import cidb
from chromite.lib import clactions
from chromite.lib import parallel
from chromite.lib import cros_logging as logging
from google.cloud import datastore # pylint: disable=E0611,import-error
from infra_libs import ts_mon
from exonerator import checkpointlib
from exonerator import innocent_cls_cq
from exonerator import innocent_cls_precq_flake
# First component of the Datastore key under which processed Pre-CQ builds
# are checkpointed.
PRECQ_PROCESSED_KEY = 'PreCQProcessed'

# Build request reason used to look up the sanity Pre-CQ runs.
_SANITY_BUILD_REASON = 'sanity-pre-cq'

# Forgive failed builds up to 3 days before a sanity failure, and 1 day
# afterward.
_DAYS_LOWER_LIMIT = 3
_DAYS_UPPER_LIMIT = 1

# Number of CLs to process per tick, keyed by whether it is peak hours.
_PRE_CQ_CL_LIMITS = {
    # Only kick off one build per tick during peak hours
    True: 1,
    # Kick off pre-CQ builds faster during non-peak hours
    False: 5,
}
# Gauge set by _FailedSanityRuns to the number of failed sanity runs found.
_SANITY_FAILURES_EXAMINED = ts_mon.GaugeMetric(
    'chromeos/exonerator/sanity/failures_examined',
    description=(
        'The current number of failed sanity runs being examined by '
        'CL Exonerator.'),
    field_spec=None)
def NewInnocentCLs(conn, on_peak, checkpoint=True):
  """Finds new innocent CLs since the last run, with checkpointing.

  The candidate CLs are consumed eagerly while the batch is assembled, so any
  checkpointing performed by the underlying sequence happens before this
  function returns.

  Args:
    conn: The CIDBConnection to use.
    on_peak: Whether it's business hours (peak load) or off-peak.
    checkpoint: Whether to save progress after every buildMessage processed.

  Returns:
    A list containing a single batch (a list of ChangeWithBuild objects) of
    possibly innocent CLs whose Pre-CQ failures were associated with a recent
    sanity build failure, or an empty list when no candidate qualifies.
  """
  max_cls = _PRE_CQ_CL_LIMITS[on_peak]
  candidates = _InnocentCLsFromSanityBuildFailuresCheckpointed(
      conn, on_peak, max_cls, checkpoint)

  # TODO(crbug.com/820379) add some logic here to find all the CLs which
  # participated in the build. Put them together in a batch to yield.
  # Also change docstring of limit to be "number of BUILDS to process."

  batch = []
  for change, build_id in candidates:
    # Verify that this build was the latest run. We don't want to exonerate a
    # stale pre-cq run when there is a newer run.
    if _ShouldExonerate(conn, change, build_id):
      batch.append(innocent_cls_cq.ChangeWithBuild(change, build_id))

  if not batch:
    return []
  return [batch]
def _InnocentCLsFromSanityBuildFailuresCheckpointed(conn, on_peak, limit,
                                                    checkpoint):
  """Finds Pre-CQ failures to forgive, optionally checkpointing progress.

  When checkpointing, processed builds are recorded in a Datastore entity
  keyed by (PRECQ_PROCESSED_KEY, build id). Specifically,

    x = next(fetch)
    # x is not checkpointed yet
    y = next(fetch)  # This checkpoints that x is done

  Args:
    conn: The CIDBConnection to use.
    on_peak: Whether it's business hours (peak load) or off-peak.
    limit: The maximum number of innocent CLs to yield.
    checkpoint: Whether to save progress after every build yielded.

  Returns:
    An iterable of (change, pre-cq build id) tuples to forgive.
  """
  # The first priority is to forgive pre-CQ builds associated with a sanity
  # failure. Afterwards, we can consider pre-CQ failures due to flakey pre-CQ
  # configs.
  candidates = itertools.chain(
      _InnocentCLsFromSanityBuildFailures(conn),
      innocent_cls_precq_flake.InnocentPreCQsFromFlake(conn, on_peak))

  if not checkpoint:
    # No bookkeeping requested: just cap the number of results.
    return itertools.islice(candidates, limit)

  client = datastore.Client()

  def _CheckpointKey(build_id):
    """Builds the Datastore key of the checkpoint entity for |build_id|."""
    return client.key(PRECQ_PROCESSED_KEY, build_id)

  def _IsProcessed(pair):
    """Whether the (change, build_id) pair was already checkpointed."""
    change, build_id = pair
    return _AlreadyForgiven(client, change, build_id)

  def _Checkpoint(pair):
    """Records the (change, build_id) pair as processed."""
    change, build_id = pair
    entity = client.get(key=_CheckpointKey(build_id))
    if entity:
      logging.info('Updating Pre-CQ checkpoint row %s.', build_id)
    else:
      logging.info('Inserting Pre-CQ checkpoint row %s.', build_id)
      entity = datastore.Entity(key=_CheckpointKey(build_id))
    # A build may include more than one CL, so add a property to the entity
    # for each CL of the build that was processed.
    cl_property = str(change.gerrit_number)
    entity.update({cl_property: True})
    client.put(entity)

  return checkpointlib.CheckpointSequence(
      _IsProcessed, _Checkpoint, candidates, limit=limit)
def _InnocentCLsFromSanityBuildFailures(conn):
  """Finds Pre-CQ failures to forgive because of failed sanity runs.

  Looks up the latest build requests made for the sanity Pre-CQ reason, keeps
  the ones that failed, and collects the forgivable Pre-CQ failures of each.

  Args:
    conn: The CIDBConnection to use.

  Yields:
    Tuples of (change, pre-cq build) to forgive.
  """
  latest_runs = conn.GetLatestBuildRequestsForReason(_SANITY_BUILD_REASON)
  # One lazy stream of candidates per failed sanity run, flattened in order.
  per_failure = (
      _InnocentPreCQsFromSanity(conn, failed_run)
      for failed_run in _FailedSanityRuns(conn, latest_runs))
  for change, build_id in itertools.chain.from_iterable(per_failure):
    yield change, build_id
def _FailedSanityRuns(conn, sanity_runs):
  """Filters the latest sanity runs to only the failed sanity runs.

  This is a generator: the build statuses are fetched (and the
  failures-examined gauge is set) when iteration starts, not at call time.

  Args:
    conn: The CIDBConnection to use.
    sanity_runs: An iterable of sanity builds (has a .build_id).

  Yields:
    The sanity runs whose build status is BUILDER_STATUS_FAILED, in the order
    they appear in |sanity_runs|.
  """
  # Materialize the input: it is measured with len() and traversed twice
  # below, which a one-shot iterable would not support.
  sanity_runs = list(sanity_runs)
  if not sanity_runs:
    # Nothing to look up, so don't request a process pool of zero workers.
    _SANITY_FAILURES_EXAMINED.set(0)
    return

  # TODO(phobbs) add a JOIN buildTable in GetLatestBuildRequestsForReason
  # to avoid creating / destroying a bunch of cidb connections.
  statuses = parallel.RunTasksInProcessPool(
      conn.GetBuildStatus,
      [[run.build_id] for run in sanity_runs],
      processes=len(sanity_runs))
  faileds = [status['status'] == constants.BUILDER_STATUS_FAILED
             for status in statuses]
  _SANITY_FAILURES_EXAMINED.set(sum(map(int, faileds)))

  # Use the builtin zip(): itertools.izip only exists on Python 2.
  for sanity_run, failed in zip(sanity_runs, faileds):
    if failed:
      # TODO(crbug.com/820379) yield build ids from here.
      yield sanity_run
def _InnocentPreCQsFromSanity(conn, sanity_failure):
  """Finds forgivable Pre-CQ failures around a sanity failure.

  Fetches the build history of the sanity failure's build config for the
  dates spanning _DAYS_LOWER_LIMIT days before to _DAYS_UPPER_LIMIT days after
  the failure, and reports each build that has at least one failed action.

  Args:
    conn: A CIDBConnection
    sanity_failure: A BuildRequest corresponding to a sanity build failure.

  Yields:
    (GerritPatchTuple, build_id) corresponding to pre-cq failures to forgive.
    Both are taken from the first action recorded for the build.
  """
  failed_at = sanity_failure.timestamp
  window_start = failed_at - datetime.timedelta(days=_DAYS_LOWER_LIMIT)
  window_end = failed_at + datetime.timedelta(days=_DAYS_UPPER_LIMIT)

  # The history query works on whole dates, so the time of day is dropped.
  history = conn.GetBuildHistory(
      sanity_failure.request_build_config,
      cidb.CIDBConnection.NUM_RESULTS_NO_LIMIT,
      start_date=window_start.date(),
      end_date=window_end.date())

  for build in history:
    build_actions = conn.GetActionsForBuild(build['id'])
    # Skip builds without actions and builds where nothing failed.
    if not build_actions or not any(
        entry.status == constants.BUILDER_STATUS_FAILED
        for entry in build_actions):
      continue

    first = build_actions[0]
    change = clactions.GerritPatchTuple(
        first.change_number, first.patch_number,
        first.change_source == 'internal')
    # TODO(crbug.com/820379) yield build ids from here.
    yield change, first.build_id
def _AlreadyForgiven(ds, change, build_id):
  """Whether the given Pre-CQ build was forgiven already for this change.

  Args:
    ds: The cloud Datastore client.
    change: A GerritPatchTuple instance.
    build_id: The CIDB build id for the pre-cq run.

  Returns:
    False when no checkpoint entity exists for the build; otherwise the value
    stored under the change's gerrit number, or False if it is absent.
  """
  checkpoint = ds.get(key=ds.key(PRECQ_PROCESSED_KEY, build_id))
  if checkpoint is None:
    return False
  # Each entity stores the CLs that were processed by exonerator for that
  # build. For simplicity, assume gerrit_number:change is a one-to-one mapping.
  # Technically, this is false, but it's extremely unlikely to affect the
  # behavior.
  cl_property = str(change.gerrit_number)
  return checkpoint.get(cl_property, False)
def _ShouldExonerate(conn, change, build_id):
  """Whether we should exonerate the patch.

  Args:
    conn: The cidb.CIDBConnection
    change: A GerritPatchTuple to possibly exonerate
    build_id: The threshold build_id.

  Returns:
    True only if no newer Pre-CQ run picked up the CL after |build_id| and
    the patch has not been exonerated before.
  """
  history = conn.GetActionsForChanges([change])
  if _ExistsNewerPreCQForCL(history, build_id):
    # A newer run supersedes this one; exonerating it would be stale.
    return False
  return not _PatchAlreadyExonerated(history, change)
def _ExistsNewerPreCQForCL(actions, build_id):
  """Whether a newer build of the same config picked up the CL.

  The build config is taken from the first action belonging to |build_id|.

  Args:
    actions: The CLActions for the change
    build_id: The threshold build_id.

  Returns:
    True if some CL_ACTION_PICKED_UP action for the same build config has a
    build id greater than |build_id|. False otherwise, including when no
    action belongs to |build_id|.
  """
  threshold_actions = [a for a in actions if a.build_id == build_id]
  if not threshold_actions:
    return False
  config = threshold_actions[0].build_config

  for candidate in actions:
    is_pickup_for_config = (
        candidate.build_config == config
        and candidate.action == constants.CL_ACTION_PICKED_UP)
    if is_pickup_for_config and candidate.build_id > build_id:
      return True
  return False
def _PatchAlreadyExonerated(actions, change):
  """Whether the patch was already exonerated once.

  This prevents infinitely retrying pre-cq on a single patch.

  TODO: consider retrying more times (2 or 3?) if there is low pre-cq load.

  Args:
    actions: The CLActions for the change
    change: A GerritPatchTuple to find actions for.

  Returns:
    True if any action is a CL_ACTION_EXONERATED for the change's patch
    number, False otherwise.
  """
  return any(
      entry.action == constants.CL_ACTION_EXONERATED
      and entry.patch_number == change.patch_number
      for entry in actions)