| # Copyright 2018 The Chromium OS Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Returns a sequence of new innocent CLs which failed pre-cq. |
| |
| Queries CIDB for sanity Pre-CQ build failures. |
| """ |
| |
| from __future__ import print_function |
| from __future__ import absolute_import |
| from __future__ import division |
| |
| import datetime |
| import itertools |
| |
| from chromite.lib import constants |
| from chromite.lib import cidb |
| from chromite.lib import clactions |
| from chromite.lib import parallel |
| from chromite.lib import cros_logging as logging |
| from google.cloud import datastore # pylint: disable=E0611,import-error |
| from infra_libs import ts_mon |
| |
| from exonerator import checkpointlib |
| from exonerator import innocent_cls_cq |
| from exonerator import innocent_cls_precq_flake |
| |
| |
# Cloud Datastore entity kind used to checkpoint which pre-CQ builds have
# already been processed (see _AlreadyForgiven and the _Save closure below).
PRECQ_PROCESSED_KEY = 'PreCQProcessed'
# The build request 'reason' identifying sanity pre-CQ builds; passed to
# conn.GetLatestBuildRequestsForReason.
_SANITY_BUILD_REASON = 'sanity-pre-cq'

# Forgive failed builds up to 3 days before a sanity failure, and 1 day
# afterward.
_DAYS_LOWER_LIMIT = 3
_DAYS_UPPER_LIMIT = 1

# Maps on_peak (bool) to the maximum number of innocent CLs to process per
# tick.
_PRE_CQ_CL_LIMITS = {
    # Only kick off one build per tick during peak hours
    True: 1,
    # Kick off pre-CQ builds faster during non-peak hours
    False: 5,
}


# Gauge tracking how many failed sanity runs are under examination; set in
# _FailedSanityRuns.
_SANITY_FAILURES_EXAMINED = ts_mon.GaugeMetric(
    'chromeos/exonerator/sanity/failures_examined',
    description=('The current number of failed sanity runs being examined by '
                 'CL Exonerator.'),
    field_spec=None)
| |
| |
def NewInnocentCLs(conn, on_peak, checkpoint=True):
  """Finds new innocent CLs since the last run, with checkpointing.

  Looping over the underlying candidate sequence checkpoints each CL as it is
  processed, so each item is marked done as soon as the next one is fetched.

  Args:
    conn: The CIDBConnection to use.
    on_peak: Whether it's business hours (peak load) or off-peak.
    checkpoint: Whether to save progress after every buildMessage processed.

  Returns:
    A list containing at most one list of ChangeWithBuild objects,
    representing possibly innocent CLs whose Pre-CQ failures were associated
    with a recent sanity build failure. Empty if nothing qualifies.
  """
  per_tick_limit = _PRE_CQ_CL_LIMITS[on_peak]
  candidates = _InnocentCLsFromSanityBuildFailuresCheckpointed(
      conn, on_peak, per_tick_limit, checkpoint)

  # TODO(crbug.com/820379) add some logic here to find all the CLs which
  # participated in the build. Put them together in a batch to yield.
  # Also change docstring of limit to be "number of BUILDS to process."

  # Verify that this build was the latest run. We don't want to exonerate a
  # stale pre-cq run when there is a newer run.
  exonerable = []
  for change, build_id in candidates:
    if _ShouldExonerate(conn, change, build_id):
      exonerable.append(innocent_cls_cq.ChangeWithBuild(change, build_id))

  return [exonerable] if exonerable else []
| |
| |
def _InnocentCLsFromSanityBuildFailuresCheckpointed(conn, on_peak, limit,
                                                    checkpoint):
  """Finds Pre-CQ failures to forgive.

  Checkpoints processed builds with a 'PreCQProcessed' Datastore entity
  (PRECQ_PROCESSED_KEY). Note that an item's checkpoint is only written when
  the *next* item is requested from the sequence:

    x = next(fetch)
    # x is not checkpointed yet
    y = next(fetch)  # This checkpoints that x is done

  Args:
    conn: The CIDBConnection to use.
    on_peak: Whether it's business hours (peak load) or off-peak.
    limit: The maximum number of innocent CLs to yield.
    checkpoint: Whether to save progress after every build yielded.

  Returns:
    An iterable of (change, pre-cq build id) tuples to forgive.
  """
  # The first priority is to forgive pre-CQ builds associated with a sanity
  # failure. Afterwards, we can consider pre-CQ failures due to flakey pre-CQ
  # configs.
  seq = itertools.chain(
      _InnocentCLsFromSanityBuildFailures(conn),
      innocent_cls_precq_flake.InnocentPreCQsFromFlake(conn, on_peak))

  # Without checkpointing, just truncate the lazy sequence to |limit| items.
  if not checkpoint:
    return itertools.islice(seq, limit)

  ds = datastore.Client()

  def _AlreadyProcessed(item):
    # Skip (change, build) pairs already recorded in Datastore.
    change, build_id = item
    return _AlreadyForgiven(ds, change, build_id)

  def _Save(item):
    # Record that |change| was processed for |build_id| (keyed by the build).
    change, build_id = item
    entity = ds.get(key=ds.key(PRECQ_PROCESSED_KEY, build_id))
    if not entity:
      logging.info('Inserting Pre-CQ checkpoint row %s.', build_id)
      entity = datastore.Entity(key=ds.key(PRECQ_PROCESSED_KEY, build_id))
    else:
      logging.info('Updating Pre-CQ checkpoint row %s.', build_id)
    # A build may include more than one CL, so add a property to the entity for
    # each CL of the build that was processed.
    entity.update({str(change.gerrit_number): True})
    ds.put(entity)

  return checkpointlib.CheckpointSequence(
      _AlreadyProcessed, _Save, seq, limit=limit)
| |
| |
def _InnocentCLsFromSanityBuildFailures(conn):
  """Finds Pre-CQ failures to forgive.

  Args:
    conn: The CIDBConnection to use.

  Yields:
    Tuples of (change, pre-cq build id) to forgive.
  """
  latest_runs = conn.GetLatestBuildRequestsForReason(_SANITY_BUILD_REASON)
  for failed_run in _FailedSanityRuns(conn, latest_runs):
    for innocent in _InnocentPreCQsFromSanity(conn, failed_run):
      yield innocent
| |
| |
def _FailedSanityRuns(conn, sanity_runs):
  """Filters the latest sanity runs to only the failed sanity runs.

  Also sets the _SANITY_FAILURES_EXAMINED gauge to the number of failures.

  Args:
    conn: The CIDBConnection to use.
    sanity_runs: An iterable of sanity builds (has a .build_id).

  Yields:
    The sanity runs that failed.
  """
  # Materialize once: |sanity_runs| is iterated twice below (the task list
  # and the final zip), which would silently yield nothing on the second
  # pass for a one-shot iterator.
  sanity_runs = list(sanity_runs)
  if not sanity_runs:
    # Avoid spawning a process pool with processes=0; there is nothing to
    # examine, so record 0 and stop.
    _SANITY_FAILURES_EXAMINED.set(0)
    return

  # TODO(phobbs) add a JOIN buildTable in GetLatestBuildRequestsForReason
  # to avoid creating / destroying a bunch of cidb connections.
  statuses = parallel.RunTasksInProcessPool(
      conn.GetBuildStatus,
      [[run.build_id] for run in sanity_runs],
      processes=len(sanity_runs))

  faileds = [status['status'] == constants.BUILDER_STATUS_FAILED
             for status in statuses]

  # bools are ints, so no int() conversion is needed to count True values.
  _SANITY_FAILURES_EXAMINED.set(sum(faileds))

  # The builtin zip (rather than the Python-2-only itertools.izip) keeps this
  # portable to Python 3; the list it builds on Python 2 is small.
  for sanity_run, failed in zip(sanity_runs, faileds):
    if failed:
      # TODO(crbug.com/820379) yield build ids from here.
      yield sanity_run
| |
| |
def _InnocentPreCQsFromSanity(conn, sanity_failure):
  """Finds forgiveable Pre-CQ failures from a sanity failure.

  Searches build history for the sanity build's config in a window from
  _DAYS_LOWER_LIMIT days before the failure to _DAYS_UPPER_LIMIT days after.

  Args:
    conn: A CIDBConnection
    sanity_failure: A BuildRequest corresponding to a sanity build failure.

  Yields:
    (GerritPatchTuple, build_id) corresponding to pre-cq failures to forgive
  """
  lower_limit = (
      sanity_failure.timestamp - datetime.timedelta(days=_DAYS_LOWER_LIMIT))
  upper_limit = (
      sanity_failure.timestamp + datetime.timedelta(days=_DAYS_UPPER_LIMIT))
  builds = conn.GetBuildHistory(
      sanity_failure.request_build_config,
      cidb.CIDBConnection.NUM_RESULTS_NO_LIMIT,
      start_date=lower_limit.date(),
      end_date=upper_limit.date(),)

  for build in builds:
    actions = conn.GetActionsForBuild(build['id'])
    if not actions:
      continue
    # Forgive the build if any of its CL actions failed.
    if any(action.status == constants.BUILDER_STATUS_FAILED
           for action in actions):
      # NOTE(review): only the first action's change is yielded even when the
      # build has several actions — presumably one CL per pre-cq build;
      # confirm before relying on this for multi-CL builds.
      action = actions[0]
      change = clactions.GerritPatchTuple(
          action.change_number, action.patch_number,
          action.change_source == 'internal')
      # TODO(crbug.com/820379) yield build ids from here.
      yield change, action.build_id
| |
| |
def _AlreadyForgiven(ds, change, build_id):
  """Whether the given Pre-CQ build was forgiven already.

  Args:
    ds: The cloud Datastore client.
    change: A GerritPatchTuple instance.
    build_id: The CIDB build id for the pre-cq run.

  Returns:
    True if this change was already recorded as processed for this build.
  """
  entity = ds.get(key=ds.key(PRECQ_PROCESSED_KEY, build_id))
  if entity is None:
    return False
  # Each entity stores the CLs that were processed by exonerator for that
  # build, keyed by gerrit number. For simplicity, assume gerrit_number:change
  # is a one-to-one mapping. Technically, this is false, but it's extremely
  # unlikely to affect the behavior.
  return entity.get(str(change.gerrit_number), False)
| |
| |
def _ShouldExonerate(conn, change, build_id):
  """Whether we should exonerate the patch.

  Args:
    conn: The cidb.CIDBConnection
    change: A GerritPatchTuple to possibly exonerate
    build_id: The threshold build_id.

  Returns:
    True when no newer pre-cq run exists for the change and the patch has not
    already been exonerated.
  """
  actions = conn.GetActionsForChanges([change])
  if _ExistsNewerPreCQForCL(actions, build_id):
    return False
  return not _PatchAlreadyExonerated(actions, change)
| |
| |
def _ExistsNewerPreCQForCL(actions, build_id):
  """Whether there is a newer pick-up action for the CL's build config.

  Args:
    actions: The CLActions for the change
    build_id: The threshold build_id.

  Returns:
    A boolean indicating whether any CL_ACTION_PICKED_UP action exists for
    the same build config with a build id newer than |build_id|.
  """
  matching = [a for a in actions if a.build_id == build_id]
  if not matching:
    # No action references this build, so there is nothing newer to compare.
    return False
  build_config = matching[0].build_config

  return any(
      a.build_id > build_id
      for a in actions
      if (a.build_config == build_config
          and a.action == constants.CL_ACTION_PICKED_UP))
| |
| |
def _PatchAlreadyExonerated(actions, change):
  """Whether the patch was already exonerated once.

  This prevents infinitely retrying pre-cq on a single patch.
  TODO: consider retrying more times (2 or 3?) if there is low pre-cq load.

  Args:
    actions: The CLActions for the change
    change: A GerritPatchTuple to find actions for.

  Returns:
    True if any CL_ACTION_EXONERATED action exists for this patch number.
  """
  return any(
      a.action == constants.CL_ACTION_EXONERATED
      and a.patch_number == change.patch_number
      for a in actions)