#!/usr/bin/env vpython3
# Copyright 2021 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Script for finding and suppressing flaky GPU tests.
This relies on ResultDB BigQuery data under the hood, so it requires the `bq`
tool which is available as part of the Google Cloud SDK
https://cloud.google.com/sdk/docs/quickstarts.
Example usage, which finds all failures in the past 5 days. Any test that
failed more than twice on a configuration is marked as flaky, and any test
that failed more than 5 times is marked as failing:
suppress_flakes.py \
--project chrome-unexpected-pass-data \
--sample-period 5
"""
import argparse
from flake_suppressor import expectations
from flake_suppressor import queries
from flake_suppressor import result_output
from flake_suppressor import results as results_module
def ParseArgs():
  """Parses and validates the script's command line arguments.

  Arguments are read from sys.argv by argparse.

  Returns:
    An argparse.Namespace with the following attributes:
      project: The billing project to use for BigQuery queries.
      sample_period: A positive int, the number of days to sample data from.
      group_by_tags: A bool, False only if --no-group-by-tags was passed.

  Raises:
    SystemExit: Via argparse if the arguments cannot be parsed, --project is
        missing, or --sample-period is not a positive integer.
  """
  # TODO(crbug.com/1192733): Add flaky and failure thresholds, likely in the
  # form of % of failures out of the total runs for a (test, tags) combination.
  # <1% can be ignored, > 50% can be treated as a failure instead of a flake.
  parser = argparse.ArgumentParser(
      description=('Script for automatically suppressing flaky/failing GPU '
                   'Telemetry-based tests.'))
  parser.add_argument('--project',
                      required=True,
                      help=('The billing project to use for BigQuery queries. '
                            'Must have access to the ResultDB BQ tables, e.g. '
                            '"chrome-luci-data.chromium.gpu_ci_test_results".'))
  parser.add_argument('--sample-period',
                      type=int,
                      default=1,
                      help=('The number of days to sample data from.'))
  parser.add_argument('--no-group-by-tags',
                      action='store_false',
                      default=True,
                      dest='group_by_tags',
                      help=('Append added expectations to the end of the file '
                            'instead of attempting to automatically group with '
                            'similar expectations.'))
  args = parser.parse_args()

  # The sample period is a count of days, so zero or a negative value is not a
  # meaningful window. Reject it here with a usage error instead of handing a
  # nonsensical value to the rest of the script.
  if args.sample_period < 1:
    parser.error('--sample-period must be a positive number of days.')
  return args
def main():
  """Runs the flake suppression flow from the command line.

  Parses the command line arguments, passes the sample period and billing
  project to queries.GetFlakyOrFailingTests, aggregates what comes back via
  results_module.AggregateResults, hands the aggregated results to
  result_output.GenerateHtmlOutputFile, and then, once the user confirms,
  passes them to expectations.IterateThroughResultsForUser.
  """
  args = ParseArgs()
  results = queries.GetFlakyOrFailingTests(args.sample_period, args.project)
  aggregated_results = results_module.AggregateResults(results)
  result_output.GenerateHtmlOutputFile(aggregated_results)
  print('If there are many instances of failed tests, that may be indicative '
        'of an issue that should be handled in some other way, e.g. reverting '
        'a bad CL.')
  # input() only returns once a complete line has been entered, so the prompt
  # asks for Enter rather than "any key".
  input('\nBeginning of user input section - press Enter to continue')
  expectations.IterateThroughResultsForUser(aggregated_results,
                                            args.group_by_tags)
  print('\nGenerated expectations likely contain conflicting tags that need to '
        'be removed.')
# Only run the suppression flow when this file is executed as a script, not
# when it is imported as a module.
if __name__ == '__main__':
  main()