#!/usr/bin/env vpython3
# Copyright 2021 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Script for finding and suppressing flaky GPU tests.
This relies on ResultDB BigQuery data under the hood, so it requires the `bq`
tool which is available as part of the Google Cloud SDK
https://cloud.google.com/sdk/docs/quickstarts.
Example usage, which finds all failures in the past 5 days. Any tests that
failed more than twice on a configuration is marked as flaky, and any that
failed more than 5 times is marked as failing:
suppress_flakes.py \
--project chrome-unexpected-pass-data \
--sample-period 5
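
Non-interactive usage sketch (the threshold values shown are simply this
script's defaults, not tuned recommendations):

suppress_flakes.py \
  --project chrome-unexpected-pass-data \
  --sample-period 5 \
  --no-prompt-for-user-input \
  --ignore-threshold 0.01 \
  --flaky-threshold 0.5

Generated suppressions use the typed expectation format; an illustrative
(hypothetical) entry: crbug.com/1234 [ win ] SomeTest [ RetryOnFailure ]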
"""

import argparse

from flake_suppressor import expectations
from flake_suppressor import queries
from flake_suppressor import result_output
from flake_suppressor import results as results_module


def ParseArgs():
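  """Parses command line arguments.

  Returns:
    The parsed arguments as an argparse.Namespace. Threshold values are
    validated when --no-prompt-for-user-input is used.
  """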
  # TODO(crbug.com/1192733): Add flaky and failure thresholds, likely in the
  # form of % of failures out of the total runs for a (test, tags) combination.
  # <1% can be ignored, >50% can be treated as a failure instead of a flake.
  parser = argparse.ArgumentParser(
      description=('Script for automatically suppressing flaky/failing GPU '
                   'Telemetry-based tests.'))
  parser.add_argument('--project',
                      required=True,
                      help=('The billing project to use for BigQuery queries. '
                            'Must have access to the ResultDB BQ tables, e.g. '
                            '"chrome-luci-data.chromium.gpu_ci_test_results".'))
  parser.add_argument('--sample-period',
                      type=int,
                      default=1,
                      help='The number of days to sample data from.')
  parser.add_argument('--no-group-by-tags',
                      action='store_false',
                      default=True,
                      dest='group_by_tags',
                      help=('Append added expectations to the end of the file '
                            'instead of attempting to automatically group '
                            'them with similar expectations.'))
  parser.add_argument('--no-prompt-for-user-input',
                      action='store_false',
                      default=True,
                      dest='prompt_for_user_input',
                      help=('Generate expectations automatically based on '
                            'thresholds instead of prompting the user each '
                            'time. The user will still need to add associated '
                            'bugs to generated expectations afterwards.'))
  parser.add_argument('--ignore-threshold',
                      type=float,
                      default=0.01,
                      help=('The fraction of failed tests under which flakes '
                            'will be ignored instead of having an expectation '
                            'added when --no-prompt-for-user-input is used.'))
  parser.add_argument('--flaky-threshold',
                      type=float,
                      default=0.5,
                      help=('The fraction of failed tests under which flakes '
                            'will be marked as RetryOnFailure when '
                            '--no-prompt-for-user-input is used. Above this, '
                            'failures will be marked as Failure.'))
  parser.add_argument('--include-all-tags',
                      action='store_true',
                      default=False,
                      help=('Use all tags generated by a configuration when '
                            'creating an expectation rather than attempting '
                            'to only use the most specific one. This should '
                            'only need to be passed if the tags in the '
                            'expectation files are not ordered from least '
                            'specific to most specific.'))
  args = parser.parse_args()
  if not args.prompt_for_user_input:
    # The checks below use < 0, so 0 is allowed, i.e. the thresholds must be
    # non-negative rather than strictly positive.
    if args.ignore_threshold < 0:
      raise ValueError('--ignore-threshold must be non-negative')
    if args.flaky_threshold < 0:
      raise ValueError('--flaky-threshold must be non-negative')
    if args.flaky_threshold <= args.ignore_threshold:
      raise ValueError(
          '--flaky-threshold must be greater than --ignore-threshold')
  return args


def main():
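  """Finds flaky/failing GPU tests and generates suppressions for them."""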
  args = ParseArgs()
  expectations.AssertCheckoutIsUpToDate()
  # Pull flaky/failing test results from BigQuery, aggregate them by test and
  # tags, and emit an HTML summary for manual inspection.
  results = queries.GetFlakyOrFailingTests(args.sample_period, args.project)
  aggregated_results = results_module.AggregateResults(results)
  result_output.GenerateHtmlOutputFile(aggregated_results)
  print('If there are many instances of failed tests, that may be indicative '
        'of an issue that should be handled in some other way, e.g. reverting '
        'a bad CL.')
  if args.prompt_for_user_input:
    input('\nBeginning of user input section - press Enter to continue')
    expectations.IterateThroughResultsForUser(aggregated_results,
                                              args.group_by_tags,
                                              args.include_all_tags)
  else:
    result_counts = queries.GetResultCounts(args.sample_period, args.project)
    expectations.IterateThroughResultsWithThresholds(
        aggregated_results, args.group_by_tags, result_counts,
        args.ignore_threshold, args.flaky_threshold, args.include_all_tags)
    print('\nGenerated expectations will need to have bugs manually added.')
    print('\nGenerated expectations likely contain conflicting tags that '
          'need to be removed.')


if __name__ == '__main__':
  main()