| #!/usr/bin/env vpython3 |
| # Copyright 2021 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Script for finding and suppressing flaky GPU tests. |
| |
| This relies on ResultDB BigQuery data under the hood, so it requires the `bq` |
| tool which is available as part of the Google Cloud SDK |
| https://cloud.google.com/sdk/docs/quickstarts. |
| |
Example usage, which finds all failures in the past 5 days. Any test that
failed more than twice on a configuration is marked as flaky, and any that
failed more than 5 times is marked as failing:
| |
| suppress_flakes.py \ |
| --project chrome-unexpected-pass-data \ |
| --sample-period 5 |
| """ |
| |
| import argparse |
| |
| from flake_suppressor import expectations |
| from flake_suppressor import queries |
| from flake_suppressor import result_output |
| from flake_suppressor import results as results_module |
| |
| |
def ParseArgs():
  """Parses and validates command line arguments.

  Returns:
    The parsed argparse.Namespace.

  Raises:
    ValueError: If --no-prompt-for-user-input is used and the threshold
        arguments are negative or inconsistent with each other.
  """
  # TODO(crbug.com/1192733): Add flaky and failure thresholds, likely in the
  # form of % of failures out of the total runs for a (test, tags) combination.
  # <1% can be ignored, > 50% can be treated as a failure instead of a flake.
  parser = argparse.ArgumentParser(
      description=('Script for automatically suppressing flaky/failing GPU '
                   'Telemetry-based tests.'))
  parser.add_argument('--project',
                      required=True,
                      help=('The billing project to use for BigQuery queries. '
                            'Must have access to the ResultDB BQ tables, e.g. '
                            '"chrome-luci-data.chromium.gpu_ci_test_results".'))
  parser.add_argument('--sample-period',
                      type=int,
                      default=1,
                      help=('The number of days to sample data from.'))
  parser.add_argument('--no-group-by-tags',
                      action='store_false',
                      default=True,
                      dest='group_by_tags',
                      help=('Append added expectations to the end of the file '
                            'instead of attempting to automatically group with '
                            'similar expectations.'))
  parser.add_argument('--no-prompt-for-user-input',
                      action='store_false',
                      default=True,
                      dest='prompt_for_user_input',
                      help=('Generate expectations automatically based on '
                            'thresholds instead of prompting the user each '
                            'time. The user will still need to add associated '
                            'bugs to generated expectations afterwards.'))
  parser.add_argument('--ignore-threshold',
                      type=float,
                      default=0.01,
                      help=('The fraction of failed tests under which flakes '
                            'will be ignored instead of having an expectation '
                            'added when --no-prompt-for-user-input is used.'))
  parser.add_argument('--flaky-threshold',
                      type=float,
                      default=0.5,
                      help=('The fraction of failed tests under which flakes '
                            'will be marked as RetryOnFailure when '
                            '--no-prompt-for-user-input is used. Above this, '
                            'failures will be marked as Failure.'))
  parser.add_argument('--include-all-tags',
                      action='store_true',
                      default=False,
                      help=('Use all tags generated by a configuration when '
                            'creating an expectation rather than attempting '
                            'to only use the most specific one. This should '
                            'only need to be passed if the tags in the '
                            'expectation files are not ordered from least '
                            'specific to most specific.'))
  args = parser.parse_args()

  # The thresholds are only consumed in non-interactive mode, so only validate
  # them when --no-prompt-for-user-input was passed. Zero is a valid value
  # (e.g. an ignore threshold of 0 means no flakes are ignored), so only
  # negative values are rejected.
  if not args.prompt_for_user_input:
    if args.ignore_threshold < 0:
      raise ValueError('--ignore-threshold must be non-negative')
    if args.flaky_threshold < 0:
      raise ValueError('--flaky-threshold must be non-negative')
    if args.flaky_threshold <= args.ignore_threshold:
      raise ValueError(
          '--flaky-threshold must be greater than --ignore-threshold')

  return args
| |
| |
def main():
  """Queries for flaky/failing tests and generates suppressions for them."""
  args = ParseArgs()
  expectations.AssertCheckoutIsUpToDate()
  raw_results = queries.GetFlakyOrFailingTests(args.sample_period, args.project)
  aggregated_results = results_module.AggregateResults(raw_results)
  result_output.GenerateHtmlOutputFile(aggregated_results)
  print('If there are many instances of failed tests, that may be indicative '
        'of an issue that should be handled in some other way, e.g. reverting '
        'a bad CL.')
  if not args.prompt_for_user_input:
    # Non-interactive mode: result counts are needed so the thresholds can be
    # applied as fractions of total runs per (test, config).
    result_counts = queries.GetResultCounts(args.sample_period, args.project)
    expectations.IterateThroughResultsWithThresholds(
        aggregated_results, args.group_by_tags, result_counts,
        args.ignore_threshold, args.flaky_threshold, args.include_all_tags)
  else:
    # Interactive mode: walk the user through each flaky result.
    input('\nBeginning of user input section - press any key to continue')
    expectations.IterateThroughResultsForUser(aggregated_results,
                                              args.group_by_tags,
                                              args.include_all_tags)
  print('\nGenerated expectations will need to have bugs manually added.')
  print('\nGenerated expectations likely contain conflicting tags that need to '
        'be removed.')
| |
| |
# Standard script entry guard so the module can also be imported without
# side effects.
if __name__ == '__main__':
  main()