#!/usr/bin/env vpython3
# Copyright 2021 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Script for finding and suppressing flaky GPU tests.
This relies on ResultDB BigQuery data under the hood, so it requires the `bq`
tool which is available as part of the Google Cloud SDK
https://cloud.google.com/sdk/docs/quickstarts.
Example usage, which finds all failures in the past 5 days. Any tests that
failed more than twice on a configuration is marked as flaky, and any that
failed more than 5 times is marked as failing:
suppress_flakes.py \
--project chrome-unexpected-pass-data \
--sample-period 5
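
Non-interactive usage sketch (the threshold values shown are simply this
script's defaults, not tuned recommendations):

suppress_flakes.py \
  --project chrome-unexpected-pass-data \
  --sample-period 5 \
  --no-prompt-for-user-input \
  --ignore-threshold 0.01 \
  --flaky-threshold 0.5

Generated suppressions use the typed expectation format; an illustrative
(hypothetical) entry: crbug.com/1234 [ win ] SomeTest [ RetryOnFailure ]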
"""

import argparse

from flake_suppressor import expectations
from flake_suppressor import queries
from flake_suppressor import result_output
from flake_suppressor import results as results_module


def ParseArgs():
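  """Parses command line arguments.

  Returns:
    The parsed arguments as an argparse.Namespace. Threshold values are
    validated when --no-prompt-for-user-input is used.
  """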
  # TODO(crbug.com/1192733): Add flaky and failure thresholds, likely in the
  # form of % of failures out of the total runs for a (test, tags) combination.
  # <1% can be ignored, >50% can be treated as a failure instead of a flake.
  parser = argparse.ArgumentParser(
      description=('Script for automatically suppressing flaky/failing GPU '
                   'Telemetry-based tests.'))
  parser.add_argument('--project',
                      required=True,
                      help=('The billing project to use for BigQuery queries. '
                            'Must have access to the ResultDB BQ tables, e.g. '
                            '"chrome-luci-data.chromium.gpu_ci_test_results".'))
  parser.add_argument('--sample-period',
                      type=int,
                      default=1,
                      help='The number of days to sample data from.')
  parser.add_argument('--no-group-by-tags',
                      action='store_false',
                      default=True,
                      dest='group_by_tags',
                      help=('Append added expectations to the end of the file '
                            'instead of attempting to automatically group '
                            'them with similar expectations.'))
  parser.add_argument('--no-prompt-for-user-input',
                      action='store_false',
                      default=True,
                      dest='prompt_for_user_input',
                      help=('Generate expectations automatically based on '
                            'thresholds instead of prompting the user each '
                            'time. The user will still need to add associated '
                            'bugs to generated expectations afterwards.'))
  parser.add_argument('--ignore-threshold',
                      type=float,
                      default=0.01,
                      help=('The fraction of failed tests under which flakes '
                            'will be ignored instead of having an expectation '
                            'added when --no-prompt-for-user-input is used.'))
  parser.add_argument('--flaky-threshold',
                      type=float,
                      default=0.5,
                      help=('The fraction of failed tests under which flakes '
                            'will be marked as RetryOnFailure when '
                            '--no-prompt-for-user-input is used. Above this, '
                            'failures will be marked as Failure.'))
  parser.add_argument('--include-all-tags',
                      action='store_true',
                      default=False,
                      help=('Use all tags generated by a configuration when '
                            'creating an expectation rather than attempting '
                            'to only use the most specific one. This should '
                            'only need to be passed if the tags in the '
                            'expectation files are not ordered from least '
                            'specific to most specific.'))
  args = parser.parse_args()
  if not args.prompt_for_user_input:
    # The checks below use < 0, so 0 is allowed, i.e. the thresholds must be
    # non-negative rather than strictly positive.
    if args.ignore_threshold < 0:
      raise ValueError('--ignore-threshold must be non-negative')
    if args.flaky_threshold < 0:
      raise ValueError('--flaky-threshold must be non-negative')
    if args.flaky_threshold <= args.ignore_threshold:
      raise ValueError(
          '--flaky-threshold must be greater than --ignore-threshold')
  return args


def main():
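  """Finds flaky/failing GPU tests and generates suppressions for them."""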
  args = ParseArgs()
  expectations.AssertCheckoutIsUpToDate()
  # Pull flaky/failing test results from BigQuery, aggregate them by test and
  # tags, and emit an HTML summary for manual inspection.
  results = queries.GetFlakyOrFailingTests(args.sample_period, args.project)
  aggregated_results = results_module.AggregateResults(results)
  result_output.GenerateHtmlOutputFile(aggregated_results)
  print('If there are many instances of failed tests, that may be indicative '
        'of an issue that should be handled in some other way, e.g. reverting '
        'a bad CL.')
  if args.prompt_for_user_input:
    input('\nBeginning of user input section - press Enter to continue')
    expectations.IterateThroughResultsForUser(aggregated_results,
                                              args.group_by_tags,
                                              args.include_all_tags)
  else:
    result_counts = queries.GetResultCounts(args.sample_period, args.project)
    expectations.IterateThroughResultsWithThresholds(
        aggregated_results, args.group_by_tags, result_counts,
        args.ignore_threshold, args.flaky_threshold, args.include_all_tags)
    print('\nGenerated expectations will need to have bugs manually added.')
    print('\nGenerated expectations likely contain conflicting tags that '
          'need to be removed.')


if __name__ == '__main__':
  main()