#!/usr/bin/env vpython3
# Copyright 2020 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
Script for determining which CLs in a blamelist ran on a certain trybot.

There are cases where CLs can be absolved of a CI failure if they ran on a
similar trybot before being submitted. This script goes through each CL in a
given blamelist and determines whether it ran on a specified trybot or not.

This script depends on the `bq` tool, which is available as part of the Google
Cloud SDK https://cloud.google.com/sdk/docs/quickstarts.

Example usage:

trim_culprit_cls.py \
    --start-revision <first/oldest revision in the blamelist> \
    --end-revision <last/newest revision in the blamelist> \
    --trybot <trybot name> \
    --project <billing project>

Concrete example:

trim_culprit_cls.py \
    --start-revision 1cdf916d194215f1e4139f295e494fc1c1863c3c \
    --end-revision 9aa31419100be8d0f02708a500aaed7c33a53a10 \
    --trybot win_optional_gpu_tests_rel \
    --project chromium-swarm

The --project argument can be any project you are associated with in the
Google Cloud console https://console.cloud.google.com/ (see drop-down menu in
the top left corner).
"""

import argparse
import json
import re
import subprocess

# pylint: disable=line-too-long
# Schemas:
# - go/buildbucket-bq and go/buildbucket-proto/build.proto
# - go/luci/cq/bq and
#   https://source.chromium.org/chromium/infra/infra/+/main:go/src/go.chromium.org/luci/cv/api/bigquery/v1/attempt.proto
#
# Original author: maruel@
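# The query joins CQ attempt data (which builds were triggered for each
# patchset of the CL) with Buildbucket build data to recover the builder name,
# status, and timing of each tryjob.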
QUERY_TEMPLATE = """\
WITH cq_builds AS (
  SELECT
    build.id,
    build.critical,
    start_time,
    TIMESTAMP_DIFF(end_time, start_time, SECOND) AS duration,
    cl.change,
    cl.patchset
  FROM `commit-queue.chromium.attempts` CROSS JOIN UNNEST(builds) AS build CROSS JOIN UNNEST(gerrit_changes) AS cl
  WHERE
    cl.host = 'chromium-review.googlesource.com'
    AND cl.project = 'chromium/src'
    AND cl.change = {cl_number}
),

builds AS (
  SELECT
    patchset,
    bb.builder.project||'/'||bb.builder.bucket||'/'||bb.builder.builder AS builder,
    'ci.chromium.org/b/'||bb.id AS url,
    cq.critical,
    bb.status,
    cq.start_time,
    duration
  FROM cq_builds AS cq INNER JOIN `cr-buildbucket.chromium.builds` AS bb ON cq.id = bb.id
  WHERE
    # Performance optimization.
    bb.create_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 30 DAY)
)

SELECT * FROM builds ORDER BY patchset DESC, critical, builder, start_time
"""
# pylint: enable=line-too-long

GERRIT_URL_REGEX = re.compile(r'^\s*Reviewed-on: (?P<gerrit_url>.*)$',
                              re.MULTILINE)


class ChangeList:
  """Class for storing relevant information for a CL."""

  def __init__(self):
    self.revision = None
    self.gerrit_url = None
    self._cl_number = None
    self.largest_patchset = None
    self.ran_trybot = None

  @property
  def cl_number(self):
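    """The CL number, i.e. the final path component of the Gerrit URL."""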
    assert self.gerrit_url
    if not self._cl_number:
      self._cl_number = self.gerrit_url.split('/')[-1]
    return self._cl_number

  def __str__(self):
    assert self.revision is not None
    assert self.gerrit_url is not None
    assert self.largest_patchset is not None
    assert self.ran_trybot is not None
    s = f'{self.revision} ({self.gerrit_url})'
    if not self.ran_trybot:
      s += ' <<<< Did not run trybot'
    return s


def QueryTrybotsForCl(cl_number, project):
  """Queries BigQuery for the tryjobs run for a CL.

  Args:
    cl_number: An int or string containing the CL number to query.
    project: A string containing the billing project to use for queries.

  Returns:
    A list of dicts, each entry containing data for one trybot run.
  """
  query = QUERY_TEMPLATE.format(cl_number=cl_number)

  cmd = [
      'bq',
      'query',
      '--format=json',
      f'--project_id={project}',
      '--max_rows=500',
      '--use_legacy_sql=false',
      query,
  ]
  stdout = subprocess.check_output(cmd, stderr=subprocess.DEVNULL, text=True)
  return json.loads(stdout)


def FillTrybotRuns(blamelist, trybot, project):
  """Fills the trybot data for the entries in |blamelist|.

  Args:
    blamelist: A list of ChangeList objects with their gerrit_url fields
        filled.
    trybot: A string containing the name of the trybot to check for.
    project: A string containing the billing project to use for queries.
  """
  total_cls = len(blamelist)
  for i, entry in enumerate(blamelist):
    print(f'Getting data for CL {i + 1}/{total_cls}')
    largest_patchset = 0
    all_trybots = QueryTrybotsForCl(entry.cl_number, project)
    assert all_trybots
    # The query orders results by patchset (descending), so the most recent
    # patchsets are returned even if the number of tryjobs exceeds the row
    # limit. Scan all returned rows for the largest patchset rather than
    # relying on that ordering.
    for tryjob in all_trybots:
      patchset = int(tryjob['patchset'])
      largest_patchset = max(largest_patchset, patchset)
    entry.largest_patchset = largest_patchset

    for tryjob in all_trybots:
      if largest_patchset != int(tryjob['patchset']):
        continue
      # The 'builder' field is in the form project/bucket/builder, e.g.
      # chromium/try/android-marshmallow-arm64-rel.
      if trybot == tryjob['builder'].split('/')[-1]:
        entry.ran_trybot = True
        break
    if entry.ran_trybot is None:
      entry.ran_trybot = False


def FillGerritUrls(blamelist):
  """Fills the Gerrit URLs for the entries in |blamelist|.

  Args:
    blamelist: A list of ChangeList objects with their revision fields filled.
  """
  cmd_template = [
      'git',
      'show',
      '--name-only',
  ]
  for entry in blamelist:
    assert entry.revision
    stdout = subprocess.check_output(cmd_template + [entry.revision],
                                     stderr=subprocess.STDOUT, text=True)
    match = GERRIT_URL_REGEX.search(stdout)
    assert match
    entry.gerrit_url = match.groupdict()['gerrit_url']
    assert entry.gerrit_url


def GetBlamelist(start_revision, end_revision):
  """Gets a revision blamelist between the two given revisions.

  Args:
    start_revision: A string containing the earliest revision in the blamelist.
    end_revision: A string containing the latest revision in the blamelist.

  Returns:
    A list of ChangeList objects with their revision fields filled in, each
    corresponding to a revision in the blamelist. The first entry is the
    latest in the blamelist.
  """
  cmd = [
      'git',
      'log',
      '--pretty=oneline',
      f'{start_revision}~1..{end_revision}',
  ]
  stdout = subprocess.check_output(cmd, stderr=subprocess.STDOUT, text=True)

  blamelist = []
  for line in stdout.splitlines():
    cl = ChangeList()
    cl.revision = line.split()[0]
    blamelist.append(cl)
  return blamelist


def ParseArgs():
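  """Parses the command-line arguments."""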
  parser = argparse.ArgumentParser(
      description='Script to determine which CLs in a blamelist did not run a '
      'particular trybot.')
  parser.add_argument('--start-revision',
                      required=True,
                      help='The earliest revision in the blamelist.')
  parser.add_argument('--end-revision',
                      required=True,
                      help='The latest revision in the blamelist.')
  parser.add_argument('--project',
                      required=True,
                      help='A billing project to use for queries.')
  parser.add_argument('--trybot',
                      required=True,
                      help='The name of the trybot to look for.')
  return parser.parse_args()


def main():
  args = ParseArgs()
  blamelist = GetBlamelist(args.start_revision, args.end_revision)
  FillGerritUrls(blamelist)
  FillTrybotRuns(blamelist, args.trybot, args.project)
  print('\n\nBlamelist (latest first):\n')
  for entry in blamelist:
    print(entry)


if __name__ == '__main__':
  main()