#!/usr/bin/env python
# Copyright 2017 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Uploads test results artifacts.
This script takes a list of json test results files, the format of which is
described in
https://chromium.googlesource.com/chromium/src/+/main/docs/testing/json_test_results_format.md.
For each file, it looks for test artifacts embedded in each test. It detects
this by looking for the top level "artifact_type_info" key.
The script, by default, uploads every artifact stored on the local disk (a URI
with the 'file' scheme) to google storage.
"""

from __future__ import print_function

import argparse
import copy
import hashlib
import json
import os
import shutil
import sys
import tempfile

root_dir = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..', '..', '..'))
sys.path.append(os.path.join(root_dir, 'build', 'android'))

from pylib.utils import google_storage_helper


def get_file_digest(filepath):
  """Returns a hex digest of the file's contents.

  The digest is used as the file's name in cloud storage, so that uploads are
  content-addressed.
  """
  with open(filepath, 'rb') as f:
    # TODO: switch to sha256. crbug.com/787113
    m = hashlib.sha1()
    while True:
      chunk = f.read(64 * 1024)
      if not chunk:
        break
      m.update(chunk)
    return m.hexdigest()
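
# For example (hypothetical file whose contents are the bytes 'hello'):
#   get_file_digest('/tmp/hello.txt')
#   # -> 'aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d'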


def get_tests(test_trie):
  """Gets all tests in this test trie.

  An entry is detected as a test if its dictionary has both the 'expected'
  and 'actual' keys.

  The keys of the returned dictionary are tuples of the path components
  leading to each test. A test trie like

    "foo": {
      "bar": {
        "baz": {
          "actual": "PASS",
          "expected": "PASS",
        }
      }
    }

  would give you

    {
      ('foo', 'bar', 'baz'): {
        "actual": "PASS",
        "expected": "PASS",
      }
    }

  NOTE: If you are getting an error with a stack trace ending in this function,
  file a bug at crbug.com/new and cc martiniss@.
  """
  if not isinstance(test_trie, dict):
    raise ValueError('expected %s to be a dict' % test_trie)

  tests = {}
  for k, v in test_trie.items():
    if 'expected' in v and 'actual' in v:
      tests[(k,)] = v
    else:
      for key, val in get_tests(v).items():
        tests[(k,) + key] = val
  return tests


def upload_directory_to_gs(local_path, bucket, gs_path, dry_run):
  if dry_run:
    print('would have uploaded %s to %s' % (local_path, gs_path))
    return

  # -m does multithreaded uploads, which is needed because we upload multiple
  # files. -r copies the whole directory.
  google_storage_helper.upload(
      gs_path, local_path, bucket, gs_args=['-m'], command_args=['-r'])
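
# Based on the gs_args/command_args passed above, the call is expected to
# behave roughly like the following (an assumption, not a verified command):
#   gsutil -m cp -r <local_path> gs://<bucket>/<gs_path>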


def hash_artifacts(tests, artifact_root):
  hashed_artifacts = []
  # Sort by test path tuple for testing consistency. Sorting the dict values
  # directly would fail on Python 3, where dicts are not orderable.
  for _, test_obj in sorted(tests.items()):
    for name, location in sorted(test_obj.get('artifacts', {}).items(),
                                 key=lambda pair: pair[0]):
      absolute_filepath = os.path.join(artifact_root, location)
      file_digest = get_file_digest(absolute_filepath)
      # Location is set to the file digest because it's relative to the google
      # storage root.
      test_obj['artifacts'][name] = file_digest
      hashed_artifacts.append((file_digest, absolute_filepath))
  return hashed_artifacts
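
# For illustration (hypothetical artifact name and path), hash_artifacts
# rewrites a test's artifact entry like
#   {'screenshot': 'foo/bar/screenshot.png'}
# into
#   {'screenshot': '<sha1 hex digest of the file>'}
# and returns (digest, absolute_path) pairs for the staging and upload steps.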


def prep_artifacts_for_gs_upload(hashed_artifacts, tempdir):
  for file_digest, absolute_filepath in hashed_artifacts:
    new_location = os.path.join(tempdir, file_digest)
    # Since we use content-addressed hashing, the file might already exist.
    if not os.path.exists(new_location):
      shutil.copyfile(absolute_filepath, new_location)
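
# After staging, the temp directory holds one file per unique digest
# (tempdir/<digest>), which upload_artifacts below pushes to
# gs://<bucket>/sha1/<digest>.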


def upload_artifacts(data, artifact_root, dry_run, bucket):
  """Uploads artifacts to google storage.

  Args:
    * data: The test results data to upload. Assumed to include 'tests' and
      'artifact_type_info' top level keys.
    * artifact_root: The local directory that artifact locations are relative
      to.
    * dry_run: If true, this run is a test run, and no actual changes should be
      made. This includes not uploading any data to cloud storage.
    * bucket: The google storage bucket to upload artifacts to.

  Returns:
    The test results data, with rewritten artifact locations.
  """
  local_data = copy.deepcopy(data)
  type_info = local_data['artifact_type_info']
  # Put the hashing algorithm in the storage path, so that it's easier to
  # change the algorithm if we need to in the future.
  gs_path = 'sha1'

  tests = get_tests(local_data['tests'])

  # Do a validation pass first, to make sure no filesystem operations occur if
  # there are invalid artifacts.
  for test_obj in tests.values():
    for artifact_name in test_obj.get('artifacts', {}):
      if artifact_name not in type_info:
        raise ValueError(
            'Artifact %r type information not present' % artifact_name)

  tempdir = tempfile.mkdtemp(prefix='upload_test_artifacts')
  try:
    hashed_artifacts = hash_artifacts(tests, artifact_root)
    prep_artifacts_for_gs_upload(hashed_artifacts, tempdir)
    # Add * to include all files in that directory.
    upload_directory_to_gs(
        os.path.join(tempdir, '*'), bucket, gs_path, dry_run)
    local_data['artifact_permanent_location'] = 'gs://%s/%s' % (
        bucket, gs_path)
    return local_data
  finally:
    shutil.rmtree(tempdir)
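
# A minimal dry-run use of upload_artifacts might look like this (file paths
# are hypothetical):
#
#   with open('/tmp/results.json') as f:
#     results = json.load(f)
#   rewritten = upload_artifacts(results, artifact_root='/tmp/artifacts',
#                                dry_run=True,
#                                bucket='chromium-test-artifacts')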


def main():
  parser = argparse.ArgumentParser()
  # This would be test-result-file, but argparse doesn't translate
  # test-result-file to args.test_result_file automatically, and dest doesn't
  # seem to work on positional arguments.
  parser.add_argument('test_result_file')
  parser.add_argument('--output-file', type=os.path.realpath,
                      help='If set, the input json test results file will be'
                           ' rewritten to include the new artifact location'
                           ' data and written to this path.')
  parser.add_argument('-n', '--dry-run', action='store_true',
                      help='If set, this script will not upload any files;'
                           ' instead it prints to stdout the path to which'
                           ' each file would have been uploaded. Useful for'
                           ' testing.')
  parser.add_argument('--artifact-root', required=True, type=os.path.realpath,
                      help='The file path where artifact locations are'
                           ' rooted.')
  parser.add_argument('--bucket', default='chromium-test-artifacts',
                      help='The google storage bucket to upload artifacts to.'
                           ' The default bucket is public and accessible by'
                           ' anyone.')
  parser.add_argument('-q', '--quiet', action='store_true',
                      help='If set, does not print the transformed json file'
                           ' to stdout.')
  args = parser.parse_args()

  with open(args.test_result_file) as f:
    data = json.load(f)

  type_info = data.get('artifact_type_info')
  if not type_info:
    print('File %r did not have %r top level key. Not processing.' %
          (args.test_result_file, 'artifact_type_info'))
    return 1

  new_data = upload_artifacts(
      data, args.artifact_root, args.dry_run, args.bucket)

  if args.output_file:
    with open(args.output_file, 'w') as f:
      json.dump(new_data, f)

  if new_data and not args.quiet:
    print(json.dumps(new_data, indent=2, separators=(',', ': '),
                     sort_keys=True))
  return 0


if __name__ == '__main__':
  sys.exit(main())
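
# Example dry-run invocation (script and file paths are hypothetical):
#   python upload_test_result_artifacts.py --artifact-root /tmp/artifacts \
#       --dry-run /tmp/results.json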