[code coverage] Moving merge scripts src-side.
This is the first part of moving the code coverage merge scripts to src.
A recipe-side change will follow once this has landed.
R=martiniss,liaoyuke,sajjadm
BUG=928577
Change-Id: I213f13651a4a6440ef3fbd9e813ff7371de32ec9
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1521465
Commit-Queue: Roberto Carrillo <robertocn@chromium.org>
Reviewed-by: Stephen Martinis <martiniss@chromium.org>
Reviewed-by: Yuke Liao <liaoyuke@chromium.org>
Cr-Commit-Position: refs/heads/master@{#640613}
diff --git a/testing/merge_scripts/code_coverage/OWNERS b/testing/merge_scripts/code_coverage/OWNERS
new file mode 100644
index 0000000..c3d0dbb
--- /dev/null
+++ b/testing/merge_scripts/code_coverage/OWNERS
@@ -0,0 +1,3 @@
+liaoyuke@chromium.org
+robertocn@chromium.org
+sajjadm@chromium.org
diff --git a/testing/merge_scripts/code_coverage/merge_lib.py b/testing/merge_scripts/code_coverage/merge_lib.py
new file mode 100644
index 0000000..8e40f44
--- /dev/null
+++ b/testing/merge_scripts/code_coverage/merge_lib.py
@@ -0,0 +1,199 @@
+# Copyright 2019 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Functions for interacting with llvm-profdata"""
+
+import logging
+import multiprocessing
+import os
+import subprocess
+
+logging.basicConfig(
+ format='[%(asctime)s %(levelname)s] %(message)s', level=logging.DEBUG)
+
+
+def _call_profdata_tool(profile_input_file_paths,
+ profile_output_file_path,
+ profdata_tool_path,
+ retries=3):
+ """Calls the llvm-profdata tool.
+
+ Args:
+ profile_input_file_paths: A list of relative paths to the files to merge.
+ profile_output_file_path: The path to the merged file to write.
+ profdata_tool_path: The path to the llvm-profdata executable.
+ retries: Remaining retry budget; a failed merge is retried while it is >= 0.
+
+ Returns:
+ A list of paths to profiles that had to be excluded to get the merge to
+ succeed, suspected of being corrupted or malformed.
+
+ Raises:
+ CalledProcessError: An error occurred merging profiles.
+ """
+ logging.info('Merging profiles.')
+
+ try:
+ subprocess_cmd = [
+ profdata_tool_path, 'merge', '-o', profile_output_file_path,
+ '-sparse=true'
+ ]
+ subprocess_cmd.extend(profile_input_file_paths)
+
+ # Redirecting stderr is required because, when an error happens,
+ # llvm-profdata writes the error output to stderr, and our error handling
+ # logic relies on that output.
+ output = subprocess.check_output(subprocess_cmd, stderr=subprocess.STDOUT)
+ logging.info('Merge succeeded with output: %r', output)
+ except subprocess.CalledProcessError as error:
+ if len(profile_input_file_paths) > 1 and retries >= 0:
+ logging.warning('Merge failed with error output: %r', error.output)
+
+ # The output of the llvm-profdata command will include the path of
+ # malformed files, such as
+ # `error: /.../default.profraw: Malformed instrumentation profile data`
+ invalid_profiles = [
+ f for f in profile_input_file_paths if f in error.output
+ ]
+
+ if not invalid_profiles:
+ logging.info(
+ 'Merge failed, but wasn\'t able to figure out the culprit invalid '
+ 'profiles from the output, so skip retry and bail out.')
+ raise error
+
+ valid_profiles = list(
+ set(profile_input_file_paths) - set(invalid_profiles))
+ if valid_profiles:
+ logging.warning(
+ 'Following invalid profiles are removed as they were mentioned in '
+ 'the merge error output: %r', invalid_profiles)
+ logging.info('Retry merging with the remaining profiles: %r',
+ valid_profiles)
+ return invalid_profiles + _call_profdata_tool(
+ valid_profiles, profile_output_file_path, profdata_tool_path,
+ retries - 1)
+
+ logging.error('Failed to merge profiles, return code (%d), output: %r' %
+ (error.returncode, error.output))
+ raise error
+
+ logging.info('Profile data is created as: "%r".', profile_output_file_path)
+ return []
+
+
+def _get_profile_paths(input_dir, input_extension):
+ """Finds all the profiles in the given directory (recursively)."""
+ paths = []
+ for dir_path, _sub_dirs, file_names in os.walk(input_dir):
+ paths.extend([
+ os.path.join(dir_path, fn)
+ for fn in file_names
+ if fn.endswith(input_extension)
+ ])
+ return paths
+
+
+def _validate_and_convert_profraws(profraw_files, profdata_tool_path):
+ """Validates and converts profraws to profdatas.
+
+ For each given .profraw file in the input, this method first validates it by
+ trying to convert it to an indexed .profdata file, and if the validation and
+ conversion succeeds, the generated .profdata file will be included in the
+ output, otherwise, won't.
+
+ This method is mainly used to filter out invalid profraw files.
+
+ Args:
+ profraw_files: A list of .profraw paths.
+ profdata_tool_path: The path to the llvm-profdata executable.
+
+ Returns:
+ A tuple:
+ A list of converted .profdata files of *valid* profraw files.
+ A list of *invalid* profraw files.
+ """
+ logging.info('Validating and converting .profraw files.')
+
+ for profraw_file in profraw_files:
+ if not profraw_file.endswith('.profraw'):
+ raise RuntimeError('%r is expected to be a .profraw file.' % profraw_file)
+
+ cpu_count = multiprocessing.cpu_count()
+ counts = max(10, cpu_count - 5) # Use 10+ processes, but leave 5 cpu cores.
+ pool = multiprocessing.Pool(counts)
+ output_profdata_files = multiprocessing.Manager().list()
+ invalid_profraw_files = multiprocessing.Manager().list()
+
+ for profraw_file in profraw_files:
+ pool.apply_async(_validate_and_convert_profraw,
+ (profraw_file, output_profdata_files,
+ invalid_profraw_files, profdata_tool_path))
+
+ pool.close()
+ pool.join()
+
+ # Remove inputs, as they won't be needed and they can be pretty large.
+ for input_file in profraw_files:
+ os.remove(input_file)
+
+ return list(output_profdata_files), list(invalid_profraw_files)
+
+
+def _validate_and_convert_profraw(profraw_file, output_profdata_files,
+ invalid_profraw_files, profdata_tool_path):
+ output_profdata_file = profraw_file.replace('.profraw', '.profdata')
+ subprocess_cmd = [
+ profdata_tool_path, 'merge', '-o', output_profdata_file, '-sparse=true',
+ profraw_file
+ ]
+
+ try:
+ # Redirecting stderr is required because, when an error happens,
+ # llvm-profdata writes the error output to stderr, and our error handling
+ # logic relies on that output.
+ output = subprocess.check_output(subprocess_cmd, stderr=subprocess.STDOUT)
+ logging.info('Validating and converting %r to %r succeeded with output: %r',
+ profraw_file, output_profdata_file, output)
+ output_profdata_files.append(output_profdata_file)
+ except subprocess.CalledProcessError as error:
+ logging.warning('Validating and converting %r to %r failed with output: %r',
+ profraw_file, output_profdata_file, error.output)
+ invalid_profraw_files.append(profraw_file)
+
+
+def merge_profiles(input_dir, output_file, input_extension, profdata_tool_path):
+ """Merges the profiles produced by the shards using llvm-profdata.
+
+ Args:
+ input_dir (str): The path to traverse to find input profiles.
+ output_file (str): Where to write the merged profile.
+ input_extension (str): File extension to look for in the input_dir.
+ e.g. '.profdata' or '.profraw'
+ profdata_tool_path: The path to the llvm-profdata executable.
+ Returns:
+ The list of profiles that had to be excluded to get the merge to
+ succeed.
+ """
+ profile_input_file_paths = _get_profile_paths(input_dir, input_extension)
+ invalid_profraw_files = []
+ if input_extension == '.profraw':
+ profile_input_file_paths, invalid_profraw_files = (
+ _validate_and_convert_profraws(profile_input_file_paths,
+ profdata_tool_path))
+ logging.info('List of converted .profdata files: %r',
+ profile_input_file_paths)
+ logging.info((
+ 'List of invalid .profraw files that failed to validate and convert: %r'
+ ), invalid_profraw_files)
+
+ invalid_profdata_files = _call_profdata_tool(
+ profile_input_file_paths=profile_input_file_paths,
+ profile_output_file_path=output_file,
+ profdata_tool_path=profdata_tool_path)
+
+ # Remove inputs, as they won't be needed and they can be pretty large.
+ for input_file in profile_input_file_paths:
+ os.remove(input_file)
+
+ return invalid_profraw_files + invalid_profdata_files
diff --git a/testing/merge_scripts/code_coverage/merge_profiles.py b/testing/merge_scripts/code_coverage/merge_profiles.py
new file mode 100755
index 0000000..354080a2
--- /dev/null
+++ b/testing/merge_scripts/code_coverage/merge_profiles.py
@@ -0,0 +1,67 @@
+#!/usr/bin/python
+# Copyright 2019 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""This script merges code coverage profiles from multiple shards.
+
+It is similar to merge_steps.py, but it merges .profraw files and accepts the
+parameters passed by the swarming merge API.
+"""
+
+import argparse
+import json
+import logging
+import os
+import sys
+
+import merge_lib as merger
+
+
+def _MergeAPIArgumentParser(*args, **kwargs):
+ """Parameters passed to this merge script, as per:
+ https://chromium.googlesource.com/chromium/tools/build/+/master/scripts/slave/recipe_modules/swarming/resources/merge_api.py
+ """
+ parser = argparse.ArgumentParser(*args, **kwargs)
+ parser.add_argument('--build-properties', help=argparse.SUPPRESS)
+ parser.add_argument('--summary-json', help=argparse.SUPPRESS)
+ parser.add_argument('--task-output-dir', help=argparse.SUPPRESS)
+ parser.add_argument(
+ '-o', '--output-json', required=True, help=argparse.SUPPRESS)
+ parser.add_argument(
+ '--profdata-dir', required=True, help='where to store the merged data')
+ parser.add_argument(
+ '--llvm-profdata', required=True, help='path to llvm-profdata executable')
+ parser.add_argument('jsons_to_merge', nargs='*', help=argparse.SUPPRESS)
+ return parser
+
+
+def main():
+ desc = "Merge profraw files in <--task-output-dir> into a single profdata."
+ parser = _MergeAPIArgumentParser(description=desc)
+ params = parser.parse_args()
+ invalid_profiles = merger.merge_profiles(
+ params.task_output_dir,
+ os.path.join(params.profdata_dir, 'default.profdata'), '.profraw',
+ params.llvm_profdata)
+ if invalid_profiles:
+ with open(os.path.join(params.profdata_dir, 'invalid_profiles.json'),
+ 'w') as f:
+ json.dump(invalid_profiles, f)
+
+ # TODO(crbug.com/921300) This script doesn't know how to merge test results,
+ # and the correct solution should be to take other merge scripts as inputs to
+ # perform the merge.
+ # However, to work around the issue that fuzzer test steps are red, the
+ # following logic directly copies the output json if there is only one shard.
+ # This strategy should work for test targets that only have one shard, such
+ # as fuzzer targets and simple gtest targets.
+ if len(params.jsons_to_merge) == 1:
+ with open(params.jsons_to_merge[0]) as f_read:
+ with open(params.output_json, 'w') as f_write:
+ f_write.write(f_read.read())
+
+
+if __name__ == '__main__':
+ logging.basicConfig(
+ format='[%(asctime)s %(levelname)s] %(message)s', level=logging.INFO)
+ sys.exit(main())
diff --git a/testing/merge_scripts/code_coverage/merge_profiles_test.py b/testing/merge_scripts/code_coverage/merge_profiles_test.py
new file mode 100755
index 0000000..5cbe5c2
--- /dev/null
+++ b/testing/merge_scripts/code_coverage/merge_profiles_test.py
@@ -0,0 +1,197 @@
+#!/usr/bin/env vpython
+# Copyright 2019 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import json
+import os
+import subprocess
+import sys
+import unittest
+
+THIS_DIR = os.path.dirname(os.path.abspath(__file__))
+sys.path.insert(
+ 0, os.path.abspath(os.path.join(THIS_DIR, os.pardir, os.pardir, os.pardir,
+ 'third_party', 'pymock')))
+
+import mock
+
+import merge_profiles
+import merge_steps
+import merge_lib as merger
+
+
+class MergeProfilesTest(unittest.TestCase):
+
+ def __init__(self, *args, **kwargs):
+ super(MergeProfilesTest, self).__init__(*args, **kwargs)
+ self.maxDiff = None
+
+ def test_merge_script_api_parameters(self):
+ """Test the step-level merge front-end."""
+ build_properties = json.dumps({
+ 'some': {
+ 'complicated': ['nested', {
+ 'json': None,
+ 'object': 'thing',
+ }]
+ }
+ })
+ task_output_dir = 'some/task/output/dir'
+ profdata_dir = '/some/different/path/to/profdata/default.profdata'
+ profdata_file = os.path.join(profdata_dir, 'default.profdata')
+ args = [
+ 'script_name', '--output-json', 'output.json', '--build-properties',
+ build_properties, '--summary-json', 'summary.json', '--task-output-dir',
+ task_output_dir, '--profdata-dir', profdata_dir, '--llvm-profdata',
+ 'llvm-profdata', 'a.json', 'b.json', 'c.json'
+ ]
+ with mock.patch.object(merger, 'merge_profiles') as mock_merge:
+ mock_merge.return_value = None
+ with mock.patch.object(sys, 'argv', args):
+ merge_profiles.main()
+ self.assertEqual(
+ mock_merge.call_args,
+ mock.call(task_output_dir, profdata_file, '.profraw',
+ 'llvm-profdata'))
+
+ def test_merge_steps_parameters(self):
+ """Test the build-level merge front-end."""
+ input_dir = 'some/task/output/dir'
+ output_file = '/some/different/path/to/profdata/merged.profdata'
+ args = [
+ 'script_name',
+ '--input-dir',
+ input_dir,
+ '--output-file',
+ output_file,
+ '--llvm-profdata',
+ 'llvm-profdata',
+ ]
+ with mock.patch.object(merger, 'merge_profiles') as mock_merge:
+ mock_merge.return_value = None
+ with mock.patch.object(sys, 'argv', args):
+ merge_steps.main()
+ self.assertEqual(
+ mock_merge.call_args,
+ mock.call(input_dir, output_file, '.profdata', 'llvm-profdata'))
+
+ @mock.patch.object(merger, '_validate_and_convert_profraws')
+ def test_merge_profraw(self, mock_validate_and_convert_profraws):
+ mock_input_dir_walk = [
+ ('/b/some/path', ['0', '1', '2', '3'], ['summary.json']),
+ ('/b/some/path/0', [],
+ ['output.json', 'default-1.profraw', 'default-2.profraw']),
+ ('/b/some/path/1', [],
+ ['output.json', 'default-1.profraw', 'default-2.profraw']),
+ ]
+
+ mock_validate_and_convert_profraws.return_value = [
+ '/b/some/path/0/default-1.profdata',
+ '/b/some/path/1/default-2.profdata',
+ ], [
+ '/b/some/path/0/default-2.profraw',
+ '/b/some/path/1/default-1.profraw',
+ ]
+
+ with mock.patch.object(os, 'walk') as mock_walk:
+ with mock.patch.object(os, 'remove'):
+ mock_walk.return_value = mock_input_dir_walk
+ with mock.patch.object(subprocess, 'check_output') as mock_exec_cmd:
+ merger.merge_profiles('/b/some/path', 'output/dir/default.profdata',
+ '.profraw', 'llvm-profdata')
+ self.assertEqual(
+ mock.call(
+ [
+ 'llvm-profdata',
+ 'merge',
+ '-o',
+ 'output/dir/default.profdata',
+ '-sparse=true',
+ '/b/some/path/0/default-1.profdata',
+ '/b/some/path/1/default-2.profdata',
+ ],
+ stderr=-2,
+ ), mock_exec_cmd.call_args)
+
+ self.assertTrue(mock_validate_and_convert_profraws.called)
+
+ @mock.patch.object(merger, '_validate_and_convert_profraws')
+ def test_merge_profdata(self, mock_validate_and_convert_profraws):
+ mock_input_dir_walk = [
+ ('/b/some/path', ['base_unittests', 'url_unittests'], ['summary.json']),
+ ('/b/some/path/base_unittests', [], ['output.json',
+ 'default.profdata']),
+ ('/b/some/path/url_unittests', [], ['output.json', 'default.profdata']),
+ ]
+ with mock.patch.object(os, 'walk') as mock_walk:
+ with mock.patch.object(os, 'remove'):
+ mock_walk.return_value = mock_input_dir_walk
+ with mock.patch.object(subprocess, 'check_output') as mock_exec_cmd:
+ merger.merge_profiles('/b/some/path', 'output/dir/default.profdata',
+ '.profdata', 'llvm-profdata')
+ self.assertEqual(
+ mock.call(
+ [
+ 'llvm-profdata',
+ 'merge',
+ '-o',
+ 'output/dir/default.profdata',
+ '-sparse=true',
+ '/b/some/path/base_unittests/default.profdata',
+ '/b/some/path/url_unittests/default.profdata',
+ ],
+ stderr=-2,
+ ), mock_exec_cmd.call_args)
+
+ # The mock method should only apply when merging .profraw files.
+ self.assertFalse(mock_validate_and_convert_profraws.called)
+
+ def test_retry_profdata_merge_failures(self):
+ mock_input_dir_walk = [
+ ('/b/some/path', ['0', '1'], ['summary.json']),
+ ('/b/some/path/0', [],
+ ['output.json', 'default-1.profdata', 'default-2.profdata']),
+ ('/b/some/path/1', [],
+ ['output.json', 'default-1.profdata', 'default-2.profdata']),
+ ]
+ with mock.patch.object(os, 'walk') as mock_walk:
+ with mock.patch.object(os, 'remove'):
+ mock_walk.return_value = mock_input_dir_walk
+ with mock.patch.object(subprocess, 'check_output') as mock_exec_cmd:
+ invalid_profiles_msg = (
+ 'error: /b/some/path/0/default-1.profdata: Malformed '
+ 'instrumentation profile data.')
+
+ # Fails on the first merge, but succeeds on the second attempt.
+ mock_exec_cmd.side_effect = [
+ subprocess.CalledProcessError(
+ returncode=1, cmd='dummy cmd', output=invalid_profiles_msg),
+ None
+ ]
+
+ merger.merge_profiles('/b/some/path', 'output/dir/default.profdata',
+ '.profdata', 'llvm-profdata')
+
+ self.assertEqual(2, mock_exec_cmd.call_count)
+
+ # Note that in the second call, /b/some/path/0/default-1.profdata is
+ # excluded!
+ self.assertEqual(
+ mock.call(
+ [
+ 'llvm-profdata',
+ 'merge',
+ '-o',
+ 'output/dir/default.profdata',
+ '-sparse=true',
+ '/b/some/path/0/default-2.profdata',
+ '/b/some/path/1/default-1.profdata',
+ '/b/some/path/1/default-2.profdata',
+ ],
+ stderr=-2,
+ ), mock_exec_cmd.call_args)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/testing/merge_scripts/code_coverage/merge_steps.py b/testing/merge_scripts/code_coverage/merge_steps.py
new file mode 100755
index 0000000..c0d5d4e
--- /dev/null
+++ b/testing/merge_scripts/code_coverage/merge_steps.py
@@ -0,0 +1,33 @@
+#!/usr/bin/python
+# Copyright 2019 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""This script merges code coverage profiles from multiple steps."""
+
+import argparse
+import os
+import sys
+
+import merge_lib as merger
+
+
+def _merge_steps_argument_parser(*args, **kwargs):
+ parser = argparse.ArgumentParser(*args, **kwargs)
+ parser.add_argument('--input-dir', required=True, help=argparse.SUPPRESS)
+ parser.add_argument(
+ '--output-file', required=True, help='where to store the merged data')
+ parser.add_argument(
+ '--llvm-profdata', required=True, help='path to llvm-profdata executable')
+ return parser
+
+
+def main():
+ desc = "Merge profdata files in <--input-dir> into a single profdata."
+ parser = _merge_steps_argument_parser(description=desc)
+ params = parser.parse_args()
+ merger.merge_profiles(params.input_dir, params.output_file, '.profdata',
+ params.llvm_profdata)
+
+
+if __name__ == '__main__':
+ sys.exit(main())