Reland "Generate and upload separate coverage data for each target in a build."
Changed the name of the upload directory for per-target metadata so that
it doesn't conflict with where the merged metadata is uploaded.
Previous commit description:
Generate and upload separate coverage data for each target in a build.
This requires us to run llvm-cov on the profdata for each test target.
The code coverage service will aggregate the per-target data to make
reports.
Temporarily skipping this for fuzzers so that the builds don't hit the
20-hour time limit on Swarming.
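
For reference, generating one target's metadata boils down to a single
"llvm-cov export" run over that target's profdata. A minimal sketch (the
helper name and argument handling are illustrative; the actual work
happens in generate_coverage_metadata.py):

  import json
  import subprocess

  def export_target_coverage(llvm_cov, profdata, binaries):
    # The first binary is positional; any additional binaries are
    # passed via -object, per the llvm-cov command-line interface.
    cmd = [llvm_cov, 'export', '-instr-profile=' + profdata, binaries[0]]
    for binary in binaries[1:]:
      cmd.extend(['-object', binary])
    return json.loads(subprocess.check_output(cmd))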
Bug: 948885
Change-Id: I17794b6a9aac537b54a2c0c723545d73c37b5f68
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/build/+/1610557
Reviewed-by: Yuke Liao <liaoyuke@chromium.org>
Commit-Queue: Sajjad Mirza <sajjadm@chromium.org>
diff --git a/scripts/slave/recipe_modules/clang_coverage/api.py b/scripts/slave/recipe_modules/clang_coverage/api.py
index d4efc23..e7d13fe 100644
--- a/scripts/slave/recipe_modules/clang_coverage/api.py
+++ b/scripts/slave/recipe_modules/clang_coverage/api.py
@@ -50,6 +50,8 @@
# The location of the scripts used to merge code coverage data (as opposed
# to test results).
self._merge_scripts_location = None
+    # File used to cache the git revision of each source file across
+    # per-target metadata generation steps.
+ self._revision_cache = None
@staticmethod
def _dir_name_for_step(step_name):
@@ -115,6 +117,13 @@
self._metadata_dir = self.m.path.mkdtemp()
return self._metadata_dir
+ @property
+ def generate_individual_metadata(self):
+ """True if we should generate per-target metadata for this build."""
+ return (not self._is_per_cl_coverage and
+ self.m.buildbucket.build.builder.builder in (
+ 'linux-code-coverage', 'linux-code-coverage-dev'))
+
def _get_source_exclusion_pattern(self):
if 'exclude_sources' in self.m.properties:
return _EXCLUDE_SOURCES.get(self.m.properties['exclude_sources'])
@@ -269,6 +278,12 @@
return
try:
+ if self.generate_individual_metadata:
+ with self.m.step.nest('Generate per-target metadata'):
+ if not self._revision_cache:
+ self._revision_cache = self.m.path.mkstemp()
+ self._generate_and_upload_individual_metadata(tests)
+
merged_profdata = self._merge_profdata()
self._surface_merging_errors()
binaries = self._get_binaries(tests)
@@ -279,14 +294,46 @@
self.m.python.succeeding_step(
'skip processing coverage data because no data is found', '')
return
-
- self._generate_and_upload_metadata(binaries, merged_profdata)
+ suffix = ' for %d targets' % len(self._profdata_dirs)
+ self._generate_and_upload_metadata(
+ binaries, merged_profdata, step_name_suffix=suffix)
self._generate_and_upload_html_report_on_trybot(binaries, merged_profdata)
except self.m.step.StepFailure:
self.m.step.active_result.presentation.properties[
'process_coverage_data_failure'] = True
raise
+ def _generate_and_upload_individual_metadata(self, tests):
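+    """Generates and uploads coverage metadata separately for each target.
+
+    Args:
+      tests: A list of test objects; each test's name is used to look up its
+        per-target profdata directory.
+    """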
+ step = self.m.step.active_result
+ for test in tests:
+ # Use the name of the test to look up the profdata file for that step.
+ # This only works for full-codebase coverage. In per-CL coverage the test
+ # name isn't the same as the step name, so the directory won't match.
+ name = test.name
+ # Skip fuzzer targets because they make the build take too long.
+ # TODO(crbug.com/948885) Remove this check when we speed up the build.
+ if name.endswith('fuzzer'):
+ continue
+      if name not in self._profdata_dirs:  # pragma: no cover
+ step.presentation.step_text += (
+ 'No profdata dir for test name: %s' % name)
+ continue
+      profdata = self._profdata_dirs[name].join('default.profdata')
+ binaries = self._get_binaries([test])
+ if not binaries:
+        step.presentation.step_text += 'No binaries for test name: %s' % name
+ continue
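+      # Per-target data is uploaded under 'target_metadata/' so that it does
+      # not collide with the merged metadata uploaded under 'metadata/'.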
+ gs_path = self._compose_gs_path_for_coverage_data(
+ 'target_metadata/%s' % name)
+ out_dir = self.m.path.mkdtemp()
+ self.m.file.ensure_directory('ensure metadata dir for %s' % name, out_dir)
+ self._generate_and_upload_metadata(
+ binaries,
+ profdata,
+ out_dir,
+ gs_path,
+ step_name_suffix=' for %s' % name)
+
def _merge_profdata(self):
"""Merges the profdata generated by each step to a single profdata."""
merged_profdata = self.profdata_dir().join('merged.profdata')
@@ -422,8 +469,9 @@
additional_merge['script'],
])
new_merge['args'].extend([
- '--additional-merge-script-args', self.m.json.dumps(
- additional_merge['args'])])
+ '--additional-merge-script-args',
+ self.m.json.dumps(additional_merge['args'])
+ ])
return new_merge
@@ -467,13 +515,20 @@
stdout=self.m.raw_io.output_text(add_output_log=True))
return component_mapping
- def _generate_and_upload_metadata(self, binaries, profdata_path):
+ def _generate_and_upload_metadata(self,
+ binaries,
+ profdata_path,
+ output_dir=None,
+ gs_path=None,
+ step_name_suffix=''):
"""Generates the coverage info in metadata format."""
+ output_dir = output_dir or self.metadata_dir
+ gs_path = gs_path or self._compose_gs_path_for_coverage_data('metadata')
args = [
'--src-path',
self.m.path['checkout'],
'--output-dir',
- self.metadata_dir,
+ output_dir,
'--profdata-path',
profdata_path,
'--llvm-cov',
@@ -481,6 +536,8 @@
'--binaries',
]
args.extend(binaries)
+ if self._revision_cache:
+ args.extend(['--revision-cache-path', self._revision_cache])
if self._is_per_cl_coverage:
args.append('--sources')
args.extend([
@@ -495,7 +552,7 @@
self._affected_source_files)
args.extend([
'--diff-mapping-path',
- self.metadata_dir.join(_BOT_TO_GERRIT_LINE_NUM_MAPPING_FILE_NAME)
+ output_dir.join(_BOT_TO_GERRIT_LINE_NUM_MAPPING_FILE_NAME)
])
else:
pattern = self._get_source_exclusion_pattern()
@@ -508,20 +565,19 @@
try:
self.m.python(
- 'generate metadata for %d targets' % len(self._profdata_dirs),
+ 'generate metadata' + step_name_suffix,
self.resource('generate_coverage_metadata.py'),
args=args,
venv=True)
finally:
- gs_path = self._compose_gs_path_for_coverage_data('metadata')
upload_step = self.m.gsutil.upload(
- self.metadata_dir,
+ output_dir,
_BUCKET_NAME,
gs_path,
link_name=None,
args=['-r'],
multithreaded=True,
- name='upload metadata')
+ name='upload metadata' + step_name_suffix)
upload_step.presentation.links['metadata report'] = (
'https://storage.cloud.google.com/%s/%s/index.html' % (_BUCKET_NAME,
gs_path))
diff --git a/scripts/slave/recipe_modules/clang_coverage/resources/generate_coverage_metadata.py b/scripts/slave/recipe_modules/clang_coverage/resources/generate_coverage_metadata.py
index 796e011..53a7468 100644
--- a/scripts/slave/recipe_modules/clang_coverage/resources/generate_coverage_metadata.py
+++ b/scripts/slave/recipe_modules/clang_coverage/resources/generate_coverage_metadata.py
@@ -632,9 +632,62 @@
return compressed_data
-def _generate_metadata(src_path, output_dir, profdata_path, llvm_cov_path,
- binaries, component_mapping, sources, diff_mapping,
- exclusions):
+class RevisionCache(dict):
+ """Dict-like class for storing git metadata for a file.
+
+ Can be used in a with statement to automatically load and save contents to
+ the backing file.
+
+ Since this script is invoked once for each target in a coverage build,
+ computing the git revision for a file that is present in multiple targets adds
+  significant overhead. This class persists that information across
+  invocations of this script, which reduces overall build time.
+
+ Args:
+    cache_file: Path to the file in which to save revision data. If
+      cache_file is falsy, RevisionCache won't try to load or save data.
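+
+  Example (illustrative path):
+    with RevisionCache('/tmp/revision_cache.json') as cache:
+      cache['//base/foo.cc'] = ('a1b2c3d', 1554940800)
+    # On exit, the contents are written back to the cache file.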
+  """
+
+ def __init__(self, cache_file):
+ super(RevisionCache, self).__init__()
+ self._cache_file = cache_file
+
+ def __enter__(self):
+ self.Load()
+ return self
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ self.Save()
+
+ def Load(self):
+ if self._cache_file:
+ try:
+ with open(self._cache_file) as f:
+ self.update(json.load(f))
+ except (IOError, ValueError):
+ logging.warning('Revision cache invalid, starting with empty cache')
+
+ def Save(self):
+ if self._cache_file:
+ try:
+ with open(self._cache_file, 'w') as f:
+ json.dump(self, f)
+ except IOError:
+ logging.warning('Unable to write to revision cache')
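+        # Re-raise so a failed cache write is not silently swallowed.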
+ raise
+
+
+def _generate_metadata(src_path,
+ output_dir,
+ profdata_path,
+ llvm_cov_path,
+ binaries,
+ component_mapping,
+ sources,
+ diff_mapping,
+ exclusions,
+ file_git_metadata=None):
"""Generates code coverage metadata.
Args:
@@ -651,6 +704,9 @@
diff_mapping: A json object that stores the diff mapping. Only meaningful to
per-cl coverage.
exclusions: A regex string to exclude matches from aggregation.
+ file_git_metadata: A dict-like object that maps file path to a tuple of
+ (revision, timestamp). Only meaningful to full-repo
+ coverage.
Returns:
None. This method doesn't return anything; instead, it writes the produced
@@ -665,23 +721,33 @@
'Generating & loading coverage metadata with "llvm-cov export" '
'took %.0f minutes', minutes)
- file_git_metadata = {}
if not diff_mapping:
logging.info('Retrieving file git metadata...')
start_time = time.time()
- all_files = []
+ cache_hit_count = 0
+ if file_git_metadata is None:
+ file_git_metadata = {}
+ else:
+ logging.info('Starting with %d entries in cache', len(file_git_metadata))
+ files_to_check = []
for datum in data['data']:
for file_data in datum['files']:
filename = file_data['filename']
src_file = os.path.relpath(filename, src_path)
if not src_file.startswith('//'):
src_file = '//' + src_file # Prefix the file path with '//'.
- all_files.append(src_file)
- file_git_metadata = repository_util.GetFileRevisions(
- src_path, 'DEPS', all_files)
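+      # Reuse revisions cached by an earlier invocation for another target.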
+ if src_file in file_git_metadata:
+ cache_hit_count += 1
+ else:
+ files_to_check.append(src_file)
+ new_git_metadata = repository_util.GetFileRevisions(src_path, 'DEPS',
+ files_to_check)
+ file_git_metadata.update(new_git_metadata)
minutes = (time.time() - start_time) / 60
logging.info('Retrieving git metadata for %d files took %.0f minutes',
- len(all_files), minutes)
+ len(files_to_check), minutes)
+ if cache_hit_count:
+ logging.info('Found %d cached file revisions', cache_hit_count)
logging.info('Processing coverage data ...')
start_time = time.time()
@@ -700,7 +766,9 @@
file_path = '//' + file_path # Prefix the file path with '//'.
record['path'] = file_path
- git_metadata = file_git_metadata.get(file_path)
+ git_metadata = None
+ if file_git_metadata:
+ git_metadata = file_git_metadata.get(file_path)
if git_metadata:
record['revision'] = git_metadata[0]
record['timestamp'] = git_metadata[1]
@@ -787,6 +855,10 @@
'--exclusion-pattern',
type=str,
help='regex pattern for sources to exclude from aggregation')
+ parser.add_argument(
+ '--revision-cache-path',
+ type=str,
+ help='file containing git revisions for each source file')
return parser.parse_args(args=args)
@@ -832,10 +904,12 @@
'Either component_mapping (for full-repo coverage) or diff_mapping '
'(for per-cl coverage) must be specified.')
- compressed_data = _generate_metadata(
- params.src_path, params.output_dir, params.profdata_path, params.llvm_cov,
- params.binaries, component_mapping, abs_sources, diff_mapping,
- params.exclusion_pattern)
+  # The revision cache is only used for full-codebase coverage.
+ with RevisionCache(params.revision_cache_path) as rev_cache:
+ compressed_data = _generate_metadata(
+ params.src_path, params.output_dir, params.profdata_path,
+ params.llvm_cov, params.binaries, component_mapping, abs_sources,
+ diff_mapping, params.exclusion_pattern, rev_cache)
with open(os.path.join(params.output_dir, 'all.json.gz'), 'w') as f:
f.write(zlib.compress(json.dumps(compressed_data)))
diff --git a/scripts/slave/recipe_modules/clang_coverage/tests/full.py b/scripts/slave/recipe_modules/clang_coverage/tests/full.py
index 5399e09..50d8b96 100644
--- a/scripts/slave/recipe_modules/clang_coverage/tests/full.py
+++ b/scripts/slave/recipe_modules/clang_coverage/tests/full.py
@@ -4,7 +4,6 @@
from recipe_engine import post_process
-
DEPS = [
'chromium_tests',
'clang_coverage',
@@ -16,7 +15,6 @@
'recipe_engine/step',
]
-
# Number of tests. Needed by the tests.
_NUM_TARGETS = 7
@@ -41,12 +39,14 @@
api.chromium_tests.steps.SwarmingGTestTest('gl_unittests_ozone'),
api.chromium_tests.steps.SwarmingIsolatedScriptTest('abc_fuzzer'),
api.chromium_tests.steps.SwarmingIsolatedScriptTest(
- 'blink_web_tests', merge={
- 'script': api.path['start_dir'].join(
- 'coverage', 'tests', 'merge_blink_web_tests.py'),
+ 'blink_web_tests',
+ merge={
+ 'script':
+ api.path['start_dir'].join('coverage', 'tests',
+ 'merge_blink_web_tests.py'),
'args': ['random', 'args'],
})
- ]
+ ]
assert _NUM_TARGETS == len(tests)
for test in tests:
@@ -54,8 +54,8 @@
api.clang_coverage.profdata_dir(step)
# Protected access ok here, as this is normally done by the test object
# itself.
- api.clang_coverage.shard_merge(step, additional_merge=getattr(
- test, '_merge', None))
+ api.clang_coverage.shard_merge(
+ step, additional_merge=getattr(test, '_merge', None))
api.clang_coverage.process_coverage_data(tests)
@@ -64,6 +64,7 @@
_ = api.clang_coverage.raw_profile_merge_script
+# yapf: disable
def GenTests(api):
yield (
api.test('basic')
@@ -89,7 +90,8 @@
post_process.MustRun,
'generate metadata for %s targets' % _NUM_TARGETS)
+ api.post_process(
- post_process.MustRun, 'gsutil upload metadata')
+ post_process.MustRun,
+ 'gsutil upload metadata for %s targets' % _NUM_TARGETS)
+ api.post_process(
post_process.DoesNotRun,
'generate html report for %s targets' % _NUM_TARGETS)
@@ -130,7 +132,8 @@
post_process.MustRun,
'generate metadata for %s targets' % _NUM_TARGETS)
+ api.post_process(
- post_process.MustRun, 'gsutil upload metadata')
+ post_process.MustRun,
+ 'gsutil upload metadata for %s targets' % _NUM_TARGETS)
+ api.post_process(
post_process.DoesNotRun,
'generate html report for %s targets' % _NUM_TARGETS)
@@ -187,7 +190,8 @@
post_process.MustRun,
'generate metadata for %s targets' % _NUM_TARGETS)
+ api.post_process(
- post_process.MustRun, 'gsutil upload metadata')
+ post_process.MustRun,
+ 'gsutil upload metadata for %s targets' % _NUM_TARGETS)
+ api.post_process(post_process.StatusSuccess)
+ api.post_process(post_process.DropExpectation)
)
@@ -270,7 +274,8 @@
post_process.MustRun,
'generate metadata for %s targets' % _NUM_TARGETS)
+ api.post_process(
- post_process.MustRun, 'gsutil upload metadata')
+ post_process.MustRun,
+ 'gsutil upload metadata for %s targets' % _NUM_TARGETS)
+ api.post_process(
post_process.DoesNotRun,
'generate html report for %s targets' % _NUM_TARGETS)
@@ -283,7 +288,7 @@
'generate metadata for %s targets' % _NUM_TARGETS, retcode=1)
+ api.post_check(
lambda check, steps:
- check(steps['gsutil upload metadata']
+ check(steps['gsutil upload metadata for %s targets' % _NUM_TARGETS]
.output_properties['process_coverage_data_failure'] == 'true'))
+ api.post_process(post_process.StatusFailure)
+ api.post_process(post_process.DropExpectation)