# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import json
import re
from recipe_engine import recipe_api
_BUCKET_NAME = 'code-coverage-data'
# Name of the file to store the component map.
_COMPONENT_MAPPING_FILE_NAME = 'component_mapping_path.json'
# Name of the file to store the line number mapping from bot to Gerrit.
_BOT_TO_GERRIT_LINE_NUM_MAPPING_FILE_NAME = (
'bot_to_gerrit_line_num_mapping.json')
# Set of valid extensions for source files that use Clang.
_EXTENSIONS_OF_SOURCE_FILES_SUPPORTED_BY_CLANG = set([
'.mm', '.S', '.c', '.hh', '.cxx', '.hpp', '.cc', '.cpp', '.ipp', '.h', '.m',
'.hxx'
])
# Maps the exclude_sources property value to a regex for files that are to be
# excluded from coverage aggregates.
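# For example, 'all_test_files' maps to a regex matching any path that
# contains 'test'.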
_EXCLUDE_SOURCES = {
'all_test_files': r'.*test.*',
}
class ClangCoverageApi(recipe_api.RecipeApi):
"""This module contains apis to interact with llvm-cov and llvm-profdata."""
def __init__(self, *args, **kwargs):
super(ClangCoverageApi, self).__init__(*args, **kwargs)
# A single temporary directory to contain the profile data for all targets
# in the build.
self._base_profdata_dir = None
# Temp dir for report.
self._report_dir = None
# Temp dir for metadata.
self._metadata_dir = None
# Maps step names to subdirectories of the above.
self._profdata_dirs = {}
# When set, subset of source files to include in the coverage report.
self._affected_source_files = None
# When set, indicates that current context is per-cl coverage for try jobs.
self._is_per_cl_coverage = False
# The location of the scripts used to merge code coverage data (as opposed
# to test results).
self._merge_scripts_location = None
# File to store the git revisions of each source file.
self._revision_cache = None
@staticmethod
def _dir_name_for_step(step_name):
"""Normalizes string, converts to lowercase, removes non-alpha characters,
and converts spaces to underscores.
Adapted from:
https://stackoverflow.com/questions/295135/turn-a-string-into-a-valid-filename
Args:
step_name (str): the name of the step to use.
"""
value = re.sub(r'[^\w\s]', '', step_name).strip().lower()
value = re.sub(r'[-\s]+', '_', value)
return value
@property
def merge_scripts_location(self):
if not self._merge_scripts_location: # pragma: no cover
self._merge_scripts_location = self.m.chromium_checkout.working_dir.join(
'src', 'testing', 'merge_scripts', 'code_coverage')
return self._merge_scripts_location
@property
def step_merge_script(self):
"""Returns the script that merges indexed profiles from multiple targets."""
return self.merge_scripts_location.join('merge_steps.py')
@property
def raw_profile_merge_script(self):
"""Returns the location of a script that merges raw profiles from shards.
This is intended to be passed to the swarming recipe module to be called
upon completion of the shards.
"""
return self.merge_scripts_location.join('merge_results.py')
def _llvm_exec(self, name):
return self.m.path['checkout'].join('third_party', 'llvm-build',
'Release+Asserts', 'bin', name)
@property
def profdata_executable(self):
"""Returns the path to the llvm-profdata executable."""
return self._llvm_exec('llvm-profdata')
@property
def cov_executable(self):
"""Returns the path to the llvm-cov executable."""
return self._llvm_exec('llvm-cov')
@property
def report_dir(self):
"""A temporary directory to save a report to. Created on first access."""
if not self._report_dir:
self._report_dir = self.m.path.mkdtemp()
return self._report_dir
@property
def metadata_dir(self):
"""A temporary directory for the metadata. Created on first access."""
if not self._metadata_dir:
self._metadata_dir = self.m.path.mkdtemp()
return self._metadata_dir
@property
def generate_individual_metadata(self):
"""True if we should generate per-target metadata for this build."""
return (not self._is_per_cl_coverage and
self.m.buildbucket.build.builder.builder in (
'linux-code-coverage', 'linux-code-coverage-dev'))
def _get_source_exclusion_pattern(self):
if 'exclude_sources' in self.m.properties:
return _EXCLUDE_SOURCES.get(self.m.properties['exclude_sources'])
return []
def profdata_dir(self, step_name=None):
"""Ensures a directory exists for writing the step-level merged profdata.
Args:
step_name (str): The name of the step for the target whose profile we'll
save in this dir. None to get the parent directory that contains the
dirs for all steps.
"""
# Create the parent directory when first needed.
if not self._base_profdata_dir:
self._base_profdata_dir = self.m.path.mkdtemp()
if not step_name:
return self._base_profdata_dir
if step_name in self._profdata_dirs:
return self._profdata_dirs[step_name]
new_dir = self._base_profdata_dir.join(self._dir_name_for_step(step_name))
self.m.file.ensure_directory('ensure profdata dir for %s' % step_name,
new_dir)
self._profdata_dirs[step_name] = new_dir
return new_dir
@property
def using_coverage(self):
"""Checks if the current build is running coverage-instrumented targets."""
# TODO(crbug.com/896751): Implement a cleaner way to determine if the recipe
# is using code coverage instrumentation.
return (self.m.gclient.c and self.m.gclient.c.solutions and
'checkout_clang_coverage_tools' in self.m.gclient.c.solutions[0]
.custom_vars)
def _get_binaries(self, tests):
"""Returns paths to the binary for the given test objects.
By default, use the name of the target as the binary.
"""
# TODO(crbug.com/899974): Implement a sturdier approach that also works in
# separate builder-tester setup.
binaries = []
for t in tests:
# There are a number of local isolated scripts, such as
# check_static_initializers, checkdeps and checkperms, that introduce
# extra complexities; meanwhile, it's still unclear whether there is any
# value in collecting code coverage data for them. So, for simplicity, skip
# tests that don't run on swarming for now.
if not t.runs_on_swarming:
continue
target = t.isolate_target
# TODO(crbug.com/914213): Remove webkit_layout_tests reference.
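# Patterns are matched in order and the first match wins, so the specific
# entries must come before the '.*' catch-all at the end.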
patterns = [
# Following are scripts based tests that don't build any binaries.
['blink_python_tests', None],
['metrics_python_tests', None],
['telemetry_gpu_unittests', None],
['devtools_closure_compile', None],
['devtools_eslint', None],
# Following are mappings from isolate target names to binary names.
['telemetry_gpu_integration_test', 'chrome'],
['telemetry_unittests', 'chrome'],
['telemetry_perf_unittests', 'chrome'],
['chromedriver_py_tests', 'chrome'],
['chromedriver_replay_unittests', 'chrome'],
['chrome_all_tast_tests', 'chrome'],
['cros_vm_sanity_test', 'chrome'],
['xr_browser_tests', 'xr_browser_tests_binary'],
['content_shell_crash_test', 'content_shell'],
['.*webkit_layout_tests', 'content_shell'],
['.*blink_web_tests', 'content_shell'],
['.*_ozone', target[:-len('_ozone')]],
['.*', target],
]
for pattern, binary in patterns:
if re.match(pattern, target):
if binary is not None:
binaries.append(self.m.chromium.output_dir.join(binary))
break
return list(set(binaries)) # Remove duplicates.
def _filter_source_file(self, file_paths):
"""Fitlers source files with valid extensions.
Set of valid extensions is defined in:
_EXTENSIONS_OF_SOURCE_FILES_SUPPORTED_BY_CLANG.
Args:
file_paths: A list of file paths relative to the checkout path.
Returns:
A sub-list of the input with valid extensions.
"""
source_files = []
for file_path in file_paths:
if any([
file_path.endswith(extension)
for extension in _EXTENSIONS_OF_SOURCE_FILES_SUPPORTED_BY_CLANG
]):
source_files.append(file_path)
return source_files
def instrument(self, affected_files):
"""Saves source paths to generate coverage instrumentation for to a file.
Args:
affected_files (list of str): paths to the files we want to instrument,
relative to the checkout path.
"""
self._is_per_cl_coverage = True
self.m.file.ensure_directory(
'create .clang-coverage',
self.m.path['checkout'].join('.clang-coverage'))
self._affected_source_files = self._filter_source_file(affected_files)
return self.m.python(
'save paths of affected files',
self.resource('write_paths_to_instrument.py'),
args=[
'--write-to',
self.m.path['checkout'].join('.clang-coverage',
'files_to_instrument.txt'),
'--src-path',
self.m.path['checkout'],
'--build-path',
self.m.chromium.c.build_dir.join(self.m.chromium.c.build_config_fs),
] + self._affected_source_files,
stdout=self.m.raw_io.output_text(add_output_log=True))
def process_coverage_data(self, tests):
"""Processes the coverage data for html report or metadata.
Args:
tests (list of self.m.chromium_tests.stepsl.Test): A list of test objects
whose binaries we are to create a coverage report for.
"""
if not self._profdata_dirs: # pragma: no cover.
return
if self._is_per_cl_coverage and not self._affected_source_files:
self.m.python.succeeding_step(
'skip collecting coverage data because no source file is changed', '')
return
try:
if self.generate_individual_metadata:
with self.m.step.nest('Generate per-target metadata'):
if not self._revision_cache:
self._revision_cache = self.m.path.mkstemp()
self._generate_and_upload_individual_metadata(tests)
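# Merge all per-step profiles into a single profdata file and surface any
# per-shard merge errors recorded alongside the raw profiles.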
merged_profdata = self._merge_profdata()
self._surface_merging_errors()
binaries = self._get_binaries(tests)
binaries = self._get_binaries_with_valid_coverage_data_on_trybot(
binaries, merged_profdata)
if not binaries:
self.m.python.succeeding_step(
'skip processing coverage data because no data is found', '')
return
suffix = ' for %d tests' % len(self._profdata_dirs)
self._generate_and_upload_metadata(
binaries, merged_profdata, step_name_suffix=suffix)
self._generate_and_upload_html_report_on_trybot(binaries, merged_profdata)
except self.m.step.StepFailure:
self.m.step.active_result.presentation.properties[
'process_coverage_data_failure'] = True
raise
def _generate_and_upload_individual_metadata(self, tests):
step = self.m.step.active_result
for test in tests:
# Use the name of the test to look up the profdata file for that step.
# This only works for full-codebase coverage. In per-CL coverage the test
# name isn't the same as the step name, so the directory won't match.
name = test.name
# Skip fuzzer targets because they make the build take too long.
# TODO(crbug.com/948885) Remove this check when we speed up the build.
if name.endswith('fuzzer'):
continue
if name not in self._profdata_dirs: # pragma: no cover.
step.presentation.step_text += (
'No profdata dir for test name: %s' % name)
continue
profdata = self._profdata_dirs[name].join("default.profdata")
binaries = self._get_binaries([test])
if not binaries:
step.presentation.step_text += "No binaries for test name: %s" % name
continue
gs_path = self._compose_gs_path_for_coverage_data(
'target_metadata/%s' % name)
out_dir = self.m.path.mkdtemp()
self.m.file.ensure_directory('ensure metadata dir for %s' % name, out_dir)
self._generate_and_upload_metadata(
binaries,
profdata,
out_dir,
gs_path,
step_name_suffix=' for %s' % name)
def _merge_profdata(self):
"""Merges the profdata generated by each step to a single profdata."""
merged_profdata = self.profdata_dir().join('merged.profdata')
self.m.python(
'merge coverage profile data for %d tests' % len(self._profdata_dirs),
self.step_merge_script,
args=[
'--input-dir',
self.profdata_dir(),
'--output-file',
merged_profdata,
'--llvm-profdata',
self.profdata_executable,
])
gs_path = self._compose_gs_path_for_coverage_data('merged.profdata')
upload_step = self.m.gsutil.upload(
merged_profdata,
_BUCKET_NAME,
gs_path,
link_name=None,
name='upload merged.profdata')
upload_step.presentation.links['merged.profdata'] = (
'https://storage.cloud.google.com/%s/%s' % (_BUCKET_NAME, gs_path))
upload_step.presentation.properties['merged_profdata_gs_path'] = gs_path
return merged_profdata
# TODO(crbug.com/929769): Remove this method when the fix is landed upstream.
def _get_binaries_with_valid_coverage_data_on_trybot(self, binaries,
profdata_path):
"""Gets binaries with valid coverage data.
llvm-cov bails out with the error message "No coverage data found" if an
included binary does not exercise any instrumented file. The long-term
solution is to make llvm-cov proceed by ignoring binaries without coverage
data; as a short-term workaround, this method invokes llvm-cov on each
binary and decides whether it has coverage data based on the return code
and error message, filtering out the binaries that do not.
This method is expected to run fast for per-CL coverage because only a
small number of files are instrumented.
Args:
binaries (list): A list of absolute paths to binaries.
profdata_path (str): Path to the merged profdata file.
Returns:
A list of absolute paths to the binaries with valid coverage data.
"""
if not (self.m.buildbucket.build.builder.bucket == 'try' and
self._is_per_cl_coverage and self._affected_source_files):
# Only filter out binaries without coverage data for per-CL coverage.
return binaries
args = [
'--profdata-path', profdata_path, '--llvm-cov', self.cov_executable,
'--output-json',
self.m.json.output()
]
args.extend(binaries)
step_result = self.m.python(
'filter binaries with valid coverage data for %s binaries' %
len(binaries),
self.resource('get_binaries_with_valid_coverage_data.py'),
args=args,
step_test_data=lambda: self.m.json.test_api.output([
'/path/to/base_unittests',
'/path/to/content_shell',]))
return step_result.json.output
def _generate_and_upload_html_report_on_trybot(self, binaries, profdata_path):
"""Generate html coverage report for the given binaries.
Produce a coverage report for the instrumented test targets and upload to
the appropriate bucket.
"""
if not (self.m.buildbucket.build.builder.bucket == 'try' and
self._is_per_cl_coverage and self._affected_source_files):
# Only upload html report for CQ coverage bots.
return
args = [
'--report-directory', self.report_dir, '--profdata-path', profdata_path,
'--llvm-cov', self.cov_executable, '--binaries'
]
args.extend(binaries)
args.append('--sources')
args.extend(
[self.m.path['checkout'].join(s) for s in self._affected_source_files])
self.m.python(
'generate html report for %d tests' % len(self._profdata_dirs),
self.resource('make_report.py'),
args=args)
html_report_gs_path = self._compose_gs_path_for_coverage_data('html_report')
upload_step = self.m.gsutil.upload(
self.report_dir,
_BUCKET_NAME,
html_report_gs_path,
link_name=None,
args=['-r'],
multithreaded=True,
name='upload html report')
upload_step.presentation.links['html report'] = (
'https://storage.cloud.google.com/%s/%s/index.html' %
(_BUCKET_NAME, html_report_gs_path))
def shard_merge(self, step_name, additional_merge=None):
"""Returns a merge object understood by the swarming module.
See the docstring for the `merge` parameter of api.chromium_swarming.task.
|additional_merge| is an additional merge script. This will be invoked from
the clang coverage merge script.
"""
new_merge = {
'script':
self.raw_profile_merge_script,
'args': [
'--profdata-dir',
self.profdata_dir(step_name),
'--llvm-profdata',
self.profdata_executable,
],
}
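# When an |additional_merge| script is given, it is chained behind the raw
# profile merge: merge_results.py is expected to invoke it with the
# JSON-encoded arguments passed via --additional-merge-script-args.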
if additional_merge:
new_merge['args'].extend([
'--additional-merge-script',
additional_merge['script'],
])
new_merge['args'].extend([
'--additional-merge-script-args',
self.m.json.dumps(additional_merge['args'])
])
return new_merge
def _compose_gs_path_for_coverage_data(self, data_type):
build = self.m.buildbucket.build
if build.input.gerrit_changes:
# Assume that there is only one Gerrit change, which is true for the
# Chromium CQ in practice.
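# e.g. (illustrative):
# presubmit/chromium-review.googlesource.com/1234567/2/try/<builder>/<build id>/metadata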
gerrit_change = build.input.gerrit_changes[0]
return 'presubmit/%s/%s/%s/%s/%s/%s/%s' % (
gerrit_change.host,
gerrit_change.change, # Change id is unique in a Gerrit host.
gerrit_change.patchset,
build.builder.bucket,
build.builder.builder,
build.id,
data_type,
)
else:
commit = build.input.gitiles_commit
assert commit is not None, 'No gitiles commit'
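# e.g. (illustrative):
# postsubmit/chromium.googlesource.com/chromium/src/<commit sha>/ci/linux-code-coverage/<build id>/metadata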
return 'postsubmit/%s/%s/%s/%s/%s/%s/%s' % (
commit.host,
commit.project,
commit.id, # A commit HEX SHA1 is unique in a Gitiles project.
build.builder.bucket,
build.builder.builder,
build.id,
data_type,
)
def _generate_component_mapping(self):
"""Generates the mapping from crbug components to directories."""
component_mapping = self.m.path.mkdtemp().join(_COMPONENT_MAPPING_FILE_NAME)
command_path = self.m.path['checkout'].join('tools', 'checkteamtags',
'extract_components.py')
command_parts = [command_path, '-o', component_mapping]
self.m.step(
'Run component extraction script to generate mapping',
command_parts,
stdout=self.m.raw_io.output_text(add_output_log=True))
return component_mapping
def _generate_and_upload_metadata(self,
binaries,
profdata_path,
output_dir=None,
gs_path=None,
step_name_suffix=''):
"""Generates the coverage info in metadata format."""
output_dir = output_dir or self.metadata_dir
gs_path = gs_path or self._compose_gs_path_for_coverage_data('metadata')
args = [
'--src-path',
self.m.path['checkout'],
'--output-dir',
output_dir,
'--profdata-path',
profdata_path,
'--llvm-cov',
self.cov_executable,
'--binaries',
]
args.extend(binaries)
if self._revision_cache:
args.extend(['--revision-cache-path', self._revision_cache])
if self._is_per_cl_coverage:
args.append('--sources')
args.extend([
self.m.path['checkout'].join(s) for s in self._affected_source_files
])
# In order to correctly display the (un)covered line numbers on Gerrit,
# per-CL metadata's line numbers need to be rebased because the base
# revision of the change in this build is different from the one on
# Gerrit.
self._generate_line_number_mapping_from_bot_to_gerrit(
self._affected_source_files)
args.extend([
'--diff-mapping-path',
output_dir.join(_BOT_TO_GERRIT_LINE_NUM_MAPPING_FILE_NAME)
])
else:
pattern = self._get_source_exclusion_pattern()
if pattern:
args.extend(['--exclusion-pattern', pattern])
args.extend(
['--component-mapping-path',
self._generate_component_mapping()])
try:
self.m.python(
'generate coverage metadata' + step_name_suffix,
self.resource('generate_coverage_metadata.py'),
args=args,
venv=True)
finally:
upload_step = self.m.gsutil.upload(
output_dir,
_BUCKET_NAME,
gs_path,
link_name=None,
args=['-r'],
multithreaded=True,
name='upload metadata' + step_name_suffix)
upload_step.presentation.links['metadata report'] = (
'https://storage.cloud.google.com/%s/%s/index.html' % (_BUCKET_NAME,
gs_path))
upload_step.presentation.properties['coverage_metadata_gs_path'] = gs_path
upload_step.presentation.properties['coverage_gs_bucket'] = _BUCKET_NAME
def _generate_line_number_mapping_from_bot_to_gerrit(self, source_files):
"""Generates the line number mapping from bot to Gerrit.
Args:
source_files: List of source files to generate the line number mapping for;
the paths are relative to the checkout path.
"""
gerrit_change = self.m.buildbucket.build.input.gerrit_changes[0]
local_to_gerrit_diff_mapping_file = self.metadata_dir.join(
_BOT_TO_GERRIT_LINE_NUM_MAPPING_FILE_NAME)
self.m.python(
'generate line number mapping from bot to Gerrit',
self.resource('rebase_line_number_from_bot_to_gerrit.py'),
args=[
'--host', gerrit_change.host, '--project', gerrit_change.project,
'--change', gerrit_change.change, '--patchset',
gerrit_change.patchset, '--src-path', self.m.path['checkout'],
'--output-file', local_to_gerrit_diff_mapping_file
] + source_files,
stdout=self.m.json.output())
def _surface_merging_errors(self):
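# Simulation-only data returned via step_test_data when the recipe runs
# under the recipe test framework; real runs use the script's actual stdout.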
test_data = {
"failed profiles": {
"browser_tests": ["/tmp/1/default-123.profraw"]
},
"total": 1
}
step_result = self.m.python(
'Finding merging errors',
self.resource('load_merge_errors.py'),
args=['--root-dir', self.profdata_dir()],
step_test_data=lambda: self.m.json.test_api.output_stream(test_data),
stdout=self.m.json.output())
if step_result.stdout:
step_result.presentation.status = self.m.step.FAILURE
step_result.presentation.properties[
'bad_coverage_profiles'] = step_result.stdout