# Copyright 2018 The LUCI Authors. All rights reserved.
# Use of this source code is governed under the Apache License, Version 2.0
# that can be found in the LICENSE file.
"""API for Tricium analyzers to use.
This recipe module is intended to support different kinds of
analyzer recipes, including:
* Recipes that wrap one or more legacy analyzers.
* Recipes that accumulate comments one by one.
* Recipes that wrap other tools and parse their output.
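
For example, a minimal recipe that accumulates comments one by one might
look like this (a sketch; the DEPS entry and comment values are
illustrative):

    DEPS = ['tricium']

    def RunSteps(api):
      api.tricium.add_comment(
          'Spellchecker/typo', 'Possible typo: "teh".', 'README.md',
          start_line=3, end_line=3)
      api.tricium.write_comments()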
"""
import fnmatch
import os
from google.protobuf import json_format
from recipe_engine import recipe_api
from . import legacy_analyzers
from PB.tricium.data import Data
class TriciumApi(recipe_api.RecipeApi):
"""TriciumApi provides basic support for Tricium."""
  # Expose pre-defined analyzers, as well as the LegacyAnalyzer class.
LegacyAnalyzer = legacy_analyzers.LegacyAnalyzer
analyzers = legacy_analyzers.Analyzers
# The limit on the number of comments that can be added via this recipe.
#
# Any comments added after this threshold is reached will be dropped.
_comments_num_limit = 1000
def __init__(self, **kwargs):
"""Sets up the API.
Initializes an empty list of comments for use with
add_comment and write_comments.
"""
super(TriciumApi, self).__init__(**kwargs)
self._comments = []
def add_comment(self,
category,
message,
path,
start_line=0,
end_line=0,
start_char=0,
end_char=0,
suggestions=()):
"""Adds one comment to accumulate.
    For the semantics of start_line, start_char, end_line, and end_char, see
    the Gerrit documentation:
    https://gerrit-review.googlesource.com/Documentation/rest-api-changes.html#comment-range
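
    Example of a comment anchored to a character range on a single line
    (illustrative category and values):

        api.tricium.add_comment(
            'ClangTidy/modernize-use-nullptr',
            'Use nullptr instead of NULL.',
            'src/foo.cc',
            start_line=10, end_line=10, start_char=4, end_char=11)

    Leaving the line/char arguments at their defaults (0) produces a
    file-level comment.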
"""
comment = Data.Comment()
comment.category = category
comment.message = message
comment.path = path
comment.start_line = start_line
comment.end_line = end_line
comment.start_char = start_char
comment.end_char = end_char
for s in suggestions:
json_format.ParseDict(s, comment.suggestions.add())
self.validate_comment(comment)
self._add_comment(comment)
@staticmethod
def validate_comment(comment):
"""Validates comment to comply with Tricium/Gerrit requirements.
    Raises ValueError on the first problem detected.
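
    For example (a sketch using the proto directly):

        c = Data.Comment()
        c.path = 'README.md'
        c.end_line = 5  # start_line is left at 0, implying file-level.
        TriciumApi.validate_comment(c)  # Raises ValueError.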
"""
if comment.start_line < 0:
raise ValueError('start_line must be 1-based, but %d given' %
(comment.start_line,))
if comment.start_line == 0:
for attr in ('end_line', 'start_char', 'end_char'):
value = getattr(comment, attr)
if value:
raise ValueError('start_line is 0, implying file level comment, '
'but %s is %d instead of 0' % (attr, value))
return
if comment.start_line > comment.end_line and comment.end_line != 0:
# TODO(tandrii): it's probably better to require end_line always set.
raise ValueError('start_line must be <= end_line, but %d..%d given' %
(comment.start_line, comment.end_line))
if comment.start_char < 0:
raise ValueError('start_char must be 0-based, but %d given' %
(comment.start_char,))
if comment.end_char < 0:
raise ValueError('end_char must be 0-based, but %d given' %
(comment.end_char,))
if (comment.start_line == comment.end_line and
comment.start_char >= comment.end_char and comment.end_char > 0):
raise ValueError(
'(start_line, start_char) must be before (end_line, end_char), '
'but (%d,%d) .. (%d,%d) given' %
(comment.start_line, comment.start_char, comment.end_line,
comment.end_char))
if os.path.isabs(comment.path):
raise ValueError('path must be relative to the input directory, but '
'got absolute path %s' % (comment.path))
def _add_comment(self, comment):
if comment not in self._comments:
self._comments.append(comment)
def write_comments(self):
"""Emit the results accumulated by `add_comment` and `run_legacy`."""
results = Data.Results()
results.comments.extend(self._comments)
step = self.m.step('write results', [])
if len(results.comments) > self._comments_num_limit:
      # We don't yet know how many of these comments fall on changed lines
      # and would actually be posted. Add a warning to help explain cases
      # where Tricium unexpectedly posts no comments.
      step.presentation.status = self.m.step.WARNING
      step.presentation.step_text = (
          '%d comments created; Tricium may refuse to post comments if there '
          'are too many on changed lines. This build sends only the first %d '
          'comments.' % (len(results.comments), self._comments_num_limit))
      # Truncate to the limit before emitting.
      del results.comments[self._comments_num_limit:]
# The "tricium" output property is read by the Tricium service.
step.presentation.properties['tricium'] = self.m.proto.encode(
results, 'JSONPB', indent=0, preserving_proto_field_name=False)
return step
def run_legacy(self,
analyzers,
input_base,
affected_files,
commit_message,
emit=True):
"""Runs legacy analyzers.
    Comments from the analyzers are accumulated into the same internal list
    used by `add_comment()`. By default, after all the specified analyzers
    have run, the accumulated comments (including any previously added via
    `add_comment()`) are emitted to the tricium output property.
Args:
      * analyzers (List(LegacyAnalyzer)): Analyzers to run.
* input_base (Path): The Tricium input dir, generally a checkout base.
* affected_files (List(str)): Paths of files in the change, relative
to input_base.
* commit_message (str): Commit message from Gerrit.
      * emit (bool): Whether to write results to the tricium output
        property. If False, the caller is responsible for calling
        `write_comments` to emit the comments added by the legacy
        analyzers. This is useful for recipes that run a mixture of
        custom analyzers (using `add_comment()` to store comments) and
        legacy analyzers.
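
    Example (a sketch; the analyzer constant and paths are illustrative):

        api.tricium.run_legacy(
            [api.tricium.analyzers.SPELLCHECKER],
            input_base=api.path.start_dir / 'checkout',
            affected_files=['README.md'],
            commit_message='Fix typos.')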
"""
self._write_files_data(affected_files, commit_message, input_base)
# For each analyzer, download the CIPD package, run it and accumulate
# results. Note: Each analyzer could potentially be run in parallel.
for analyzer in analyzers:
with self.m.step.nest(analyzer.name) as presentation:
        # Skip analyzers whose path filters match none of the affected files.
        if not _matches_path_filters(affected_files, analyzer.path_filters):
          presentation.step_text = 'skipped due to path filters'
          continue
try:
analyzer_dir = self.m.path.cleanup_dir / analyzer.name
output_base = analyzer_dir / 'out'
package_dir = analyzer_dir / 'package'
self._fetch_legacy_analyzer(package_dir, analyzer)
results = self._run_legacy_analyzer(
package_dir,
analyzer,
input_dir=input_base,
output_dir=output_base)
          # Record results on the step and accumulate comments. If one
          # analyzer fails, continue running the rest.
for comment in results.comments:
self._add_comment(comment)
num_comments = len(results.comments)
presentation.step_text = '%s comment(s)' % num_comments
presentation.logs['result'] = self.m.proto.encode(
results, 'JSONPB')
except self.m.step.StepFailure:
presentation.step_text = 'failed'
# The tricium data dir with files.json is written in the checkout cache
# directory and should be cleaned up.
self.m.file.rmtree('clean up tricium data dir', input_base / 'tricium')
if emit:
self.write_comments()
def _write_files_data(self, affected_files, commit_message, base_dir):
"""Writes a Files input message to a file.
Args:
      * affected_files (List(str)): File paths, relative to `base_dir`.
* commit_message (str): The commit message from Gerrit.
* base_dir (Path): Input files base directory.
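
    The resulting tricium/data/files.json is JSONPB-encoded with camelCase
    field names, roughly (illustrative values):

        {"commitMessage": "Fix typos.", "files": [{"path": "README.md"}]}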
"""
files = Data.Files()
files.commit_message = commit_message
for path in affected_files:
# TODO(qyearsley): Set the is_binary and status fields for each file.
# Analyzers use these fields to determine whether to skip files.
f = files.files.add()
f.path = path
data_dir = self._ensure_data_dir(base_dir)
self.m.file.write_proto(
'write files.json',
data_dir / 'files.json',
files,
'JSONPB',
# Tricium analyzers expect camelCase field names.
encoding_kwargs={'preserving_proto_field_name': False})
def _read_results(self, base_dir):
"""Reads a Tricium Results message from a file.
Args:
* base_dir (Path): A directory. Generally this will
be the same as the -output arg passed to the analyzer.
Returns: Results protobuf message.
"""
data_dir = self._ensure_data_dir(base_dir)
results_json = self.m.file.read_text(
'read results',
data_dir / 'results.json',
test_data='{"comments":[]}')
return json_format.Parse(results_json, Data.Results())
def _ensure_data_dir(self, base_dir):
"""Creates the Tricium data directory if it doesn't exist.
    Simple Tricium analyzers read their input from, and write their output
    to, a fixed subpath ('tricium/data') relative to the input/output paths
    they are passed.
Args:
* base_dir (Path): A directory, could be either the -input
or -output passed to a Tricium analyzer.
Returns: Tricium data file directory inside base_dir.
"""
data_dir = base_dir / 'tricium' / 'data'
self.m.file.ensure_directory('ensure tricium data dir', data_dir)
return data_dir
def _fetch_legacy_analyzer(self, package_dir, analyzer):
"""Fetches an analyzer package from CIPD.
Args:
      * package_dir (Path): The path to fetch to.
* analyzer (LegacyAnalyzer): Analyzer package to fetch.
"""
ensure_file = self.m.cipd.EnsureFile()
ensure_file.add_package(analyzer.package, version=analyzer.version)
self.m.cipd.ensure(package_dir, ensure_file)
def _run_legacy_analyzer(self, package_dir, analyzer, input_dir, output_dir):
"""Runs a simple legacy analyzer executable and returns the results.
Args:
* package_dir (Path): The directory where the analyzer CIPD package
contents have been unpacked to.
* analyzer (LegacyAnalyzer): Analyzer object to run.
* input_dir (Path): The Tricium input dir, which is expected to contain
files as well as the metadata at tricium/data/files.json.
* output_dir (Path): The directory to write results into.
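
    The analyzer is invoked from its package directory, roughly:

        <package_dir>/<executable> -input <input_dir> -output <output_dir> \
            <extra args>

    and its results are then read back from
    <output_dir>/tricium/data/results.json.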
"""
# Some analyzers depend on other files in the CIPD package, so cwd is
# expected to be the directory with the analyzer.
with self.m.context(cwd=package_dir):
cmd = [
package_dir / analyzer.executable, '-input', input_dir, '-output',
output_dir
] + analyzer.extra_args
self.m.step('run analyzer',
cmd).presentation.logs['cmd'] = ' '.join(str(c) for c in cmd)
return self._read_results(output_dir)
def _matches_path_filters(files, patterns):
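  """Returns True if any file matches any pattern, or if patterns is empty.

  Patterns use fnmatch-style globs. Note that '*' also matches path
  separators, so e.g. '*.py' matches 'a/b.py'.
  """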
  if not patterns:
    return True
  return any(
      fnmatch.fnmatch(f, p) for p in patterns for f in files)