| # Copyright 2018 The LUCI Authors. All rights reserved. |
| # Use of this source code is governed under the Apache License, Version 2.0 |
| # that can be found in the LICENSE file. |
| |
| """API for Tricium analyzers to use. |
| |
| This recipe module is intended to support different kinds of |
| analyzer recipes, including: |
| * Recipes that wrap one or more legacy analyzers. |
| * Recipes that accumulate comments one by one. |
| * Recipes that wrap other tools and parse their output. |
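
A minimal usage sketch (hypothetical recipe code; assumes this module
is available as `api.tricium` via DEPS, and the category and path
below are illustrative):

  def RunSteps(api):
    api.tricium.add_comment(
        'Spellchecker/typo', 'Did you mean "the"?', 'README.md',
        start_line=3)
    api.tricium.write_comments()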
| """ |
| |
| import fnmatch |
| import os |
| |
| from google.protobuf import json_format |
| |
| from recipe_engine import recipe_api |
| |
| from . import legacy_analyzers |
| |
| from PB.tricium.data import Data |
| |
| |
| class TriciumApi(recipe_api.RecipeApi): |
| """TriciumApi provides basic support for Tricium.""" |
| |
  # Expose pre-defined analyzers, as well as the LegacyAnalyzer class.
| LegacyAnalyzer = legacy_analyzers.LegacyAnalyzer |
| analyzers = legacy_analyzers.Analyzers |
| |
| # The limit on the number of comments that can be added via this recipe. |
| # |
| # Any comments added after this threshold is reached will be dropped. |
| _comments_num_limit = 1000 |
| |
| def __init__(self, **kwargs): |
| """Sets up the API. |
| |
| Initializes an empty list of comments for use with |
| add_comment and write_comments. |
| """ |
| super(TriciumApi, self).__init__(**kwargs) |
| self._comments = [] |
| |
| def add_comment(self, |
| category, |
| message, |
| path, |
| start_line=0, |
| end_line=0, |
| start_char=0, |
| end_char=0, |
| suggestions=()): |
| """Adds one comment to accumulate. |
| |
| For semantics of start_line, start_char, end_line, end_char, see Gerrit doc |
| https://gerrit-review.googlesource.com/Documentation/rest-api-changes.html#comment-range |
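
    Example (a sketch; the suggestion dict mirrors the tricium
    Data.Suggestion proto, so field names should be checked against
    data.proto):

      api.tricium.add_comment(
          'ClangTidy/readability', 'Prefer "foo" here.', 'src/main.cc',
          start_line=10, end_line=10, start_char=4, end_char=7,
          suggestions=[{
              'description': 'Replace with "foo"',
              'replacements': [{
                  'path': 'src/main.cc',
                  'replacement': 'foo',
                  'start_line': 10,
                  'end_line': 10,
                  'start_char': 4,
                  'end_char': 7,
              }],
          }])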
| """ |
| comment = Data.Comment() |
| comment.category = category |
| comment.message = message |
| comment.path = path |
| comment.start_line = start_line |
| comment.end_line = end_line |
| comment.start_char = start_char |
| comment.end_char = end_char |
| for s in suggestions: |
| json_format.ParseDict(s, comment.suggestions.add()) |
| self.validate_comment(comment) |
| self._add_comment(comment) |
| |
| @staticmethod |
| def validate_comment(comment): |
| """Validates comment to comply with Tricium/Gerrit requirements. |
| |
    Raises ValueError on the first detected problem.
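
    For example (a sketch), a file-level comment keeps start_line at 0
    and must leave all other positions at 0 as well:

      comment = Data.Comment(path='README.md', message='Consider ...')
      TriciumApi.validate_comment(comment)  # OK: file-level comment.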
| """ |
| if comment.start_line < 0: |
| raise ValueError('start_line must be 1-based, but %d given' % |
| (comment.start_line,)) |
| if comment.start_line == 0: |
| for attr in ('end_line', 'start_char', 'end_char'): |
| value = getattr(comment, attr) |
| if value: |
| raise ValueError('start_line is 0, implying file level comment, ' |
| 'but %s is %d instead of 0' % (attr, value)) |
| return |
| if comment.start_line > comment.end_line and comment.end_line != 0: |
| # TODO(tandrii): it's probably better to require end_line always set. |
| raise ValueError('start_line must be <= end_line, but %d..%d given' % |
| (comment.start_line, comment.end_line)) |
| if comment.start_char < 0: |
| raise ValueError('start_char must be 0-based, but %d given' % |
| (comment.start_char,)) |
| if comment.end_char < 0: |
| raise ValueError('end_char must be 0-based, but %d given' % |
| (comment.end_char,)) |
| if (comment.start_line == comment.end_line and |
| comment.start_char >= comment.end_char and comment.end_char > 0): |
| raise ValueError( |
| '(start_line, start_char) must be before (end_line, end_char), ' |
| 'but (%d,%d) .. (%d,%d) given' % |
| (comment.start_line, comment.start_char, comment.end_line, |
| comment.end_char)) |
| if os.path.isabs(comment.path): |
| raise ValueError('path must be relative to the input directory, but ' |
                       'got absolute path %s' % (comment.path,))
| |
| def _add_comment(self, comment): |
| if comment not in self._comments: |
| self._comments.append(comment) |
| |
| def write_comments(self): |
| """Emit the results accumulated by `add_comment` and `run_legacy`.""" |
| results = Data.Results() |
| results.comments.extend(self._comments) |
| step = self.m.step('write results', []) |
| if len(results.comments) > self._comments_num_limit: |
| # We don't yet know how many of these comments are included in changed |
| # lines and would be posted. Add a warning to try to help with |
| # clarification in the case that Tricium unexpectedly emits no comments. |
| step.presentation.status = self.m.step.WARNING |
| step.presentation.step_text = ( |
          '%s comments created; Tricium may refuse to post comments if there '
| 'are too many in changed lines. This build sends only the first %s ' |
| 'comments.' % (len(results.comments), self._comments_num_limit)) |
| comments = results.comments[:self._comments_num_limit] |
| del results.comments[:] |
| results.comments.extend(comments) |
| |
| # The "tricium" output property is read by the Tricium service. |
| step.presentation.properties['tricium'] = self.m.proto.encode( |
| results, 'JSONPB', indent=0, preserving_proto_field_name=False) |
| return step |
| |
| def run_legacy(self, |
| analyzers, |
| input_base, |
| affected_files, |
| commit_message, |
| emit=True): |
| """Runs legacy analyzers. |
| |
| This function internally accumulates the comments from the analyzers it |
| runs to the same global storage used by `add_comment()`. By default it |
| emits comments from legacy analyzers to the tricium output property, |
| along with any comments previously created by calling `add_comment()` |
| directly, after running all the specified analyzers. |
| |
| Args: |
      * analyzers (List(LegacyAnalyzer)): Analyzers to run.
| * input_base (Path): The Tricium input dir, generally a checkout base. |
| * affected_files (List(str)): Paths of files in the change, relative |
| to input_base. |
| * commit_message (str): Commit message from Gerrit. |
| * emit (bool): Whether to write results to the tricium output |
        property. If False, the caller is responsible for calling
| `write_comments` to emit the comments added by the legacy analyzers. |
| This is useful for recipes that need to run a mixture of custom |
| analyzers (using `add_comment()` to store comments) and legacy |
| analyzers. |
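
    Example (a sketch; the analyzer constant and paths are illustrative):

      api.tricium.run_legacy(
          [api.tricium.analyzers.SPELLCHECKER],
          api.path.start_dir / 'checkout',
          ['README.md', 'docs/index.md'],
          'Fix typos in docs.')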
| """ |
| self._write_files_data(affected_files, commit_message, input_base) |
| # For each analyzer, download the CIPD package, run it and accumulate |
| # results. Note: Each analyzer could potentially be run in parallel. |
| for analyzer in analyzers: |
| with self.m.step.nest(analyzer.name) as presentation: |
| # Check analyzer.path_filters and conditionally skip. |
        if not _matches_path_filters(affected_files, analyzer.path_filters):
          presentation.step_text = 'skipped due to path filters'
          continue
| try: |
| analyzer_dir = self.m.path.cleanup_dir / analyzer.name |
| output_base = analyzer_dir / 'out' |
| package_dir = analyzer_dir / 'package' |
| self._fetch_legacy_analyzer(package_dir, analyzer) |
| results = self._run_legacy_analyzer( |
| package_dir, |
| analyzer, |
| input_dir=input_base, |
| output_dir=output_base) |
          # Accumulate the comments and show a summary in the step
          # presentation. If one analyzer fails, continue running the rest.
| for comment in results.comments: |
| self._add_comment(comment) |
| num_comments = len(results.comments) |
| presentation.step_text = '%s comment(s)' % num_comments |
| presentation.logs['result'] = self.m.proto.encode( |
| results, 'JSONPB') |
| except self.m.step.StepFailure: |
| presentation.step_text = 'failed' |
| # The tricium data dir with files.json is written in the checkout cache |
| # directory and should be cleaned up. |
| self.m.file.rmtree('clean up tricium data dir', input_base / 'tricium') |
| |
| if emit: |
| self.write_comments() |
| |
| def _write_files_data(self, affected_files, commit_message, base_dir): |
| """Writes a Files input message to a file. |
| |
| Args: |
      * affected_files (List(str)): File paths, which should be
        relative to `base_dir`.
| * commit_message (str): The commit message from Gerrit. |
| * base_dir (Path): Input files base directory. |
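
    The written tricium/data/files.json has roughly this shape (field
    names are camelCase because preserving_proto_field_name is False):

      {"commitMessage": "...", "files": [{"path": "README.md"}]}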
| """ |
| files = Data.Files() |
| files.commit_message = commit_message |
| for path in affected_files: |
| # TODO(qyearsley): Set the is_binary and status fields for each file. |
| # Analyzers use these fields to determine whether to skip files. |
| f = files.files.add() |
| f.path = path |
| data_dir = self._ensure_data_dir(base_dir) |
| self.m.file.write_proto( |
| 'write files.json', |
| data_dir / 'files.json', |
| files, |
| 'JSONPB', |
| # Tricium analyzers expect camelCase field names. |
| encoding_kwargs={'preserving_proto_field_name': False}) |
| |
| def _read_results(self, base_dir): |
| """Reads a Tricium Results message from a file. |
| |
| Args: |
| * base_dir (Path): A directory. Generally this will |
| be the same as the -output arg passed to the analyzer. |
| |
| Returns: Results protobuf message. |
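
    The parsed results.json has the JSONPB shape of Data.Results, for
    example (a sketch):

      {"comments": [{"category": "Analyzer/rule", "message": "...",
                     "path": "src/main.cc", "startLine": 3}]}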
| """ |
| data_dir = self._ensure_data_dir(base_dir) |
| results_json = self.m.file.read_text( |
| 'read results', |
| data_dir / 'results.json', |
| test_data='{"comments":[]}') |
| return json_format.Parse(results_json, Data.Results()) |
| |
| def _ensure_data_dir(self, base_dir): |
| """Creates the Tricium data directory if it doesn't exist. |
| |
    Simple Tricium analyzers assume that data is read from and written
    to a fixed subpath (tricium/data) relative to the input/output
    paths they are passed.
| |
| Args: |
      * base_dir (Path): A directory; either the -input or -output path
        passed to a Tricium analyzer.
| |
| Returns: Tricium data file directory inside base_dir. |
| """ |
| data_dir = base_dir / 'tricium' / 'data' |
| self.m.file.ensure_directory('ensure tricium data dir', data_dir) |
| return data_dir |
| |
| def _fetch_legacy_analyzer(self, package_dir, analyzer): |
| """Fetches an analyzer package from CIPD. |
| |
| Args: |
      * package_dir (Path): The directory to fetch the package into.
| * analyzer (LegacyAnalyzer): Analyzer package to fetch. |
| """ |
| ensure_file = self.m.cipd.EnsureFile() |
| ensure_file.add_package(analyzer.package, version=analyzer.version) |
| self.m.cipd.ensure(package_dir, ensure_file) |
| |
| def _run_legacy_analyzer(self, package_dir, analyzer, input_dir, output_dir): |
| """Runs a simple legacy analyzer executable and returns the results. |
| |
| Args: |
| * package_dir (Path): The directory where the analyzer CIPD package |
| contents have been unpacked to. |
| * analyzer (LegacyAnalyzer): Analyzer object to run. |
| * input_dir (Path): The Tricium input dir, which is expected to contain |
| files as well as the metadata at tricium/data/files.json. |
| * output_dir (Path): The directory to write results into. |
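
    The analyzer is invoked roughly as (this mirrors the command
    construction below):

      <package_dir>/<executable> -input <input_dir> -output <output_dir> \
          <extra_args...>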
| """ |
| # Some analyzers depend on other files in the CIPD package, so cwd is |
| # expected to be the directory with the analyzer. |
| with self.m.context(cwd=package_dir): |
| cmd = [ |
| package_dir / analyzer.executable, '-input', input_dir, '-output', |
| output_dir |
| ] + analyzer.extra_args |
      step = self.m.step('run analyzer', cmd)
      step.presentation.logs['cmd'] = ' '.join(str(c) for c in cmd)
| return self._read_results(output_dir) |
| |
| |
def _matches_path_filters(files, patterns):
  """Returns True if any file matches at least one glob pattern.

  An empty pattern list matches all files. Note that fnmatch does not
  treat path separators specially, so e.g. 'a/b.cc' matches '*.cc'.
  """
  if not patterns:
    return True
  for p in patterns:
    if any(fnmatch.fnmatch(f, p) for f in files):
      return True
  return False