| #!/usr/bin/env python3 |
| # Copyright 2019 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """ |
| Extracts network traffic annotation definitions from C++ and Java source code. |
| """ |
| |
| from __future__ import print_function |
| |
| import argparse |
| import re |
| import sys |
| import traceback |
| |
| from annotation_tools import NetworkTrafficAnnotationTools |
| from annotation_tokenizer import Tokenizer, SourceCodeParsingError |
| |
| from enum import Enum |
| from pathlib import Path |
| from typing import List, Dict, NamedTuple |
| |
| |
class AnnotationType(Enum):
  """Kinds of network traffic annotation definitions.

  The value of each member is the type label written to the extractor output
  by Annotation.extractor_output_string().
  """
  # A complete, self-contained annotation.
  COMPLETE = 'Definition'
  # A partial annotation.
  PARTIAL = 'Partial'
  # An annotation that completes a partial one.
  COMPLETING = 'Completing'
  # A completing annotation that also carries an extra id.
  BRANCHED_COMPLETING = 'BranchedCompleting'
  # A mutable annotation tag; its arguments are never parsed.
  MUTABLE = 'Mutable'
| |
| |
class Language(NamedTuple):
  """Describes how annotation definitions are found in one source language."""
  # Display name of the language; only meant for debugging.
  name: str
  # Definition function name -> the AnnotationType that function defines.
  annotation_types: Dict[str, AnnotationType]
  # Compiled regex that matches the start of an annotation definition call.
  # Its capture group 1 must hold a function name that is a key of
  # annotation_types.
  call_detection_regex: re.Pattern
| |
# Process exit code reported when a source file cannot be parsed. Other runtime
# errors return 1.
EX_PARSE_ERROR = 2

# C++ definition function names, mapped to the annotation type they define.
CPP_ANNOTATION_TYPES = {
    'DefineNetworkTrafficAnnotation': AnnotationType.COMPLETE,
    'DefinePartialNetworkTrafficAnnotation': AnnotationType.PARTIAL,
    'CompleteNetworkTrafficAnnotation': AnnotationType.COMPLETING,
    'BranchedCompleteNetworkTrafficAnnotation':
    AnnotationType.BRANCHED_COMPLETING,
    'CreateMutableNetworkTrafficAnnotationTag': AnnotationType.MUTABLE,
}

# Language definition for C++ source files. The regex is assembled around an
# alternation of every key of CPP_ANNOTATION_TYPES and ends right after the
# call's opening parenthesis.
CPP_LANGUAGE = Language(
    name='C++',
    annotation_types=CPP_ANNOTATION_TYPES,
    call_detection_regex=re.compile(
        r'''
        \b
        # Look for one of the tracked function names.
        # Capture group 1: function name.
        (
        ''' + '|'.join(CPP_ANNOTATION_TYPES) + r'''
        )
        # Followed by a left-paren.
        \s*
        \(
        ''', re.VERBOSE | re.DOTALL))
| |
# Java definition method names, mapped to the annotation type they define.
JAVA_ANNOTATION_TYPES = {
    'createComplete': AnnotationType.COMPLETE,
}

# Language definition for Java source files. The regex only accepts methods
# called on NetworkTrafficAnnotationTag and ends right after the call's opening
# parenthesis.
JAVA_LANGUAGE = Language(
    name='Java',
    annotation_types=JAVA_ANNOTATION_TYPES,
    call_detection_regex=re.compile(
        r'''
        \b
        # Look for a string like NetworkTrafficAnnotationTag.<methodName>
        NetworkTrafficAnnotationTag \s* \. \s*
        # Capture group 1: method name.
        (
        ''' + '|'.join(JAVA_ANNOTATION_TYPES) + r'''
        )
        # Followed by a left-paren.
        \s*
        \(
        ''', re.VERBOSE | re.DOTALL))

# File extension (with its leading dot) -> Language used to parse such files.
LANGUAGE_MAPPING: Dict[str, Language] = {
    '.cc': CPP_LANGUAGE,
    '.mm': CPP_LANGUAGE,
    '.java': JAVA_LANGUAGE,
}
| |
# Matches the test-only annotation macros. Capture group 1 is 'PARTIAL_' for
# the partial variant and None otherwise.
TEST_ANNOTATION_REGEX = re.compile(
    r'\b(PARTIAL_)?TRAFFIC_ANNOTATION_FOR_TESTS\b')

# Matches the placeholder annotation permitted in a few allowlisted files.
MISSING_ANNOTATION_REGEX = re.compile(r'\bMISSING_TRAFFIC_ANNOTATION\b')

# Matches the placeholder annotation used on unsupported platforms that don't
# require Network Traffic Annotations compliance (e.g. iOS).
NO_ANNOTATION_REGEX = re.compile(r'\bNO_TRAFFIC_ANNOTATION_YET\b')
| |
# Set of source file extensions (with leading dot) that have a Language
# definition.
SUPPORTED_EXTENSIONS = set(LANGUAGE_MAPPING)
| |
| |
class Annotation:
  """A network traffic annotation definition found in a source file.

  An instance is either constructed with all of its field values, or
  constructed with placeholders and then filled in by parse_definition() from
  a regex match on a definition call.
  """

  def __init__(self,
               language: Language,
               file_path: Path,
               line_number: int,
               type_name: AnnotationType,
               unique_id: str = '',
               extra_id: str = '',
               text: str = ''):
    """Constructs an Annotation object with the given field values.

    Args:
      language: Language definition used to map definition function names to
        annotation types.
      file_path: Path to the file that contains this annotation.
      line_number: Line number of the annotation within |file_path|.
      type_name: Kind of annotation. Overwritten by parse_definition().
      unique_id: The annotation's unique id.
      extra_id: Second id; only parsed for Partial and BranchedCompleting
        annotations.
      text: The annotation's proto text.
    """
    self.language = language
    self.file_path = file_path
    self.line_number = line_number
    self.type_name = type_name
    self.unique_id = unique_id
    self.extra_id = extra_id
    self.text = text

  def parse_definition(self, re_match: re.Match):
    """Parses the annotation and populates object fields.

    Sets |type_name| from the matched function name, then parses the call's
    arguments to populate |unique_id|, |text| and (possibly) |extra_id|.

    Args:
      re_match: A Match obtained from the Language's call_detection_regex.

    Raises:
      KeyError: if capture group 1 of |re_match| is not a key of the
        language's annotation_types.
    """
    definition_function = re_match.group(1)
    self.type_name = self.language.annotation_types[definition_function]

    # The call detection regexes end on the opening paren, so everything after
    # the match is the argument list (plus the rest of the file).
    body = re_match.string[re_match.end():]
    self._parse_body(body)

  def extractor_output_string(self) -> str:
    """Returns a string formatted for output.

    The result has one field per line, framed by a start and an end marker
    line, in this order: file path, line number, type label, unique id,
    extra id, text.
    """
    fields = [
        '==== NEW ANNOTATION ====',
        self.file_path,
        self.line_number,
        self.type_name.value,
        self.unique_id,
        self.extra_id,
        self.text,
        '==== ANNOTATION ENDS ====',
    ]
    return '\n'.join(str(field) for field in fields)

  def _parse_body(self, body: str):
    """Tokenizes and parses the arguments given to the definition function.

    Args:
      body: Source text that follows the definition call's opening paren.
    """
    # Don't bother parsing CreateMutableNetworkTrafficAnnotationTag(), we don't
    # care about its arguments anyways.
    if self.type_name == AnnotationType.MUTABLE:
      return

    tokenizer = Tokenizer(body, self.file_path, self.line_number)

    # unique_id
    self.unique_id = self._parse_string(tokenizer)
    tokenizer.advance('comma')

    # extra_id (Partial/BranchedCompleting)
    if self.type_name in (AnnotationType.PARTIAL,
                          AnnotationType.BRANCHED_COMPLETING):
      self.extra_id = self._parse_string(tokenizer)
      tokenizer.advance('comma')

    # partial_annotation (Completing/BranchedCompleting)
    if self.type_name in (AnnotationType.COMPLETING,
                          AnnotationType.BRANCHED_COMPLETING):
      # Skip the |partial_annotation| argument. It can be a variable_name, or a
      # FunctionName(), so skip the parentheses if they're there.
      tokenizer.advance('symbol')
      if tokenizer.maybe_advance('left_paren'):
        tokenizer.advance('right_paren')
      tokenizer.advance('comma')

    # proto text
    self.text = self._parse_string(tokenizer)

    # The function call should end here without any more arguments. This is a
    # plain call rather than an `assert`, because assert statements (and the
    # calls inside them) are removed when Python runs with -O.
    # NOTE(review): relies on advance() raising on an unexpected token, like
    # the unchecked advance('comma') calls above -- confirm in the Tokenizer.
    tokenizer.advance('right_paren')

  def _parse_string(self, tokenizer: Tokenizer) -> str:
    """Parse a string value.

    It could be a string literal by itself, or multiple string literals
    concatenated together with '+'. The pieces are joined with a newline for
    each concatenation.

    Args:
      tokenizer: Tokenizer positioned at the first string literal.

    Returns:
      The parsed string value.
    """
    pieces = [tokenizer.advance('string_literal')]
    # Each 'plus' token must be followed by another string literal.
    while tokenizer.maybe_advance('plus') is not None:
      pieces.append(tokenizer.advance('string_literal'))
    return '\n'.join(pieces)
| |
| |
def get_line_number_at(string: str, pos: int) -> int:
  """Find the line number for the char at position |pos|. 1-indexed.

  Args:
    string: text to scan.
    pos: character offset within |string|.

  Returns:
    One plus the number of newline characters in |string| before |pos|.
  """
  # This is O(pos), but we only run it once for each annotation definition.
  # Counting with explicit bounds avoids copying the prefix and building a
  # list of regex matches just to take its length.
  return string.count('\n', 0, pos) + 1
| |
| |
def is_inside_comment(string: str, pos: int) -> bool:
  """Checks if the position |pos| within string seems to be inside a comment.

  This is a bit naive. Only checks for single-line comments (// ...), not block
  comments (/* ... */). Any "//" earlier on the same line counts, including
  one that is part of a string literal.

  Args:
    string: string to scan.
    pos: position within the string.

  Returns:
    True if "//" occurs on the line containing |pos|, at or before
    |string[pos]|.
  """
  # TODO(crbug/966883): Add multi-line comment support.
  # Only look at the current line up to and including |pos|. Searching for the
  # previous newline avoids reversing (and copying) the whole prefix of the
  # file on every call.
  line_start = string.rfind('\n', 0, pos + 1) + 1
  return '//' in string[line_start:pos + 1]
| |
| |
def extract_annotations(file_path: Path) -> List['Annotation']:
  """Extracts and returns annotations from the file at |file_path|.

  Definition calls are reported first, followed by test annotations,
  MISSING_TRAFFIC_ANNOTATION placeholders and NO_TRAFFIC_ANNOTATION_YET
  placeholders. Matches that look like they are inside a single-line comment
  are ignored.

  Args:
    file_path: Path of the source file to scan. Its extension selects the
      Language definition via LANGUAGE_MAPPING.

  Returns:
    A list of Annotation objects, possibly empty.

  Raises:
    ValueError: if the file extension has no Language definition.
  """
  if file_path.suffix not in LANGUAGE_MAPPING:
    raise ValueError("Unrecognized extension '{}' for file '{}'.".format(
        file_path.suffix, str(file_path)))

  language = LANGUAGE_MAPPING[file_path.suffix]

  # Decode explicitly as UTF-8 instead of relying on the locale's preferred
  # encoding, which differs between platforms.
  contents = file_path.read_text(encoding='utf-8')

  def uncommented_matches(regex):
    """Yields (line_number, match) for each match of |regex| not in a comment."""
    for re_match in regex.finditer(contents):
      if is_inside_comment(contents, re_match.start()):
        continue
      yield get_line_number_at(contents, re_match.start()), re_match

  defs = []

  # Check for function calls (e.g. DefineNetworkTrafficAnnotation(...))
  for line_number, re_match in uncommented_matches(
      language.call_detection_regex):
    # The COMPLETE type is only a placeholder; parse_definition() replaces it
    # with the type that belongs to the matched function name.
    annotation = Annotation(language, file_path, line_number,
                            AnnotationType.COMPLETE)
    annotation.parse_definition(re_match)
    defs.append(annotation)

  # Check for test annotations (e.g. TRAFFIC_ANNOTATION_FOR_TESTS)
  for line_number, re_match in uncommented_matches(TEST_ANNOTATION_REGEX):
    # Capture group 1 is only set for PARTIAL_TRAFFIC_ANNOTATION_FOR_TESTS.
    if re_match.group(1):
      type_name = AnnotationType.PARTIAL
      unique_id = 'test_partial'
      extra_id = 'test'
    else:
      type_name = AnnotationType.COMPLETE
      unique_id = 'test'
      extra_id = ''

    defs.append(
        Annotation(language,
                   file_path,
                   line_number,
                   type_name=type_name,
                   unique_id=unique_id,
                   extra_id=extra_id,
                   text='Traffic annotation for unit, browser and other tests'))

  # Check for MISSING_TRAFFIC_ANNOTATION.
  for line_number, _ in uncommented_matches(MISSING_ANNOTATION_REGEX):
    defs.append(
        Annotation(language,
                   file_path,
                   line_number,
                   type_name=AnnotationType.COMPLETE,
                   unique_id='missing',
                   text='Function called without traffic annotation.'))

  # Check for NO_TRAFFIC_ANNOTATION_YET.
  for line_number, _ in uncommented_matches(NO_ANNOTATION_REGEX):
    defs.append(
        Annotation(language,
                   file_path,
                   line_number,
                   type_name=AnnotationType.COMPLETE,
                   unique_id='undefined',
                   text='Nothing here yet.'))

  return defs
| |
| |
def main():
  """Parses the command line, extracts annotations and prints them.

  Options may be given on the command line or, when --options-file is set, in
  a file whose whitespace-separated contents replace the remaining arguments.

  Returns:
    0 on success, or EX_PARSE_ERROR if a file raised SourceCodeParsingError.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--options-file',
      help='optional file to read options from')
  args, argv = parser.parse_known_args()
  if args.options_file:
    # Use a context manager so the file is closed as soon as it has been read.
    with open(args.options_file) as options_file:
      argv = options_file.read().split()

  parser.add_argument(
      '--build-path',
      type=Path,
      help='Specifies a compiled build directory, e.g. out/Debug.')
  parser.add_argument(
      '--generate-compdb', action='store_true',
      help='Generate a new compile_commands.json before running')
  parser.add_argument(
      '--no-filter', action='store_true',
      help='Do not filter files based on compdb entries')
  parser.add_argument('file_paths',
                      nargs='+',
                      type=Path,
                      help='List of files to process.')

  args = parser.parse_args(argv)

  # |compdb_files| is only needed (and only defined) when filtering is on.
  if not args.no_filter:
    tools = NetworkTrafficAnnotationTools(args.build_path)
    compdb_files = tools.GetCompDBFiles(args.generate_compdb)

  annotation_definitions = []

  # Parse all the files.
  # TODO(crbug/966883): Do this in parallel.
  for file_path in args.file_paths:
    # Skip files that are not part of the compilation database.
    if not args.no_filter and file_path.resolve() not in compdb_files:
      continue
    try:
      annotation_definitions.extend(extract_annotations(file_path))
    except SourceCodeParsingError:
      traceback.print_exc()
      return EX_PARSE_ERROR

  # Print output.
  for annotation in annotation_definitions:
    print(annotation.extractor_output_string())

  # If all files were successfully checked for annotations but none of them had
  # any, print something so that the traffic_annotation_auditor knows there was
  # no error so that the files get checked for deleted annotations.
  if not annotation_definitions:
    print('No annotations in these files.')
  return 0
| |
| |
# When run as a script, use main()'s return value as the process exit status.
if __name__ == '__main__':
  sys.exit(main())