| # Copyright 2023 The Chromium Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """ Script for building a Chromium CodeQL database.""" |
| import argparse |
| import functools |
| import json |
| import multiprocessing |
| import subprocess |
| import logging |
| import time |
| import os |
| import traceback |
| import gn_sources_tools |
| import targets_to_index |
| |
| |
def log_subprocess_output(output, logger=None):
  """ Writes captured subprocess output to `logger`, or, if none given,
  to stdout.

  :param output: Output captured from a subprocess. `bytes` (which is what
    `subprocess.check_output` returns by default) are decoded as UTF-8 with
    undecodable bytes replaced, so the text is emitted readably instead of
    as a `b'...'` literal. Any other value is passed through unchanged.
  :param logger: Optional object exposing an `info` method. When falsy, the
    output is printed to stdout instead.
  :return: returns nothing
  """
  if isinstance(output, (bytes, bytearray)):
    output = output.decode('utf-8', errors='replace')
  if not logger:
    print(output)
  else:
    logger.info(output)
| |
| |
def get_compilation_db(src_path, out_path):
  """ Runs `tools/clang/scripts/generate_compdb.py` and returns its output.

  :param src_path: Directory the script path is resolved against; also used
    as the working directory of the subprocess.
  :param out_path: Value passed to the script's `-p` option.
  :return: The script's stdout parsed as JSON.
  """
  generator_script = os.path.join(src_path,
                                  'tools/clang/scripts/generate_compdb.py')
  argv = [generator_script, '-p', out_path]
  print(f"Compilation DB command: {argv}")
  # check_output raises subprocess.CalledProcessError on a non-zero exit.
  return json.loads(subprocess.check_output(argv, cwd=src_path))
| |
| |
def trace(processing_num, entry, *, codeql_binary_path, codeql_db_path,
          successful_commands, failed_commands, logger):
  """ Replays one compilation database entry under
  `codeql database trace-command`.

  :param processing_num: Index of this entry; only used in the progress log.
  :param entry: Compilation database entry. Its 'directory' and 'command'
    keys are read.
  :param codeql_binary_path: The codeql executable to invoke.
  :param codeql_db_path: Database path passed to `trace-command`.
  :param successful_commands: List-like object; the stringified argv is
    appended when the traced command succeeds.
  :param failed_commands: List-like object; the stringified argv is appended
    when the traced command fails.
  :param logger: Object exposing an `info` method, used for progress and
    failure reports.
  :return: returns nothing
  """
  directory = entry['directory']

  # Undo the escaping of quotes and parentheses, then split into argv.
  # NOTE(review): splitting on single spaces cannot represent arguments that
  # themselves contain spaces; shlex.split may be more appropriate, but the
  # exact quoting produced by generate_compdb.py should be confirmed first.
  unescaped = entry['command'].replace('\\"', '"').replace('\\(', '(').replace(
      '\\)', ')')
  # Drop empty tokens: runs of spaces (or leading/trailing spaces) would
  # otherwise become empty-string arguments, and a leading space would turn
  # command[0] into the working directory itself.
  command = [token for token in unescaped.split(' ') if token]
  if not command:
    logger.info("FAILURE: empty compile command for entry %s", processing_num)
    logger.info("Working directory was %s", directory)
    failed_commands.append(str(command))
    return

  # The compiler path is resolved against the entry's own directory.
  command[0] = os.path.abspath(os.path.join(directory, command[0]))

  try:
    # stderr is merged into stdout so a failure report carries both streams.
    subprocess.check_output([
        codeql_binary_path, 'database', 'trace-command', codeql_db_path,
        f'--working-dir={directory}', '--', *command
    ],
                            stderr=subprocess.STDOUT)
  except subprocess.CalledProcessError as e:
    logger.info(
        "FAILURE: a subprocess.CalledProcessError occurred while running %s",
        command)
    logger.info(traceback.format_exc())
    logger.info("Working directory was %s", directory)
    if e.output:
      # Without this the captured compiler/codeql diagnostics are lost.
      logger.info("Subprocess output was:\n%s",
                  e.output.decode('utf-8', errors='replace'))
    failed_commands.append(str(command))
  else:
    successful_commands.append(str(command))
    logger.info("************ Upto %s", processing_num)
| |
| |
class CodeQLDatabase:
  """ A CodeQL database initialised on disk with `codeql database init`. """

  def __init__(self, src_path, db_path, codeql_binary_path):
    """ Construct a new `CodeQLDatabase` object.
    :param src_path: The path to the chromium/src tree.
    :param db_path: The path where the CodeQL database will be created.
    :param codeql_binary_path: The codeql executable to invoke.
    :return: returns nothing
    :raises ValueError: if `codeql database init` exits with a non-zero
      status. The original `subprocess.CalledProcessError` is attached as
      the cause.
    """
    self.db_path = db_path
    try:
      # `--overwrite` is passed so an existing database at db_path is replaced.
      process_stdout = subprocess.check_output([
          codeql_binary_path, 'database', 'init', f'--source-root={src_path}',
          '--language=cpp', db_path, '--overwrite'
      ])
    except subprocess.CalledProcessError as e:
      # Presumably failed due to an invalid value for db_path.
      raise ValueError(
          f"`codeql database init` failed for {db_path} "
          f"(exit status {e.returncode})") from e
    log_subprocess_output(process_stdout)
| |
| |
def index_one_target(target_name,
                     src_path,
                     db_path,
                     codeql_binary_path,
                     out_path,
                     logger,
                     gn_path=None,
                     logfile=None):
  """ Builds a CodeQL database for a single GN target.

  Steps: create `<db_path>/<target short name>`, fetch the target's
  transitive sources, filter the compilation database down to those
  sources, initialise the CodeQL database, trace every remaining compile
  command in a process pool, then finalize the database.

  :param target_name: GN label such as `//components:components_unittests`.
  :param src_path: The path to the chromium/src tree.
  :param db_path: Parent directory; the database is created in a
    subdirectory named after the target.
  :param codeql_binary_path: The codeql executable to invoke.
  :param out_path: Build directory, passed to the compilation database
    generator and to `gn_sources_tools`.
  :param logger: Logger handed to each `trace` call.
  :param gn_path: gn executable, forwarded to `gn_sources_tools`.
  :param logfile: Only used to tell the user where trace progress goes.
  :return: returns nothing
  :raises SystemExit: with status 1 if the CodeQL database cannot be
    initialised.
  """
  # The text after the last ':' names the target. A label without ':' (e.g.
  # `//base`) is GN shorthand for `//base:base`, so fall back to the last
  # path component instead of failing with an IndexError.
  _, colon, target_shortname = target_name.rpartition(':')
  if not colon:
    target_shortname = target_name.rstrip('/').rsplit('/', 1)[-1]
  db_path = os.path.join(db_path, target_shortname)
  # `codeql database init` is run with --overwrite, so a directory left over
  # from a previous run (or a missing parent) must not abort the script.
  os.makedirs(db_path, exist_ok=True)

  start_time = time.time()
  print("Generating compilation db.")

  print("Fetching all transitive source dependencies for " + str(target_name))
  gn_sources_dict = gn_sources_tools.dictionary_of_all_transitive_sources(
      target_name, out_path, gn_path)
  initial_compilation_db = get_compilation_db(src_path, out_path)
  print('Filtering compilation DB to only include matches '
        'from GN transitive dependencies.')
  compilation_db = [
      entry for entry in initial_compilation_db
      if entry['file'] in gn_sources_dict
  ]

  print("Initializing codeql.")
  try:
    codeql_db = CodeQLDatabase(src_path, db_path, codeql_binary_path)
  except ValueError:
    print("Could not initialize CodeQL database at %s" % db_path)
    # Exit with a failure status; a bare exit() would report success.
    raise SystemExit(1)

  print("Tracing compilation.")
  if logfile:
    print("Progress on trace compilation will be reported to %s" % logfile)
  # One manager serves both shared lists; the `with` block shuts its server
  # process down once the counts have been read.
  with multiprocessing.Manager() as manager:
    failed_commands = manager.list()
    successful_commands = manager.list()
    with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
      pool.starmap(
          functools.partial(trace,
                            codeql_binary_path=codeql_binary_path,
                            codeql_db_path=codeql_db.db_path,
                            successful_commands=successful_commands,
                            failed_commands=failed_commands,
                            logger=logger), enumerate(compilation_db))
    num_successful = len(successful_commands)
    num_failed = len(failed_commands)

  print("Successful commands: %s" % num_successful)
  print("Failed commands: %s" % num_failed)

  print("Finalizing codeql db.")
  try:
    process_stdout = subprocess.check_output(
        [codeql_binary_path, 'database', 'finalize', '-j=-1', db_path])
  except subprocess.CalledProcessError as e:
    print("CodeQL DB finalization failed with return code %s" % e.returncode)
    print("stdout: %s" % e.stdout)
    print("stderr: %s" % e.stderr)
  else:
    log_subprocess_output(process_stdout)
    # Only claim completion when finalization actually succeeded.
    print("Database creation complete.")
  total_time = time.time() - start_time
  print("Time elapsed:")
  print(str(total_time))
| |
| |
def main():
  """ Command line entry point.

  Checks that the script is run from `chromium/src`, parses the command
  line, optionally attaches a file handler to the trace logger, and builds
  one CodeQL database per requested GN target. When no `--gn_target` is
  given, every entry of `targets_to_index.full_targets` is indexed.

  :return: returns nothing
  :raises SystemExit: with status 1 when not run from `chromium/src`.
  """
  print('BEFORE RUNNING THIS SCRIPT: Make sure you have done a *full build* '
        'in your --out_dir.')
  print('This script does not build anything itself, and will fail in strange '
        'ways if there is an empty or incomplete build!')

  logger = logging.getLogger('log')
  logger.setLevel(logging.INFO)
  actual_cwd = os.getcwd()
  script_directory = os.path.dirname(os.path.realpath(__file__))
  # The script lives two directories below the source root.
  src_path = os.path.join(script_directory, '..', '..')
  if actual_cwd != os.path.normpath(src_path):
    print("Failure: Script must be executed from `chromium/src`. Exiting.")
    print(actual_cwd)
    print(src_path)
    # Exit with a failure status; a bare exit() would report success.
    raise SystemExit(1)

  print("Parsing command line arguments.")
  parser = argparse.ArgumentParser(
      description='Build CodeQL database for Chromium browser process')
  parser.add_argument(
      '--out_path',
      '-o',
      type=str,
      default='out/release',
      help='Relative path inside chromium checkout to build directory')
  parser.add_argument('--db_path',
                      '-d',
                      type=str,
                      required=True,
                      help='Path to output database')
  parser.add_argument(
      '--logfile',
      '-l',
      type=str,
      help="absolute path to logfile for `trace` calls, if desired")
  parser.add_argument(
      '--gn_target',
      '-g',
      action='append',
      type=str,
      help=(
          'name for the specific GN target you want a CodeQL database for '
          '(e.g. `//components:components_unittests`); may be given more '
          'than once; if left blank, indexes everything'))
  parser.add_argument(
      '--codeql_binary_path',
      '-c',
      type=str,
      default='codeql',
      help=('Path to the codeql binary. If this is not set, the script assumes '
            'it is located at `codeql` somewhere in the user\'s PATH.'))
  parser.add_argument(
      '--gn_path',
      type=str,
      default='gn',
      help=('Path to the gn executable. If this is not set, the script assumes '
            'it is located at `gn` somewhere in the user\'s PATH.'))
  args = parser.parse_args()

  if args.logfile:
    file_handler = logging.FileHandler(args.logfile)
    file_handler.setFormatter(logging.Formatter('%(message)s'))
    logger.addHandler(file_handler)
  src_path = os.path.abspath(os.path.expanduser(src_path))
  args.db_path = os.path.abspath(os.path.expanduser(args.db_path))

  # `--gn_target` uses action='append', so args.gn_target is a list (or None
  # when the flag is absent). Each label must be indexed individually;
  # index_one_target expects a single label string.
  # If no args.gn_target given, default to indexing everything in
  # targets_to_index.
  targets = args.gn_target or targets_to_index.full_targets
  for target in targets:
    index_one_target(target, src_path, args.db_path, args.codeql_binary_path,
                     args.out_path, logger, args.gn_path, args.logfile)
| |
# Run the database build only when this file is executed as a script, so
# importing the module has no side effects.
if __name__ == '__main__':
  main()