blob: 1db207b94c6c78fdacfc8830ce98d2978b32e645 [file] [log] [blame]
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package main
import (
kpb "infra/cmd/package_index/kythe/proto"
// clangUnit holds the JSON information for a single clang target, i.e. one
// entry of the compdb file that populateChannel unmarshals from
// ClangTargets.filePath. The struct tags name the JSON keys that are decoded.
type clangUnit struct {
// Directory is the directory against which File and the relative paths from
// the target's '*.filepaths' file are joined. getClangUnit also reports it
// (with forward slashes) as the compilation unit's working directory.
Directory string `json:"directory"`
// Command is the whole compile command as one string; getClangUnit splits it
// into arguments with shellSplit.
Command string `json:"command"`
// File is the target's source file. It is recorded as the unit's source file
// and, with ".filepaths" appended, names the file listing the target's inputs.
File string `json:"file"`
// clangUnitInfo pairs a clangUnit with the path of its '*.filepaths' file.
// getClangFiles creates one for every target it accepts and sends it on a
// channel so that getClangUnit can build the compilation unit later.
// getClangFiles initialises the fields positionally, so their order matters.
type clangUnitInfo struct {
// unit is the compdb entry for the target.
unit clangUnit
// filepathsFn is the path of the target's '*.filepaths' file, i.e.
// unit.Directory joined with unit.File + ".filepaths".
filepathsFn string
// ClangTargets contains info for Clang target processing.
// NewClangTargets only sets filePath; entries, targetsLen, filepathsSet and
// targetChan are assigned by populateChannel.
type ClangTargets struct {
// NOTE(review): no visible statement in this file section uses once;
// presumably it guards populateChannel so the compdb is parsed a single
// time — confirm.
once sync.Once
// filePath is the path of the compdb JSON file read by populateChannel.
filePath string
// entries holds the targets decoded from filePath; it is nil until parsed.
entries []clangUnit
// targetChan is a buffered channel, created with capacity targetsLen, that
// carries the entries to ProcessClangTargets.
targetChan chan clangUnit
// targetsLen is len(entries). It is also used as the size argument for
// filepathsSet and as the capacity of targetChan and of the deferred-unit
// channel in ProcessClangTargets.
targetsLen int
// Keeps track of the '*.filepaths' files already processed.
filepathsSet *ConcurrentSet
// WaitGroup for closing targetDataOut.
// NOTE(review): not touched by the visible code here; presumably managed by
// the caller that owns targetDataOut — confirm.
DataWg sync.WaitGroup
// WaitGroup for closing targetUnitOut.
// NOTE(review): not touched by the visible code here; presumably managed by
// the caller that owns targetUnitOut — confirm.
UnitWg sync.WaitGroup
// WaitGroup for deferring processing of compilation units until after data files
// have been processed into kzip entries.
KzipDataWg sync.WaitGroup
// NewClangTargets initializes a new ClangTargets struct.
// Only filePath is set (to clangTargetsPath); every other field keeps its zero
// value. The file itself is not read here: that happens in populateChannel.
func NewClangTargets(clangTargetsPath string) *ClangTargets {
return &ClangTargets{
filePath: clangTargetsPath,
// populateChannel parses the compdb file from filePath and fills targetChan with
// the parsed JSON information.
//
// The file is only read while entries is still nil. After parsing, targetsLen
// is set to the number of entries and filepathsSet is created with that size.
// targetChan is then created with capacity targetsLen and every entry is sent
// on it.
func (clangTargets *ClangTargets) populateChannel() {
// entries doubles as the "already parsed" marker.
if clangTargets.entries == nil {
// Parse JSON
dat, err := ioutil.ReadFile(clangTargets.filePath)
// NOTE(review): no handling of a failed read is visible between this check
// and the Unmarshal call below — confirm how the error is reported.
if err != nil {
// NOTE(review): the error returned by json.Unmarshal is discarded, so a
// malformed compdb is not reported here and leaves entries with whatever was
// decoded before the failure.
json.Unmarshal(dat, &clangTargets.entries)
clangTargets.targetsLen = len(clangTargets.entries)
clangTargets.filepathsSet = NewConcurrentSet(clangTargets.targetsLen)
// The buffer is sized to hold every entry.
clangTargets.targetChan = make(chan clangUnit, clangTargets.targetsLen)
// NOTE(review): ProcessClangTargets ranges over targetChan, which only
// terminates once the channel is closed; no close is visible here — confirm.
for _, target := range clangTargets.entries {
clangTargets.targetChan <- target
// ProcessClangTargets processes clang targets from a given compdb file in clangTargets' filePath.
//
// It works in two phases. First it drains targetChan, calls getClangFiles for
// each target and sends every returned file path on targetDataOut; accepted
// targets are queued by getClangFiles on an internal channel. Then it drains
// that internal channel, builds a compilation unit for each queued target with
// getClangUnit and sends it on targetUnitOut.
//
// rootPath, outDir, corpus, buildConfig and hashMaps are passed through to
// getClangUnit unchanged. The first error from getClangFiles or getClangUnit
// is returned as is; nil is returned once both phases have completed.
func (clangTargets *ClangTargets) ProcessClangTargets(ctx context.Context, rootPath, outDir, corpus, buildConfig string,
hashMaps *FileHashMap, targetDataOut chan<- string, targetUnitOut chan<- *kpb.CompilationUnit) error {
// Parse compdb once.
// NOTE(review): the statement this comment describes is not visible below;
// presumably populateChannel is run through clangTargets.once — confirm.
// Channel for deferred compilation unit processing.
// Its capacity is targetsLen, the number of parsed compdb entries.
clangTargetChan := make(chan *clangUnitInfo, clangTargets.targetsLen)
// Process data files.
for target := range clangTargets.targetChan {
files, err := getClangFiles(ctx, clangTargets.filepathsSet, target, clangTargetChan)
if err != nil {
return err
// getClangFiles returns nil files (with a nil error) when the target has no
// '*.filepaths' file or that file was already handled for another target.
if files == nil {
for _, f := range files {
targetDataOut <- f
// Wait for data files to be processed for writing to kzip.
// NOTE(review): the wait itself is not visible here; presumably it is a Wait
// on KzipDataWg — confirm.
// Process compilation units.
for clangInfo := range clangTargetChan {
unit, err := getClangUnit(ctx, clangInfo, rootPath, outDir, corpus, buildConfig, hashMaps)
if err != nil {
return err
targetUnitOut <- unit
return nil
// getClangFiles returns a list of filepaths needed for a given clang target.
//
// The list is read from the target's '*.filepaths' file, located at
// target.Directory joined with target.File + ".filepaths". Each line is
// trimmed, has "//" replaced by "/" and, when relative, is joined with
// target.Directory. For a path ending in ".pb.h" whose ".meta" sibling exists,
// the ".meta" path is added as well.
//
// It returns (nil, nil) when the '*.filepaths' file does not exist or when
// filepathsSet.Add reports false for it. Otherwise the target is sent on
// clangTargetChan before the file is read, so that its compilation unit can be
// built later. Errors from opening or closing the file are returned as is.
// ctx is not referenced by the statements visible here.
func getClangFiles(ctx context.Context, filepathsSet *ConcurrentSet,
target clangUnit, clangTargetChan chan<- *clangUnitInfo) ([]string, error) {
var files []string
filepathsFn := filepath.Join(target.Directory, target.File+".filepaths")
// We don't want to fail if one of the filepaths doesn't exist. However we
// keep track of it.
if _, err := os.Stat(filepathsFn); os.IsNotExist(err) {
return nil, nil
// For some reason, the compilation database contains the same targets more
// than once. However we have just one file containing the file paths of
// the involved files. So we can skip this target if we already processed
// it.
// Presumably Add returns false when the entry was already in the set —
// verify against ConcurrentSet.
if !filepathsSet.Add(filepathsFn) {
return nil, nil
// Send to clangTargetChan for deferred unit processing.
clangTargetChan <- &clangUnitInfo{target, filepathsFn}
// All file paths given in the *.filepaths file are either absolute paths
// or relative to the directory target in the compilation database.
file, err := os.Open(filepathsFn)
if err != nil {
return nil, err
// NOTE(review): scanner.Err() is not checked by any visible statement, so a
// read error would end the loop silently — confirm.
scanner := bufio.NewScanner(file)
for scanner.Scan() {
// Each line in the '*.filepaths' file references the path to a source
// file involved in the compilation.
fname := strings.ReplaceAll(strings.TrimSpace(scanner.Text()), "//", "/")
if !filepath.IsAbs(fname) {
fname = filepath.Join(target.Directory, fname)
// We should not package builtin clang header files.
if strings.Contains(fname, "third_party/llvm-build") {
files = append(files, fname)
// .pb.h.meta file is required for CC/PB cross references
// os.Stat is evaluated before the suffix test, so it runs for every listed
// file. filepath.Join with a single argument only cleans the path.
if _, err := os.Stat(filepath.Join(fname + ".meta")); err == nil && strings.HasSuffix(fname, ".pb.h") {
files = append(files, fname+".meta")
if err = file.Close(); err != nil {
return nil, err
return files, nil
// getClangUnit returns the compilation unit for a given clang target.
//
// It splits the target's compile command with shellSplit, trims it so that it
// starts at the clang executable, extracts the output file ("-o <file>" or
// "/Fo<file>"), adjusts the arguments for the Kythe indexer and then adds a
// required input for every path in the target's '*.filepaths' file via
// addClangUnitInput. The resulting proto carries the source file, working
// directory, output key, VName, optional build-config details and the final
// argument list with "-w" appended.
//
// rootPath is not referenced by the statements visible here. Errors from
// shellSplit, from opening or closing the '*.filepaths' file, from
// addClangUnitInput and from ptypes.MarshalAny are returned as is with a nil
// unit.
func getClangUnit(ctx context.Context, clangInfo *clangUnitInfo, rootPath, outDir, corpus, buildConfig string,
hashMaps *FileHashMap) (*kpb.CompilationUnit, error) {
unitProto := &kpb.CompilationUnit{}
commandList, err := shellSplit(clangInfo.unit.Command)
if err != nil {
return nil, err
logging.Debugf(ctx, "Generating Translation Unit data for %s\nCompile command: %s",
clangInfo.unit.File, clangInfo.unit.Command)
// On some platforms, commandList starts with the goma executable,
// followed by the path to the clang executable (either clang++ or
// clang-cl.exe). We want the clang executable to be the first parameter.
for i, cmd := range commandList {
if strings.Contains(cmd, "clang") {
// Shorten the list of commands such that it starts with the path to
// the clang executable.
commandList = commandList[i:]
// Extract the output file argument.
var outputFile string
for i, cmd := range commandList {
// "-o" takes the output path as the following argument; the bounds check
// covers a trailing "-o" with nothing after it.
if cmd == "-o" && i+1 < len(commandList) {
outputFile = commandList[i+1]
} else if strings.HasPrefix(cmd, "/Fo") {
// Handle the Windows case.
// The path is attached directly to the "/Fo" flag.
outputFile = cmd[len("/Fo"):]
// A missing output file is only logged; processing continues and OutputKey
// is then set to the empty string.
if outputFile == "" {
logging.Warningf(ctx, "No output file path found for %s\n", clangInfo.unit.File)
// NOTE(review): commandList[0] is indexed without a visible length check —
// confirm shellSplit cannot return an empty list here.
if strings.Contains(commandList[0], "clang-cl") {
// Convert any args starting with -imsvc to use forward slashes, since
// this is what Kythe expects.
for i, cmd := range commandList {
if strings.HasPrefix(cmd, "-imsvc") {
commandList[i] = strings.ReplaceAll(cmd, "\\", "/")
// HACK ALERT: Here we define header guards to prevent Kythe from using
// the CUDA wrapper headers, which cause indexing errors.
// The standard Kythe extractor dumps header search state to help the
// indexer find the right headers, but we don't do that in this script.
// The below lines work around it by excluding the CUDA headers entirely.
// NOTE(review): the arguments appended by this call are not visible here;
// per the comment above they are presumably header-guard defines — confirm.
commandList = append(commandList,
// Remove any args that may cause errors with the Kythe indexer.
// ln is the write index used to store args back into commandList, which is
// then truncated to ln. Presumably ln is advanced for every arg that
// isUnwantedWinArg does not reject — the increment is not visible here.
ln := 0
for _, arg := range commandList {
if isUnwantedWinArg(arg) {
commandList[ln] = arg
commandList = commandList[:ln]
// This macro is used to guard Kythe-specific pragmas, so we must define it
// for Kythe to see them. In particular the kythe_inline_metadata pragma we
// insert into mojom generated files.
commandList = append(commandList, "-DKYTHE_IS_RUNNING=1")
// Read the target's '*.filepaths' file: each line names one input file.
file, err := os.Open(clangInfo.filepathsFn)
if err != nil {
return nil, err
// NOTE(review): scanner.Err() is not checked by any visible statement.
scanner := bufio.NewScanner(file)
for scanner.Scan() {
fname := strings.TrimSpace(scanner.Text())
// We should not package builtin clang header files.
if strings.Contains(fname, "third_party/llvm-build") {
// The clang tool uses '//' to separate the system path where system
// headers can be found from the relative path used in the #include
// statement.
if strings.Contains(fname, "//") {
fname = strings.ReplaceAll(fname, "//", "/")
fnameFullpath, err := addClangUnitInput(ctx,
fname, clangInfo.unit.Directory, outDir, corpus, hashMaps, unitProto)
if err != nil {
return nil, err
// .pb.h.meta file is required for CC/PB cross references
// NOTE(review): addClangUnitInput appears to return an empty path when the
// file has no hash entry; this would then stat ".meta" relative to the
// process working directory — confirm that is harmless.
if _, err := os.Stat(fnameFullpath + ".meta"); err == nil && strings.HasSuffix(fname, ".pb.h") {
_, err = addClangUnitInput(ctx,
fname+".meta", clangInfo.unit.Directory, outDir, corpus, hashMaps, unitProto)
if err != nil {
return nil, err
if err = file.Close(); err != nil {
return nil, err
// Fill in the unit's identifying fields.
unitProto.SourceFile = append(unitProto.SourceFile, clangInfo.unit.File)
unitProto.WorkingDirectory = convertPathToForwardSlashes(clangInfo.unit.Directory)
unitProto.OutputKey = outputFile
unitProto.VName = &kpb.VName{
Corpus: corpusForFile(ctx, clangInfo.unit.File, corpus),
Language: "c++",
// Add the build config if specified.
if buildConfig != "" {
details := &kpb.BuildDetails{
BuildConfig: buildConfig,
any, err := ptypes.MarshalAny(details)
if err != nil {
return nil, err
// NOTE(review): this overwrites the type URL that MarshalAny filled in with
// an empty string — confirm that an empty value is intended.
any.TypeUrl = ""
unitProto.Details = append(unitProto.Details, any)
// Disable all warnings with -w so that the indexer can run successfully.
// The job of the indexer is to index the code, not to verify it. Warnings
// we actually care about should show up in the compile step.
unitProto.Argument = append(unitProto.Argument, commandList...)
unitProto.Argument = append(unitProto.Argument, "-w")
return unitProto, nil
// addClangUnitInput adds required input to unitProto and returns full path to file
// that was added. Used as a helper function in getClangUnit.
//
// fname is a path from a '*.filepaths' file, either absolute or relative to
// dir. Its cleaned absolute form is the key used to look up the file's hash in
// hashMaps. When no hash is found, a warning is logged and ("", nil) is
// returned without touching unitProto. Otherwise a FileInput is appended whose
// Info.Path is fname made relative to dir (with forward slashes), whose digest
// is the hash, and whose VName is set by setVnameForFile from fname normalized
// against outDir and from corpus.
//
// A non-nil error comes only from filepath.Abs or filepath.Rel and is
// returned with an empty path.
func addClangUnitInput(ctx context.Context, fname, dir, outDir, corpus string, hashMaps *FileHashMap,
unitProto *kpb.CompilationUnit) (string, error) {
// Clean up fname and set to absolute path for use in hashMaps.
fname = filepath.Clean(fname)
fnameFullpath := fname
// Paths in *.filepaths files are either absolute or relative to dir.
// Format and clean fnameFullpath to make it consistent with entries in hashMaps.
if !filepath.IsAbs(fnameFullpath) {
fnameFullpathAbs, err := filepath.Abs(filepath.Join(dir, fname))
if err != nil {
return "", err
fnameFullpath = fnameFullpathAbs
fnameFullpath = filepath.Clean(fnameFullpath)
hash, ok := hashMaps.Filehash(fnameFullpath)
// A file without a known hash is skipped with a warning, not an error.
if !ok {
logging.Warningf(ctx, "No information about required input file %s\n", fnameFullpath)
return "", nil
// Handle absolute paths - when normalizing we assume paths are
// relative to the output directory (e.g. src/out/Debug).
if filepath.IsAbs(fname) {
fnameRel, err := filepath.Rel(dir, fname)
if err != nil {
return "", err
fname = fnameRel
// From here on fname is relative to dir.
vname := &kpb.VName{}
setVnameForFile(ctx, vname, convertPathToForwardSlashes(normalizePath(outDir, fname)), corpus)
requiredInput := &kpb.CompilationUnit_FileInput{
VName: vname,
Info: &kpb.FileInfo{
Path: convertPathToForwardSlashes(fname),
Digest: hash,
unitProto.RequiredInput = append(unitProto.GetRequiredInput(), requiredInput)
return fnameFullpath, nil