blob: 5a1820dc69b24fd60e4d4641ff66344a70b883cb [file]
// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package main
import (
"bufio"
"context"
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
"sync"
"unique"
"github.com/golang/protobuf/ptypes"
"go.chromium.org/luci/common/logging"
kpb "go.chromium.org/infra/cmd/package_index/kythe/proto"
)
// clangUnit contains all the JSON information for a given clang target, i.e.
// one entry of the compdb file. The struct tags name the JSON keys the fields
// are decoded from.
type clangUnit struct {
// Directory is the directory that relative dependency paths of the target
// are joined with.
Directory string `json:"directory"`
// Command is the compile command line of the target; it is tokenized with
// shellSplit when the compilation unit is built.
Command string `json:"command"`
// File is the source file of the target.
File string `json:"file"`
}
// clangUnitInfo contains a clangUnit struct and the path of the '*.filepaths'
// file for a clang target. Values of this type are queued for deferred
// compilation unit processing.
type clangUnitInfo struct {
// unit is the compdb entry of the target.
unit clangUnit
// filepathsFile is the path of the '*.filepaths' file listing the target's
// dependencies. It is the empty string when the dependencies are looked up
// in the siso deps map instead.
filepathsFile string
}
// ClangTargets contains info for Clang target processing.
type ClangTargets struct {
// once makes sure populateChannel (the compdb parsing) runs a single time.
once sync.Once
// filePath is the path of the compdb JSON file.
filePath string
// entries holds the clangUnit records decoded from filePath.
entries []clangUnit
// targetChan is filled with every entry and then closed by populateChannel;
// ProcessClangTargets receives from it.
targetChan chan clangUnit
// targetsLen is the number of entries; it is used as the buffer size of
// targetChan and of the deferred compilation unit channel.
targetsLen int
// Keep track of the processed files. When useSiso, clangUnit file paths
// will be added; otherwise, processed '*.filepaths' files are added.
filepathsSet *ConcurrentSet
// WaitGroup for closing targetDataOut. ProcessClangTargets calls Done on it
// once all data files of its targets have been sent.
DataWg sync.WaitGroup
// WaitGroup for closing targetUnitOut. ProcessClangTargets calls Done on it
// once all compilation units have been sent.
UnitWg sync.WaitGroup
// WaitGroup for deferring processing of compilation units until after data files
// have been processed into kzip entries. ProcessClangTargets waits on it
// between its data file phase and its compilation unit phase.
KzipDataWg sync.WaitGroup
}
// NewClangTargets returns a ClangTargets that will read its compdb from
// clangTargetsPath. Every other field keeps its zero value until the compdb
// is parsed.
func NewClangTargets(clangTargetsPath string) *ClangTargets {
	targets := new(ClangTargets)
	targets.filePath = clangTargetsPath
	return targets
}
// populateChannel parses the compdb file from filePath and fills targetChan with
// the parsed JSON information, then closes targetChan.
//
// It is run through clangTargets.once and therefore has no way to return an
// error: a compdb that cannot be read or decoded makes it panic instead of
// silently producing an empty target list.
//
// Parsing is skipped when entries is already non-nil. In that case targetChan
// (and filepathsSet) are not created here, so they must have been set up by
// whoever populated entries.
func (clangTargets *ClangTargets) populateChannel() {
	if clangTargets.entries == nil {
		// Parse JSON
		dat, err := os.ReadFile(clangTargets.filePath)
		if err != nil {
			panic(err)
		}
		if err := json.Unmarshal(dat, &clangTargets.entries); err != nil {
			panic(fmt.Errorf("parsing compdb %s: %w", clangTargets.filePath, err))
		}
		clangTargets.targetsLen = len(clangTargets.entries)
		clangTargets.filepathsSet = NewConcurrentSet(clangTargets.targetsLen)
		// The buffer holds every target, so the sends below never block.
		clangTargets.targetChan = make(chan clangUnit, clangTargets.targetsLen)
	}
	for _, target := range clangTargets.entries {
		clangTargets.targetChan <- target
	}
	close(clangTargets.targetChan)
}
// ProcessClangTargets processes clang targets from a given compdb file in clangTargets' filePath.
//
// It works in two phases:
//
//  1. Data files: every target received from targetChan is resolved to its
//     list of required files (from the siso deps map when useSiso is set,
//     from the target's '*.filepaths' file otherwise) and each file is sent
//     to targetDataOut. DataWg.Done is called when this phase is finished.
//  2. Compilation units: after KzipDataWg.Wait returns, one compilation unit
//     is built per target queued during phase 1 and sent to targetUnitOut.
//     UnitWg.Done is called when this phase is finished.
//
// The first error of either phase is returned immediately; in that case the
// Done calls that have not been reached yet are not made.
func (clangTargets *ClangTargets) ProcessClangTargets(ctx context.Context, rootPath, outDir, corpus, buildConfig, clangTargetArch string,
	hashMaps *FileHashMap, targetDataOut chan<- string, targetUnitOut chan<- *kpb.CompilationUnit, useSiso bool, targetDepsMap targetDepsMap) error {
	// Parse compdb once.
	clangTargets.once.Do(clangTargets.populateChannel)

	// Targets whose compilation unit is built in the second phase. The buffer
	// can hold every target, so queueing never blocks.
	deferredUnits := make(chan *clangUnitInfo, clangTargets.targetsLen)

	// listFiles picks the dependency source matching useSiso.
	listFiles := func(entry clangUnit) ([]string, error) {
		if useSiso {
			return getClangFilesFromSisoDeps(ctx, clangTargets.filepathsSet, entry, deferredUnits, targetDepsMap)
		}
		return getClangFilesFromFile(ctx, clangTargets.filepathsSet, entry, deferredUnits)
	}

	// Phase 1: data files.
	for entry := range clangTargets.targetChan {
		dataFiles, err := listFiles(entry)
		if err != nil {
			return err
		}
		// A skipped target yields no files, which makes this loop a no-op.
		for _, dataFile := range dataFiles {
			targetDataOut <- dataFile
		}
	}
	close(deferredUnits)
	clangTargets.DataWg.Done()

	// Wait for data files to be processed for writing to kzip.
	clangTargets.KzipDataWg.Wait()

	// Phase 2: compilation units.
	for info := range deferredUnits {
		compilationUnit, err := getClangUnit(ctx, info, rootPath, outDir, corpus, buildConfig, clangTargetArch, hashMaps, useSiso, targetDepsMap)
		if err != nil {
			return err
		}
		targetUnitOut <- compilationUnit
	}
	clangTargets.UnitWg.Done()
	return nil
}
// appendToFiles normalizes one dependent file name of target and appends it to
// files, returning the extended slice.
//
// file is trimmed, every "//" in it is replaced by "/", and a relative name is
// joined with target.Directory. Names below third_party/llvm-build are
// dropped. For a name ending in ".pb.h" whose "<name>.meta" file exists on
// disk, that .meta file is appended as well. filepathsFile is only used in the
// warning that is logged for ".hmap/" paths.
func appendToFiles(ctx context.Context, files []string, target clangUnit, file string, filepathsFile string) []string {
	fnameRaw := strings.TrimSpace(file)
	fname := strings.ReplaceAll(fnameRaw, "//", "/")
	if !filepath.IsAbs(fname) {
		fname = filepath.Join(target.Directory, fname)
	}
	if strings.Contains(fname, ".hmap/") {
		// See crbug.com/408298859.
		logging.Warningf(ctx, "Potential problematic hmap path read from %s: %s (originally %s)", filepathsFile, fname, fnameRaw)
	}
	// We should not package builtin clang header files, see
	// crbug.com/513826
	if strings.Contains(fname, "third_party/llvm-build") {
		return files
	}
	files = append(files, fname)
	// .pb.h.meta file is required for CC/PB cross references. Test the suffix
	// first so that only .pb.h files cost a stat call.
	if strings.HasSuffix(fname, ".pb.h") {
		metaName := fname + ".meta"
		if _, err := os.Stat(filepath.Clean(metaName)); err == nil {
			files = append(files, metaName)
		}
	}
	return files
}
// getClangFilesFromSisoDeps returns a list of filepaths needed for a given
// clang target, read from the siso generated dependencies in targetDepsMap.
//
// A target whose File was already recorded in filepathsSet is skipped and
// yields (nil, nil). A target without an entry in targetDepsMap yields an
// error. Otherwise the target is queued on clangTargetChan for deferred unit
// processing before its dependencies are collected.
func getClangFilesFromSisoDeps(ctx context.Context, filepathsSet *ConcurrentSet,
	target clangUnit, clangTargetChan chan<- *clangUnitInfo, targetDepsMap targetDepsMap) ([]string, error) {
	// Do not process if the same target has been processed.
	if added := filepathsSet.Add(target.File); !added {
		return nil, nil
	}

	deps, found := targetDepsMap[unique.Make(target.File)]
	if !found {
		return nil, fmt.Errorf("fail to find dependencies in siso deps query for %s", target.File)
	}

	// With siso the dependencies are looked up by target.File, so no
	// filepaths file is attached to the queued unit.
	clangTargetChan <- &clangUnitInfo{unit: target, filepathsFile: ""}

	var collected []string
	for _, dep := range deps {
		collected = appendToFiles(ctx, collected, target, dep.Value(), target.File)
	}
	return collected, nil
}
// getClangFilesFromFile returns a list of filepaths needed for a given clang target.
// Read the source from translation_unit generated *.filepaths files.
//
// It yields (nil, nil) when the target's '*.filepaths' file does not exist or
// was already recorded in filepathsSet. Otherwise the target is queued on
// clangTargetChan for deferred unit processing and every line of the file is
// normalized through appendToFiles. Failures to open, read or close the file
// are returned as errors.
func getClangFilesFromFile(ctx context.Context, filepathsSet *ConcurrentSet,
	target clangUnit, clangTargetChan chan<- *clangUnitInfo) ([]string, error) {
	var files []string
	filepathsFile := filepath.Join(target.Directory, target.File+".filepaths")
	// We don't want to fail if one of the filepaths doesn't exist. However we
	// keep track of it.
	if _, err := os.Stat(filepathsFile); os.IsNotExist(err) {
		return nil, nil
	}
	// For some reason, the compilation database contains the same targets more
	// than once. However we have just one file containing the file paths of
	// the involved files. So we can skip this target if we already processed
	// it.
	if !filepathsSet.Add(filepathsFile) {
		return nil, nil
	}
	// Send to clangTargetChan for deferred unit processing.
	clangTargetChan <- &clangUnitInfo{target, filepathsFile}
	// All file paths given in the *.filepaths file are either absolute paths
	// or relative to the directory target in the compilation database.
	file, err := os.Open(filepathsFile)
	if err != nil {
		return nil, err
	}
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		files = appendToFiles(ctx, files, target, scanner.Text(), filepathsFile)
	}
	// Scan also stops on a read error or an over-long line; without this
	// check the list would be silently truncated.
	scanErr := scanner.Err()
	closeErr := file.Close()
	if scanErr != nil {
		return nil, fmt.Errorf("reading %s: %w", filepathsFile, scanErr)
	}
	if closeErr != nil {
		return nil, closeErr
	}
	return files, nil
}
// getClangUnit returns the compilation unit for a given clang target.
//
// The compile command of clangInfo.unit is split into arguments, trimmed so
// that it starts at the clang executable, and adjusted for the Kythe indexer.
// The required inputs are taken from targetDepsMap when useSiso is set and
// from clangInfo.filepathsFile otherwise. corpus and buildConfig are recorded
// in the unit; clangTargetArch, when non-empty, is appended as a "-target"
// argument. rootPath is not used by this function.
//
// An error is returned when the command cannot be split or is empty, when the
// dependencies of the target cannot be found or read, or when a required
// input cannot be added.
func getClangUnit(ctx context.Context, clangInfo *clangUnitInfo, rootPath, outDir, corpus, buildConfig, clangTargetArch string,
	hashMaps *FileHashMap, useSiso bool, targetDepsMap targetDepsMap) (*kpb.CompilationUnit, error) {
	unitProto := &kpb.CompilationUnit{}
	commandList, err := shellSplit(clangInfo.unit.Command)
	if err != nil {
		return nil, err
	}
	logging.Debugf(ctx, "Generating Translation Unit data for %s\nCompile command: %s",
		clangInfo.unit.File, clangInfo.unit.Command)
	// commandList[0] is inspected below; fail cleanly instead of panicking on
	// a compdb entry without a command.
	if len(commandList) == 0 {
		return nil, fmt.Errorf("getClangUnit: empty compile command for %s", clangInfo.unit.File)
	}

	// On some platforms, the |command_list| starts with the goma executable,
	// followed by the path to the clang executable (either clang++ or
	// clang-cl.exe). We want the clang executable to be the first parameter.
	for i, cmd := range commandList {
		if strings.Contains(cmd, "clang") {
			// Shorten the list of commands such that it starts with the path to
			// the clang executable.
			commandList = commandList[i:]
			if clangTargetArch != "" {
				commandList = append(commandList, "-target", clangTargetArch)
			}
			break
		}
	}

	// Extract the output file argument.
	var outputFile string
	for i, cmd := range commandList {
		if cmd == "-o" && i+1 < len(commandList) {
			outputFile = commandList[i+1]
			break
		}
		if strings.HasPrefix(cmd, "/Fo") {
			// Handle the Windows case.
			outputFile = cmd[len("/Fo"):]
			break
		}
	}
	if outputFile == "" {
		logging.Warningf(ctx, "No output file path found for %s\n", clangInfo.unit.File)
	}

	if strings.Contains(commandList[0], "clang-cl") {
		// Convert any args starting with -imsvc to use forward slashes, since
		// this is what Kythe expects.
		for i, cmd := range commandList {
			if strings.HasPrefix(cmd, "-imsvc") {
				commandList[i] = strings.ReplaceAll(cmd, "\\", "/")
			}
		}
		// HACK ALERT: Here we define header guards to prevent Kythe from using
		// the CUDA wrapper headers, which cause indexing errors.
		// The standard Kythe extractor dumps header search state to help the
		// indexer find the right headers, but we don't do that in this script.
		// The below lines work around it by excluding the CUDA headers entirely.
		commandList = append(commandList,
			"-D__CLANG_CUDA_WRAPPERS_NEW",
			"-D__CLANG_CUDA_WRAPPERS_COMPLEX",
			"-D__CLANG_CUDA_WRAPPERS_ALGORITHM",
		)
		// Remove any args that may cause errors with the Kythe indexer. The
		// kept args are compacted in place at the front of the slice.
		ln := 0
		for _, arg := range commandList {
			if isUnwantedWinArg(arg) {
				continue
			}
			commandList[ln] = arg
			ln++
		}
		commandList = commandList[:ln]
	}

	// This macro is used to guard Kythe-specific pragmas, so we must define it
	// for Kythe to see them. In particular the kythe_inline_metadata pragma we
	// insert into mojom generated files.
	commandList = append(commandList, "-DKYTHE_IS_RUNNING=1")

	if useSiso {
		deps, ok := targetDepsMap[unique.Make(clangInfo.unit.File)]
		if !ok {
			return nil, fmt.Errorf("getClangUnit: not able to find dependencies for %s", clangInfo.unit.File)
		}
		for _, dep := range deps {
			if err := processAndAddClangUnitInput(ctx, dep.Value(), clangInfo, outDir, corpus, hashMaps, unitProto); err != nil {
				return nil, err
			}
		}
	} else {
		file, err := os.Open(clangInfo.filepathsFile)
		if err != nil {
			return nil, err
		}
		scanner := bufio.NewScanner(file)
		// Stop at the first failing input, but fall through to Close so the
		// file is released on every path.
		var readErr error
		for readErr == nil && scanner.Scan() {
			readErr = processAndAddClangUnitInput(ctx, scanner.Text(), clangInfo, outDir, corpus, hashMaps, unitProto)
		}
		if readErr == nil {
			// Scan also stops on a read error or an over-long line.
			if scanErr := scanner.Err(); scanErr != nil {
				readErr = fmt.Errorf("reading %s: %w", clangInfo.filepathsFile, scanErr)
			}
		}
		closeErr := file.Close()
		if readErr != nil {
			return nil, readErr
		}
		if closeErr != nil {
			return nil, closeErr
		}
	}

	unitProto.SourceFile = append(unitProto.SourceFile, clangInfo.unit.File)
	unitProto.WorkingDirectory = convertPathToForwardSlashes(clangInfo.unit.Directory)
	unitProto.OutputKey = outputFile
	unitProto.VName = &kpb.VName{
		Corpus:   corpus,
		Language: "c++",
	}

	// Add the build config if specified.
	if buildConfig != "" {
		details := &kpb.BuildDetails{
			BuildConfig: buildConfig,
		}
		// Named detailsAny so the builtin `any` is not shadowed.
		detailsAny, err := ptypes.MarshalAny(details)
		if err != nil {
			return nil, err
		}
		detailsAny.TypeUrl = "kythe.io/proto/kythe.proto.BuildDetails"
		unitProto.Details = append(unitProto.Details, detailsAny)
	}

	unitProto.Argument = append(unitProto.Argument, commandList...)
	// Disable all warnings with -w so that the indexer can run successfully.
	// The job of the indexer is to index the code, not to verify it. Warnings
	// we actually care about should show up in the compile step.
	unitProto.Argument = append(unitProto.Argument, "-w")
	return unitProto, nil
}
// processAndAddClangUnitInput normalizes one raw dependency name of the target
// in clangInfo and adds it to unitProto as a required input.
//
// fnameRaw is trimmed and every "//" in it is replaced by "/". Names below
// third_party/llvm-build are skipped. When the name ends in ".pb.h", the input
// itself was added, and its "<full path>.meta" file exists on disk, that .meta
// file is added as a required input too.
func processAndAddClangUnitInput(ctx context.Context, fnameRaw string, clangInfo *clangUnitInfo, outDir string, corpus string,
	hashMaps *FileHashMap, unitProto *kpb.CompilationUnit) error {
	fname := strings.TrimSpace(fnameRaw)
	// We should not package builtin clang header files, see
	// crbug.com/513826
	if strings.Contains(fname, "third_party/llvm-build") {
		return nil
	}
	// The clang tool uses '//' to separate the system path where system
	// headers can be found from the relative path used in the #include
	// statement.
	fname = strings.ReplaceAll(fname, "//", "/")

	fnameFullpath, err := addClangUnitInput(ctx,
		fname, clangInfo.unit.Directory, outDir, corpus, hashMaps, unitProto)
	if err != nil {
		return err
	}
	// addClangUnitInput returns an empty path when it skipped the file; there
	// is no location to probe for a .meta file then. Testing the suffix before
	// the stat keeps the file system out of the common case.
	if fnameFullpath == "" || !strings.HasSuffix(fname, ".pb.h") {
		return nil
	}
	// .pb.h.meta file is required for CC/PB cross references
	if _, err := os.Stat(fnameFullpath + ".meta"); err != nil {
		return nil
	}
	_, err = addClangUnitInput(ctx,
		fname+".meta", clangInfo.unit.Directory, outDir, corpus, hashMaps, unitProto)
	return err
}
// addClangUnitInput adds fname as a required input to unitProto and returns
// the full path of the file that was added. Used as a helper function in
// getClangUnit.
//
// fname is either absolute or relative to dir. Its cleaned absolute form is
// looked up in hashMaps; when no hash is known a warning is logged and
// ("", nil) is returned without touching unitProto.
func addClangUnitInput(ctx context.Context, fname, dir, outDir, corpus string, hashMaps *FileHashMap,
	unitProto *kpb.CompilationUnit) (string, error) {
	cleaned := filepath.Clean(fname)
	isAbs := filepath.IsAbs(cleaned)

	// Build the absolute, cleaned path so it is consistent with the entries
	// in hashMaps.
	fullPath := cleaned
	if !isAbs {
		absPath, err := filepath.Abs(filepath.Join(dir, cleaned))
		if err != nil {
			return "", err
		}
		fullPath = absPath
	}
	fullPath = filepath.Clean(fullPath)

	digest, known := hashMaps.Filehash(fullPath)
	if !known {
		logging.Warningf(ctx, "No information about required input file %s\n", fullPath)
		return "", nil
	}

	// Absolute inputs are rewritten relative to dir: when normalizing we
	// assume paths are relative to the output directory (e.g. src/out/Debug).
	inputPath := cleaned
	if isAbs {
		relPath, err := filepath.Rel(dir, cleaned)
		if err != nil {
			return "", err
		}
		inputPath = relPath
	}

	fileVName := &kpb.VName{}
	setVnameForFile(fileVName, convertPathToForwardSlashes(normalizePath(outDir, inputPath)), corpus)
	input := &kpb.CompilationUnit_FileInput{
		VName: fileVName,
		Info: &kpb.FileInfo{
			Path:   convertPathToForwardSlashes(inputPath),
			Digest: digest,
		},
	}
	unitProto.RequiredInput = append(unitProto.GetRequiredInput(), input)
	return fullPath, nil
}