blob: f38e9fcee3e17eb8b54aa4a2acd411bbe7f5bc39 [file] [log] [blame]
// Copyright 2016 The Goma Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "lib/execreq_normalizer.h"
#include <utility>
#include <vector>
#include "absl/memory/memory.h"
#include "absl/strings/match.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "base/path.h"
#include "glog/logging.h"
#include "glog/stl_logging.h"
#include "lib/compiler_flags.h"
#include "lib/path_resolver.h"
#include "lib/path_util.h"
using google::protobuf::RepeatedPtrField;
using ::absl::StrCat;
namespace devtools_goma {
string FixPathToBeCwdRelative::ParseFlagValue(const FlagParser::Flag& flag,
const string& value) {
string normalized_path = PathResolver::WeakRelativePath(value, cwd_);
if (normalized_path != value) {
is_fixed_ = true;
}
return normalized_path;
}
string PathRewriterWithDebugPrefixMap::ParseFlagValue(
const FlagParser::Flag& flag,
const string& value) {
// TODO: need to support Windows?
if (!IsPosixAbsolutePath(value)) {
return value;
}
// RewritePath is used for normalizing paths.
// We MUST eliminate anything in debug-prefix-map.
if (flag.name() == "fdebug-prefix-map") {
removed_fdebug_prefix_map_ = true;
return "";
}
string path = value;
if (RewritePathWithDebugPrefixMap(debug_prefix_map_, &path)) {
is_rewritten_ = true;
return path;
}
return value;
}
bool RewritePathWithDebugPrefixMap(
const std::map<string, string>& debug_prefix_map,
string* path) {
if (path->empty()) {
return false;
}
// See CGDebugInfo::remapDIPath
// https://clang.llvm.org/doxygen/CGDebugInfo_8cpp_source.html
for (const auto& iter : debug_prefix_map) {
if (absl::StartsWith(*path, iter.first)) {
*path = file::JoinPath(iter.second, path->substr(iter.first.length()));
return true;
}
}
return false;
}
// We say debug prefix map is ambiguous when the application order of debug
// prefix map can change the final result.
// For example:
// Suppose we have the following debug prefix maps:
// /A = /X (1)
// /A/B = /Y (2)
// and we want to rewrite /A/B/C.
// /A/B/C is written to /X/B/C with (1), but is also written to /Y/C with (2).
// When such a case happens, we say debug prefix map is ambiguous.
//
// In clang and gcc, only first matched rule is used to rewrite path.
// https://clang.llvm.org/doxygen/CGDebugInfo_8cpp_source.html
// (CGDebugInfo::remapDIPath)
// https://github.com/gcc-mirror/gcc/blob/460902cc8ac206904e7f1763f197927be87b122f/gcc/final.c#L1562
//
// TODO: If the application order of debug_prefix_map is written-order,
// using std::vector<std::pair<string, string>> looks better than
// std::map<string, string>?
bool HasAmbiguityInDebugPrefixMap(
const std::map<string, string>& debug_prefix_map) {
if (debug_prefix_map.size() <= 1) {
return false;
}
string prev;
for (const auto& path : debug_prefix_map) {
if (!prev.empty() && absl::StartsWith(path.first, prev)) {
return true;
}
prev = path.first;
}
return false;
}
void ConfigurableExecReqNormalizer::NormalizeExecReqSystemIncludeDirs(
int keep_system_include_dirs,
const std::map<string, string>& debug_prefix_map,
const string& debug_prefix_map_signature,
ExecReq* req) const {
if (keep_system_include_dirs & kAsIs) {
return;
}
// Hack for non-system-default compilers e.g. NaCl and clang.
// Normalize following paths to be given with the relative path:
// - system_include_path
// - cxx_system_include_path
//
// Already cleared:
// - local_compiler_path
//
// Note:
// Since followings are usually pointing the system default paths,
// we do not normalize them.
// - system_framework_path
// - system_library_path
CommandSpec* normalized_spec = req->mutable_command_spec();
// To avoid yet another cache poisoning, we should separate cache area.
// i.e. include_paths with relative paths is given but misunderstand
// it as not normalized.
if (keep_system_include_dirs & kNormalizeWithDebugPrefixMap) {
bool is_normalized = false;
for (auto& path : *normalized_spec->mutable_system_include_path()) {
is_normalized |= RewritePathWithDebugPrefixMap(debug_prefix_map, &path);
}
for (auto& path : *normalized_spec->mutable_cxx_system_include_path()) {
is_normalized |= RewritePathWithDebugPrefixMap(debug_prefix_map, &path);
}
if (is_normalized) {
normalized_spec->mutable_comment()->append(" include_path:" +
debug_prefix_map_signature);
}
} else if (keep_system_include_dirs & kNormalizeWithCwd) {
bool is_include_path_normalized = false;
for (auto& path : *normalized_spec->mutable_system_include_path()) {
string normalized_path = PathResolver::WeakRelativePath(path, req->cwd());
if (path != normalized_path) {
path.assign(normalized_path);
is_include_path_normalized = true;
}
}
for (auto& path : *normalized_spec->mutable_cxx_system_include_path()) {
string normalized_path = PathResolver::WeakRelativePath(path, req->cwd());
if (path != normalized_path) {
path.assign(normalized_path);
is_include_path_normalized = true;
}
}
if (is_include_path_normalized) {
normalized_spec->mutable_comment()->append(" include_path:cwd");
}
} else if (keep_system_include_dirs == kOmit) {
normalized_spec->clear_system_include_path();
normalized_spec->clear_cxx_system_include_path();
normalized_spec->mutable_comment()->append(" omit_include_path:");
} else {
DLOG(FATAL) << "Unexpected keep_system_include_dirs="
<< keep_system_include_dirs;
}
}
void ConfigurableExecReqNormalizer::NormalizeExecReqArgs(
int keep_args,
const std::vector<string>& args,
const std::vector<string>& normalize_weak_relative_for_arg,
const std::map<string, string>& debug_prefix_map,
const string& debug_prefix_map_signature,
ExecReq* req) const {
DCHECK(keep_args & kAsIs) << keep_args;
LOG_IF(ERROR, (keep_args & kAsIs) == 0)
<< "NormalizeExecReqArgs's default implementation is not provided. "
<< "keep_args must have kAsIs. To implement normalization, provide "
<< "compiler specific one.";
}
void ConfigurableExecReqNormalizer::NormalizeExecReqPathnamesInInput(
int keep_pathnames_in_input,
const std::map<string, string>& debug_prefix_map,
const string& debug_prefix_map_signature,
ExecReq* req) const {
if (keep_pathnames_in_input & kAsIs) {
return;
}
bool is_rewritten_debug_prefix_map = false;
bool is_rewritten_cwd = false;
bool is_removed = false;
for (auto& input : *req->mutable_input()) {
if (keep_pathnames_in_input & kNormalizeWithDebugPrefixMap) {
RewritePathWithDebugPrefixMap(debug_prefix_map, input.mutable_filename());
is_rewritten_debug_prefix_map = true;
} else if (keep_pathnames_in_input & kNormalizeWithCwd) {
input.set_filename(
PathResolver::WeakRelativePath(input.filename(), req->cwd()));
is_rewritten_cwd = true;
} else {
DLOG(FATAL) << "Unexpected keep_pathnames_in_input="
<< keep_pathnames_in_input;
}
}
CommandSpec* normalized_spec = req->mutable_command_spec();
if (is_rewritten_debug_prefix_map) {
normalized_spec->mutable_comment()->append(" pathnames_in_input:" +
debug_prefix_map_signature);
}
if (is_rewritten_cwd) {
normalized_spec->mutable_comment()->append(" pathnames_in_input:cwd");
}
if (is_removed) {
normalized_spec->mutable_comment()->append(" pathnames_in_input:removed");
}
}
void ConfigurableExecReqNormalizer::NormalizeExecReqCwd(
int keep_cwd,
const absl::optional<string>& new_cwd,
const std::map<string, string>& debug_prefix_map,
const string& debug_prefix_map_signature,
ExecReq* req) const {
if (keep_cwd & kAsIs) {
return;
}
bool is_rewritten = false;
bool is_removed = false;
bool is_replaced = false;
static const char kPwd[] = "PWD=";
if (keep_cwd & kNormalizeWithDebugPrefixMap) {
// If there is PWD= in env, replace cwd with content of PWD=.
for (const auto& env_var : req->env()) {
if (absl::StartsWith(env_var, kPwd)) {
*req->mutable_cwd() = env_var.substr(strlen(kPwd));
break;
}
}
if (new_cwd) {
// fdebug-compilation-dir is applied before fdebug-prefix-map when we use
// fdebug-prefix-map.
req->set_cwd(*new_cwd);
is_replaced = true;
}
RewritePathWithDebugPrefixMap(debug_prefix_map, req->mutable_cwd());
is_rewritten = true;
} else if (new_cwd) {
req->set_cwd(*new_cwd);
is_replaced = true;
} else {
req->clear_cwd();
is_removed = true;
}
// Drop PWD from env.
auto it = req->mutable_env()->begin();
while (it != req->mutable_env()->end()) {
if (absl::StartsWith(*it, kPwd)) {
if (keep_cwd & kNormalizeWithDebugPrefixMap) {
string path = it->substr(strlen(kPwd));
RewritePathWithDebugPrefixMap(debug_prefix_map, &path);
*it = StrCat(kPwd, path);
is_rewritten = true;
++it;
} else {
it = req->mutable_env()->erase(it);
is_removed = true;
}
} else {
++it;
}
}
CommandSpec* normalized_spec = req->mutable_command_spec();
if (is_rewritten) {
normalized_spec->mutable_comment()->append(" cwd:" +
debug_prefix_map_signature);
}
if (is_replaced) {
normalized_spec->mutable_comment()->append(" cwd:replaced");
}
if (is_removed) {
normalized_spec->mutable_comment()->append(" cwd:removed");
}
}
void ConfigurableExecReqNormalizer::NormalizeExecReqSubprograms(
ExecReq* req) const {
// normalize subprogram. path names are not needed for cache key.
for (auto& s : *req->mutable_subprogram()) {
s.clear_path();
}
}
void ConfigurableExecReqNormalizer::NormalizeExecReqEnvs(ExecReq* req) const {
std::vector<string> new_env;
bool changed = false;
for (const auto& env_var : req->env()) {
if (absl::StartsWith(env_var, "DEVELOPER_DIR=")) {
changed = true;
continue;
}
new_env.push_back(env_var);
}
if (changed) {
req->clear_env();
for (auto&& env_var : new_env) {
req->add_env(std::move(env_var));
}
}
}
void ConfigurableExecReqNormalizer::NormalizeExecReqOutputFilesAndDirs(
ExecReq* req) const {
// Just sort.
std::sort(req->mutable_expected_output_files()->begin(),
req->mutable_expected_output_files()->end());
std::sort(req->mutable_expected_output_dirs()->begin(),
req->mutable_expected_output_dirs()->end());
}
// ExecReq_Inputs are sorted by filename now. However, cwd can be different
// among computers, and filename might contain cwd. So the essentially same
// ExecReq might have different hash values, even if cwd in ExecReq and
// filenames in ExecReq_Input are cleared.
// So we reorder ExecReq_Inputs so that ExecReq_Input whose filename starts with
// cwd come first.
//
// For example: When cwd = /usr/local/google/home/foo/build,
// the following ExecReq_Inputs
// ExecReq_Input { filename: /usr/include/stdio.h, ... }
// ...
// ...
// ExecReq_Input { filename: /usr/local/google/home/foo/build/main.cc, ...}
// will be reorderd to
// ExecReq_Input { filename: /usr/local/google/home/foo/build/main.cc, ...}
// ExecReq_Input { filename: /usr/include/stdio.h, ... }
// ...
// ...
//
// See also b/11455957
void ConfigurableExecReqNormalizer::NormalizeExecReqInputOrderForCacheKey(
ExecReq* req) const {
std::vector<const ExecReq_Input*> inputs_in_cwd;
std::vector<const ExecReq_Input*> inputs_not_in_cwd;
inputs_in_cwd.reserve(req->input_size());
inputs_not_in_cwd.reserve(req->input_size());
for (const auto& input : req->input()) {
if (absl::StartsWith(input.filename(), req->cwd())) {
inputs_in_cwd.push_back(&input);
} else {
inputs_not_in_cwd.push_back(&input);
}
}
RepeatedPtrField<ExecReq_Input> new_inputs;
new_inputs.Reserve(req->input_size());
// Inputs whose filename starting with cwd come first.
for (const auto& input : inputs_in_cwd) {
*new_inputs.Add() = *input;
}
for (const auto& input : inputs_not_in_cwd) {
*new_inputs.Add() = *input;
}
DCHECK_EQ(new_inputs.size(), req->input_size());
req->mutable_input()->Swap(&new_inputs);
}
void ConfigurableExecReqNormalizer::NormalizeForCacheKey(
int id,
bool normalize_include_path,
bool is_linking,
const std::vector<string>& normalize_weak_relative_for_arg,
const std::map<string, string>& debug_prefix_map,
ExecReq* req) const {
req->clear_requester_info();
req->clear_cache_policy();
req->clear_requester_env();
for (auto& input : *req->mutable_input()) {
input.clear_content();
}
req->mutable_command_spec()->clear_local_compiler_path();
const string& command_name = req->command_spec().name();
LOG_IF(ERROR, command_name.empty())
<< "empty command_spec.name:" << req->command_spec().DebugString();
std::vector<string> args;
// Normalize args.
// we use CommandSpec.name for arg(0) for cache key.
// see b/11973647
if (req->expanded_arg_size() > 0) {
req->set_expanded_arg(0, command_name);
req->clear_arg();
std::copy(req->expanded_arg().begin(), req->expanded_arg().end(),
back_inserter(args));
} else if (req->arg_size() > 0) {
req->set_arg(0, command_name);
std::copy(req->arg().begin(), req->arg().end(), back_inserter(args));
}
Config config =
Configure(id, args, normalize_include_path, is_linking,
normalize_weak_relative_for_arg, debug_prefix_map, req);
LOG(INFO) << id << ": normalize:"
<< " keep_cwd=" << config.keep_cwd
<< " keep_args=" << config.keep_args
<< " keep_pathnames_in_input=" << config.keep_pathnames_in_input
<< " keep_system_include_dirs=" << config.keep_system_include_dirs;
string debug_prefix_map_signature;
if (!debug_prefix_map.empty()) {
debug_prefix_map_signature += "debug_prefix_map:";
for (const auto& iter : debug_prefix_map) {
debug_prefix_map_signature += iter.second;
debug_prefix_map_signature += ",";
}
}
// TODO: confirm output does not contains path in include_path
// for the situation we normalize the include path name.
NormalizeExecReqSystemIncludeDirs(config.keep_system_include_dirs,
debug_prefix_map,
debug_prefix_map_signature, req);
NormalizeExecReqArgs(config.keep_args, args, normalize_weak_relative_for_arg,
debug_prefix_map, debug_prefix_map_signature, req);
// This method needs cwd and filename in ExecReq_Input.
// So, do before processing keep_pathnames and keep_cwd.
NormalizeExecReqInputOrderForCacheKey(req);
NormalizeExecReqPathnamesInInput(config.keep_pathnames_in_input,
debug_prefix_map, debug_prefix_map_signature,
req);
NormalizeExecReqCwd(config.keep_cwd, config.new_cwd, debug_prefix_map,
debug_prefix_map_signature, req);
NormalizeExecReqSubprograms(req);
NormalizeExecReqEnvs(req);
NormalizeExecReqOutputFilesAndDirs(req);
}
ConfigurableExecReqNormalizer::Config AsIsExecReqNormalizer::Configure(
int id,
const std::vector<string>& args,
bool normalize_include_path,
bool is_linking,
const std::vector<string>& normalize_weak_relative_for_arg,
const std::map<string, string>& debug_prefix_map,
const ExecReq* req) const {
return Config::AsIs();
}
} // namespace devtools_goma