Enable propagating env vars to binaries started by Centipede.

I added a new Centipede flag `env_diff_for_binaries`, which allows specifying
env vars to set and unset relative to the environment of the parent process.

I modified the interface of `Command` to match the spec of the added flag for
setting/unsetting env vars.

With the new mechanism, I was able to move the list of Bazel TEST_* vars that
need to be unset out of the engine code and into the C++ framework code.

PiperOrigin-RevId: 899674482
diff --git a/centipede/analyze_corpora_test.cc b/centipede/analyze_corpora_test.cc
index cf00c54..c349ec3 100644
--- a/centipede/analyze_corpora_test.cc
+++ b/centipede/analyze_corpora_test.cc
@@ -57,8 +57,8 @@
   bool uses_legacy_trace_pc_instrumentation = {};
   BinaryInfo binary_info;
   binary_info.InitializeFromSanCovBinary(
-      GetTargetPath(), GetObjDumpPath(), GetLLVMSymbolizerPath(),
-      GetTestTempDir(test_info_->name()).string());
+      GetTargetPath(), /*env_diff=*/{}, GetObjDumpPath(),
+      GetLLVMSymbolizerPath(), GetTestTempDir(test_info_->name()).string());
   const auto &pc_table = binary_info.pc_table;
   EXPECT_FALSE(uses_legacy_trace_pc_instrumentation);
   const SymbolTable &symbols = binary_info.symbols;
@@ -93,7 +93,8 @@
 
   const std::string test_tmpdir = GetTestTempDir(test_info_->name());
   BinaryInfo binary_info;
-  binary_info.InitializeFromSanCovBinary(GetTargetPath(), GetObjDumpPath(),
+  binary_info.InitializeFromSanCovBinary(GetTargetPath(), /*env_diff=*/{},
+                                         GetObjDumpPath(),
                                          GetLLVMSymbolizerPath(), test_tmpdir);
   CoverageResults coverage_results =
       GetCoverage(corpus_records, std::move(binary_info));
diff --git a/centipede/binary_info.cc b/centipede/binary_info.cc
index 4517c98..0d02c53 100644
--- a/centipede/binary_info.cc
+++ b/centipede/binary_info.cc
@@ -40,8 +40,9 @@
 }  // namespace
 
 void BinaryInfo::InitializeFromSanCovBinary(
-    std::string_view binary_path_with_args, std::string_view objdump_path,
-    std::string_view symbolizer_path, std::string_view tmp_dir_path) {
+    std::string_view binary_path_with_args, std::vector<std::string> env_diff,
+    std::string_view objdump_path, std::string_view symbolizer_path,
+    std::string_view tmp_dir_path) {
   if (binary_path_with_args.empty()) {
     // This usually happens in tests.
     FUZZTEST_LOG(INFO) << __func__ << ": binary_path_with_args is empty";
@@ -58,10 +59,11 @@
       std::filesystem::path{tmp_dir_path} / "binary_info_log_tmp";
   FUZZTEST_LOG(INFO) << __func__ << ": tmp_dir: " << tmp_dir;
 
-  Command::Options cmd_options;
-  cmd_options.env_add = {absl::StrCat(
+  env_diff.push_back(absl::StrCat(
       "CENTIPEDE_RUNNER_FLAGS=:dump_binary_info:arg1=", pc_table_path.path(),
-      ":arg2=", cf_table_path.path(), ":arg3=", dso_table_path.path(), ":")};
+      ":arg2=", cf_table_path.path(), ":arg3=", dso_table_path.path(), ":"));
+  Command::Options cmd_options;
+  cmd_options.env_diff = std::move(env_diff);
   cmd_options.stdout_file_prefix = log_prefix;
   Command cmd{binary_path_with_args, std::move(cmd_options)};
   int exit_code = cmd.Execute();
diff --git a/centipede/binary_info.h b/centipede/binary_info.h
index 9a1e53d..f082fa4 100644
--- a/centipede/binary_info.h
+++ b/centipede/binary_info.h
@@ -15,7 +15,9 @@
 #ifndef THIRD_PARTY_CENTIPEDE_BINARY_INFO_H_
 #define THIRD_PARTY_CENTIPEDE_BINARY_INFO_H_
 
+#include <string>
 #include <string_view>
+#include <vector>
 
 #include "./centipede/call_graph.h"
 #include "./centipede/control_flow.h"
@@ -39,9 +41,13 @@
   // `uses_legacy_trace_pc_instrumentation` based on `binary_path_with_args`.
   // * `binary_path_with_args` is the path to the instrumented binary,
   // possibly with space-separated arguments.
+  // * `env_diff` is a list of environment variables to set (given as `VAR=val`)
+  //    or unset (given as `-VAR`) when running the binary, relative to the
+  //    parent process.
   // * `objdump_path` and `symbolizer_path` are paths to respective tools.
   // * `tmp_dir_path` is a path to a temp dir, that must exist.
   void InitializeFromSanCovBinary(std::string_view binary_path_with_args,
+                                  std::vector<std::string> env_diff,
                                   std::string_view objdump_path,
                                   std::string_view symbolizer_path,
                                   std::string_view tmp_dir_path);
diff --git a/centipede/centipede_callbacks.cc b/centipede/centipede_callbacks.cc
index 6537212..af247e1 100644
--- a/centipede/centipede_callbacks.cc
+++ b/centipede/centipede_callbacks.cc
@@ -290,38 +290,10 @@
   int conn_socket_ = -1;
 };
 
-namespace {
-
-// When running a test binary in a subprocess, we don't want these environment
-// variables to be inherited and affect the execution of the tests.
-//
-// See list of environment variables here:
-// https://bazel.build/reference/test-encyclopedia#initial-conditions
-//
-// TODO(fniksic): Add end-to-end tests that make sure we don't observe the
-// effects of these variables in the test binary.
-std::vector<std::string> EnvironmentVariablesToUnset() {
-  return {"TEST_DIAGNOSTICS_OUTPUT_DIR",              //
-          "TEST_INFRASTRUCTURE_FAILURE_FILE",         //
-          "TEST_LOGSPLITTER_OUTPUT_FILE",             //
-          "TEST_PREMATURE_EXIT_FILE",                 //
-          "TEST_RANDOM_SEED",                         //
-          "TEST_RUN_NUMBER",                          //
-          "TEST_SHARD_INDEX",                         //
-          "TEST_SHARD_STATUS_FILE",                   //
-          "TEST_TOTAL_SHARDS",                        //
-          "TEST_UNDECLARED_OUTPUTS_ANNOTATIONS_DIR",  //
-          "TEST_UNDECLARED_OUTPUTS_DIR",              //
-          "TEST_WARNINGS_OUTPUT_FILE",                //
-          "GTEST_OUTPUT",                             //
-          "XML_OUTPUT_FILE"};
-}
-
-}  // namespace
-
 void CentipedeCallbacks::PopulateBinaryInfo(BinaryInfo& binary_info) {
   binary_info.InitializeFromSanCovBinary(
-      env_.coverage_binary, env_.objdump_path, env_.symbolizer_path, temp_dir_);
+      env_.coverage_binary, env_.env_diff_for_binaries, env_.objdump_path,
+      env_.symbolizer_path, temp_dir_);
   // Check the PC table.
   if (binary_info.pc_table.empty()) {
     if (env_.require_pc_table) {
@@ -415,7 +387,8 @@
         std::make_unique<CentipedeCallbacks::PersistentModeServer>(
             std::move(server_path));
   }
-  std::vector<std::string> env = {ConstructRunnerFlags(
+  std::vector<std::string> env_diff = env_.env_diff_for_binaries;
+  env_diff.push_back(ConstructRunnerFlags(
       absl::StrCat(":shmem:test=", env_.test_name, ":arg1=",
                    inputs_blobseq_.path(), ":arg2=", outputs_blobseq_.path(),
                    ":failure_description_path=", failure_description_path_,
@@ -425,16 +398,16 @@
                        : absl::StrCat(":persistent_mode_socket=",
                                       persistent_mode_server->server_path()),
                    ":"),
-      disable_coverage)};
+      disable_coverage));
 
-  if (env_.clang_coverage_binary == binary)
-    env.emplace_back(
+  if (env_.clang_coverage_binary == binary) {
+    env_diff.push_back(
         absl::StrCat("LLVM_PROFILE_FILE=",
                      WorkDir{env_}.SourceBasedCoverageRawProfilePath()));
+  }
 
   Command::Options cmd_options;
-  cmd_options.env_add = std::move(env);
-  cmd_options.env_remove = EnvironmentVariablesToUnset();
+  cmd_options.env_diff = std::move(env_diff);
   cmd_options.stdout_file_prefix = execute_log_prefix_;
   cmd_options.stderr_file_prefix = execute_log_prefix_;
   cmd_options.temp_file_path = temp_input_file_path_;
@@ -642,8 +615,8 @@
                     "dl_path_suffix=", env_.runner_dl_path_suffix, ":");
   }
   Command::Options cmd_options;
-  cmd_options.env_add = {std::move(centipede_runner_flags)};
-  cmd_options.env_remove = EnvironmentVariablesToUnset();
+  cmd_options.env_diff = env_.env_diff_for_binaries;
+  cmd_options.env_diff.push_back(std::move(centipede_runner_flags));
   cmd_options.stdout_file_prefix = execute_log_prefix_;
   cmd_options.stderr_file_prefix = execute_log_prefix_;
   cmd_options.temp_file_path = temp_input_file_path_;
@@ -707,8 +680,8 @@
                     "dl_path_suffix=", env_.runner_dl_path_suffix, ":");
   }
   Command::Options cmd_options;
-  cmd_options.env_add = {std::move(centipede_runner_flags)};
-  cmd_options.env_remove = EnvironmentVariablesToUnset();
+  cmd_options.env_diff = env_.env_diff_for_binaries;
+  cmd_options.env_diff.push_back(std::move(centipede_runner_flags));
   cmd_options.stdout_file_prefix = execute_log_prefix_;
   cmd_options.stderr_file_prefix = execute_log_prefix_;
   cmd_options.temp_file_path = temp_input_file_path_;
diff --git a/centipede/centipede_flags.inc b/centipede/centipede_flags.inc
index 1a171d7..843e298 100644
--- a/centipede/centipede_flags.inc
+++ b/centipede/centipede_flags.inc
@@ -484,3 +484,9 @@
     bool, report_crash_summary, true,
     "If set, reports a summary of crashes found during fuzzing or replay."
 )
+CENTIPEDE_FLAG(
+    std::vector<std::string>, env_diff_for_binaries, {},
+    "A comma-separated list of environment variables to set or unset when "
+    "running the target binary, coverage binary, and extra binaries, relative "
+    "to the parent process's environment. Variables in the form 'VAR=VAL' are "
+    "set, and variables in the form '-VAR' are unset.")
diff --git a/centipede/command.cc b/centipede/command.cc
index 3071428..4c76598 100644
--- a/centipede/command.cc
+++ b/centipede/command.cc
@@ -204,16 +204,22 @@
 
 std::string Command::ToString() const {
   std::vector<std::string> ss;
-  ss.reserve(/*env*/ 1 + options_.env_add.size() + options_.env_remove.size() +
-             /*path*/ 1 + /*args*/ options_.args.size() + /*out/err*/ 2);
+  ss.reserve(/*env*/ 1 + options_.env_diff.size() + /*path*/ 1 +
+             /*args*/ options_.args.size() + /*out/err*/ 2);
   // env.
   ss.push_back("env");
+  std::vector<std::string> env_to_set;
+  env_to_set.reserve(options_.env_diff.size());
   // Arguments that unset environment variables must appear first.
-  for (const auto& var : options_.env_remove) {
-    ss.push_back(absl::StrCat("-u ", var));
+  for (std::string_view var : options_.env_diff) {
+    if (absl::StartsWith(var, "-")) {
+      ss.push_back(absl::StrCat("-u ", var.substr(1)));
+    } else {
+      env_to_set.emplace_back(var);
+    }
   }
-  for (const auto& var : options_.env_add) {
-    ss.push_back(var);
+  for (auto& var : env_to_set) {
+    ss.push_back(std::move(var));
   }
   // path.
   std::string path = path_;
diff --git a/centipede/command.h b/centipede/command.h
index d9edf8d..f127248 100644
--- a/centipede/command.h
+++ b/centipede/command.h
@@ -34,12 +34,10 @@
     // shell, so the arguments need to be shell-escaped.
     // TODO(b/381910257): Escape the arguments for passing to the shell.
     std::vector<std::string> args;
-    // Environment variables/values in the form "KEY=VALUE" to set in the
-    // subprocess executing the command. These are added to the environment
-    // variables inherited from the parent process.
-    std::vector<std::string> env_add;
-    // Environment variables to unset in the subprocess executing the command.
-    std::vector<std::string> env_remove;
+    // Environment variables to set or unset in the subprocess, relative to the
+    // environment of the parent process. The variables in the form "KEY=VALUE"
+    // are set, and the variables in the form "-KEY" are unset.
+    std::vector<std::string> env_diff;
     // Redirect stdout to this file path with an opaque suffix. If empty, use
     // parent's STDOUT.
     //
diff --git a/centipede/command_test.cc b/centipede/command_test.cc
index dc163c9..d7f2f58 100644
--- a/centipede/command_test.cc
+++ b/centipede/command_test.cc
@@ -48,8 +48,7 @@
   }
   {
     Command::Options cmd_options;
-    cmd_options.env_add = {"K1=V1", "K2=V2"};
-    cmd_options.env_remove = {"K3"};
+    cmd_options.env_diff = {"K1=V1", "K2=V2", "-K3"};
     EXPECT_EQ((Command{"x", std::move(cmd_options)}.ToString()),
               "env \\\n-u K3 \\\nK1=V1 \\\nK2=V2 \\\nx");
   }
diff --git a/centipede/control_flow_test.cc b/centipede/control_flow_test.cc
index 5327b85..3fe2282 100644
--- a/centipede/control_flow_test.cc
+++ b/centipede/control_flow_test.cc
@@ -200,7 +200,7 @@
   // Load the cf table.
   BinaryInfo binary_info;
   binary_info.InitializeFromSanCovBinary(
-      target_path, GetObjDumpPath(), GetLLVMSymbolizerPath(),
+      target_path, /*env_diff=*/{}, GetObjDumpPath(), GetLLVMSymbolizerPath(),
       GetTestTempDir(test_info_->name()).string());
   const auto &cf_table = binary_info.cf_table;
   FUZZTEST_LOG(INFO) << VV(target_path) << VV(tmp_path1) << VV(cf_table.size());
@@ -278,7 +278,8 @@
 static void SymbolizeBinary(std::string_view test_dir,
                             std::string_view target_path, bool use_trace_pc) {
   BinaryInfo binary_info;
-  binary_info.InitializeFromSanCovBinary(target_path, GetObjDumpPath(),
+  binary_info.InitializeFromSanCovBinary(target_path, /*env_diff=*/{},
+                                         GetObjDumpPath(),
                                          GetLLVMSymbolizerPath(), test_dir);
   // Load the pc table.
   const auto &pc_table = binary_info.pc_table;
diff --git a/centipede/coverage_test.cc b/centipede/coverage_test.cc
index 12cbf53..4f3a830 100644
--- a/centipede/coverage_test.cc
+++ b/centipede/coverage_test.cc
@@ -196,8 +196,8 @@
   bool uses_legacy_trace_pc_instrumentation = {};
   BinaryInfo binary_info;
   binary_info.InitializeFromSanCovBinary(
-      GetTargetPath(), GetObjDumpPath(), GetLLVMSymbolizerPath(),
-      GetTestTempDir(test_info_->name()).string());
+      GetTargetPath(), /*env_diff=*/{}, GetObjDumpPath(),
+      GetLLVMSymbolizerPath(), GetTestTempDir(test_info_->name()).string());
   const auto &pc_table = binary_info.pc_table;
   EXPECT_FALSE(uses_legacy_trace_pc_instrumentation);
   const SymbolTable &symbols = binary_info.symbols;
@@ -422,8 +422,8 @@
   // Initialize coverage data.
   BinaryInfo binary_info;
   binary_info.InitializeFromSanCovBinary(
-      GetTargetPath(), GetObjDumpPath(), GetLLVMSymbolizerPath(),
-      GetTestTempDir(test_info_->name()).string());
+      GetTargetPath(), /*env_diff=*/{}, GetObjDumpPath(),
+      GetLLVMSymbolizerPath(), GetTestTempDir(test_info_->name()).string());
 
   const PCTable &pc_table = binary_info.pc_table;
   EXPECT_FALSE(binary_info.uses_legacy_trace_pc_instrumentation);
diff --git a/fuzztest/internal/centipede_adaptor.cc b/fuzztest/internal/centipede_adaptor.cc
index c4fc989..e97afd9 100644
--- a/fuzztest/internal/centipede_adaptor.cc
+++ b/fuzztest/internal/centipede_adaptor.cc
@@ -192,6 +192,30 @@
   return env;
 }
 
+std::vector<std::string> GetEnvDiffForBinaries() {
+  // When running a test binary in a subprocess, we don't want these environment
+  // variables to be inherited and affect the execution of the tests.
+  //
+  // See list of environment variables here:
+  // https://bazel.build/reference/test-encyclopedia#initial-conditions
+  std::vector<std::string> env_diff = {
+      "-TEST_DIAGNOSTICS_OUTPUT_DIR",              //
+      "-TEST_INFRASTRUCTURE_FAILURE_FILE",         //
+      "-TEST_LOGSPLITTER_OUTPUT_FILE",             //
+      "-TEST_PREMATURE_EXIT_FILE",                 //
+      "-TEST_RANDOM_SEED",                         //
+      "-TEST_RUN_NUMBER",                          //
+      "-TEST_SHARD_INDEX",                         //
+      "-TEST_SHARD_STATUS_FILE",                   //
+      "-TEST_TOTAL_SHARDS",                        //
+      "-TEST_UNDECLARED_OUTPUTS_ANNOTATIONS_DIR",  //
+      "-TEST_UNDECLARED_OUTPUTS_DIR",              //
+      "-TEST_WARNINGS_OUTPUT_FILE",                //
+      "-GTEST_OUTPUT",                             //
+      "-XML_OUTPUT_FILE"};
+  return env_diff;
+}
+
 fuzztest::internal::Environment CreateCentipedeEnvironmentFromConfiguration(
     const Configuration& configuration, absl::string_view workdir,
     absl::string_view test_name, RunMode run_mode) {
@@ -265,6 +289,7 @@
   env.coverage_binary = (*args)[0];
   env.binary_name = std::filesystem::path{(*args)[0]}.filename();
   env.binary_hash = "DUMMY_HASH";
+  env.env_diff_for_binaries = GetEnvDiffForBinaries();
   env.exit_on_crash = !configuration.continue_after_crash ||
                       // Always fail on crash for reproducer tests.
                       configuration.crashing_input_to_reproduce.has_value();