add an experimental flag to treat pc pairs as features.

PiperOrigin-RevId: 458108404
diff --git a/centipede.cc b/centipede.cc
index 02dce9a..de4ac62 100644
--- a/centipede.cc
+++ b/centipede.cc
@@ -206,6 +206,7 @@
             << " df: " << fs_.CountFeatures(FeatureDomains::kDataFlow)
             << " cmp: " << fs_.CountFeatures(FeatureDomains::kCMP)
             << " path: " << fs_.CountFeatures(FeatureDomains::kBoundedPath)
+            << " pair: " << fs_.CountFeatures(FeatureDomains::kPCPair)
             << " corp: " << corpus_.NumActive() << "/" << corpus_.NumTotal()
             << " max/avg " << max << " " << avg << " exec/s: " << exec_speed
             << " mb: " << (MemoryUsage() >> 20);
@@ -238,6 +239,47 @@
   return success;
 }
 
+// *** Highly experimental and risky. May not scale well for large targets. ***
+//
+// The idea: an unordered pair of two features {a, b} is by itself a feature.
+// In the worst case, the number of such synthetic features is a square of
+// the number of regular features, which may not scale.
+// For now, we only treat pairs of PCs as features, which is still quadratic
+// by the number of PCs. But in moderate-sized programs this may be tolerable.
+//
+// Rationale: if two different parts of the target are exercised simultaneously,
+// this may create interesting behaviour that is hard to capture with regular
+// control flow (or other) features.
+size_t Centipede::AddPcPairFeatures(FeatureVec &fv) {
+  // Using a scratch vector to avoid allocations.
+  auto &pcs = add_pc_pair_scratch_;
+  pcs.clear();
+
+  size_t num_pcs = pc_table_.size();
+  size_t num_added_pairs = 0;
+
+  // Collect PCs from fv.
+  for (auto feature : fv) {
+    if (FeatureDomains::k8bitCounters.Contains(feature))
+      pcs.push_back(Convert8bitCounterFeatureToPcIndex(feature));
+  }
+
+  // The quadratic loop: iterate all PC pairs (!!).
+  for (size_t i = 0, n = pcs.size(); i < n; ++i) {
+    size_t pc1 = pcs[i];
+    for (size_t j = i + 1; j < n; ++j) {
+      size_t pc2 = pcs[j];
+      feature_t f = FeatureDomains::kPCPair.ConvertToMe(
+          ConvertPcPairToNumber(pc1, pc2, num_pcs));
+      // If we have seen this pair at least once, ignore it.
+      if (fs_.Frequency(f)) continue;
+      fv.push_back(f);
+      ++num_added_pairs;
+    }
+  }
+  return num_added_pairs;
+}
+
 bool Centipede::RunBatch(const std::vector<ByteArray> &input_vec,
                          BatchResult &batch_result,
                          BlobFileAppender *corpus_file,
@@ -265,6 +307,8 @@
     bool function_filter_passed = function_filter_.filter(fv);
     bool input_gained_new_coverage =
         fs_.CountUnseenAndPruneFrequentFeatures(fv);
+    if (env_.use_pcpair_features && AddPcPairFeatures(fv))
+      input_gained_new_coverage = true;
     if (unconditional_features_file) {
       CHECK_OK(unconditional_features_file->Append(
           PackFeaturesAndHash(input_vec[i], fv)));
diff --git a/centipede.h b/centipede.h
index f3d64be..7a7f9aa 100644
--- a/centipede.h
+++ b/centipede.h
@@ -121,6 +121,11 @@
   void MergeFromOtherCorpus(std::string_view merge_from_dir,
                             size_t shard_index_to_merge);
 
+  // Collects all PCs from `fv`, then adds PC-pair features to `fv`.
+  // Returns the number of added features.
+  // See more comments in centipede.cc.
+  size_t AddPcPairFeatures(FeatureVec &fv);
+
   FeatureSet fs_;
   Timer timer_;  // counts time for coverage collection rate computation
   Corpus corpus_;
@@ -145,6 +150,9 @@
   // Counts the number of crashes reported so far.
   int num_crash_reports_ = 0;
 
+  // Scratch object for AddPcPairFeatures.
+  std::vector<size_t> add_pc_pair_scratch_;
+
   // Path and command for the input_filter.
   std::string input_filter_path_;
   Command input_filter_cmd_;
diff --git a/environment.cc b/environment.cc
index eb3948f..f869c26 100644
--- a/environment.cc
+++ b/environment.cc
@@ -136,6 +136,9 @@
           "When available from instrumentation, use features derived from "
           "counting the number of occurrences of a given PC. "
           "When enabled, supersedes --use_pc_features.");
+ABSL_FLAG(bool, use_pcpair_features, false,
+          "If true, PC pairs are used as additional synthetic features. "
+          "Experimental, use with care - it may explode the corpus.");
 ABSL_FLAG(bool, generate_corpus_stats, false,
           "If true, a file workdir/corpus-stats-BINARY.json containing"
           "corpus stats will be generated periodically");
@@ -226,6 +229,7 @@
       use_cmp_features(absl::GetFlag(FLAGS_use_cmp_features)),
       use_dataflow_features(absl::GetFlag(FLAGS_use_dataflow_features)),
       use_counter_features(absl::GetFlag(FLAGS_use_counter_features)),
+      use_pcpair_features(absl::GetFlag(FLAGS_use_pcpair_features)),
       generate_corpus_stats(absl::GetFlag(FLAGS_generate_corpus_stats)),
       distill_shards(absl::GetFlag(FLAGS_distill_shards)),
       fork_server_helper_path(absl::GetFlag(FLAGS_fork_server_helper_path)),
diff --git a/environment.h b/environment.h
index 3f6ec66..0bf84f8 100644
--- a/environment.h
+++ b/environment.h
@@ -55,6 +55,7 @@
   bool use_cmp_features;
   bool use_dataflow_features;
   bool use_counter_features;
+  size_t use_pcpair_features;
   bool generate_corpus_stats;
   size_t distill_shards;
   std::string fork_server_helper_path;
diff --git a/feature.h b/feature.h
index 8fec731..88fd239 100644
--- a/feature.h
+++ b/feature.h
@@ -96,6 +96,9 @@
 // their number to 2^32.
 constexpr Domain kBoundedPath = {kCMP.end(), 1UL << 32, "path"};
 
+// Features derived from (unordered) pairs of PCs.
+constexpr Domain kPCPair = {kBoundedPath.end(), 1UL << 40, "pc-pair"};
+
 }  // namespace FeatureDomains
 
 // Converts a 8-bit coverage counter,  i.e. a pair of