Implement execution-time-based weight scaling.

The scaling formula is the same as in libFuzzer and the legacy FuzzTest.

PiperOrigin-RevId: 823546218
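For reference, the tiers are easiest to see in isolation. The sketch below is illustrative only: the helper name ExecTimeScale is hypothetical, and the real logic lives inline in Corpus::UpdateWeights in this change. It reproduces the multipliers adopted from libFuzzer's FuzzerCorpus.h:

    #include <cstdint>

    // Illustrative only: the multiplier applied to an element's weight,
    // given its execution time and the corpus-wide average exec time.
    double ExecTimeScale(uint64_t exec_time_usec, double avg_exec_time_usec) {
      if (exec_time_usec > avg_exec_time_usec * 10) return 0.1;
      if (exec_time_usec > avg_exec_time_usec * 4) return 0.25;
      if (exec_time_usec > avg_exec_time_usec * 2) return 0.5;
      if (exec_time_usec * 3 > avg_exec_time_usec * 4) return 0.75;  // > 4/3 avg
      if (exec_time_usec * 4 < avg_exec_time_usec) return 3.0;       // < 1/4 avg
      if (exec_time_usec * 3 < avg_exec_time_usec) return 2.0;       // < 1/3 avg
      if (exec_time_usec * 2 < avg_exec_time_usec) return 1.5;       // < 1/2 avg
      return 1.0;  // within [1/2, 4/3] of the average: left unscaled
    }

For example, with an average of 5 usec, inputs taking 1, 5, and 9 usec are scaled by 3.0, 1.0, and 0.75 respectively, which is exactly what the new ScalesWeightsWithExecTime test below exercises.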
diff --git a/centipede/BUILD b/centipede/BUILD
index 9e7a9d6..17550db 100644
--- a/centipede/BUILD
+++ b/centipede/BUILD
@@ -1634,6 +1634,7 @@
         ":feature",
         ":feature_set",
         ":pc_info",
+        ":runner_result",
         ":util",
         "@com_google_fuzztest//common:defs",
         "@com_google_fuzztest//common:test_util",
diff --git a/centipede/centipede.cc b/centipede/centipede.cc
index 6a1843f..e60e5aa 100644
--- a/centipede/centipede.cc
+++ b/centipede/centipede.cc
@@ -474,6 +474,7 @@
       }
     }
   }
+  corpus_.UpdateWeights(fs_, coverage_frontier_, env_.exec_time_weight_scaling);
   return batch_gained_new_coverage;
 }
diff --git a/centipede/centipede_flags.inc b/centipede/centipede_flags.inc
index f52e2f8..572e56d 100644
--- a/centipede/centipede_flags.inc
+++ b/centipede/centipede_flags.inc
@@ -193,6 +193,9 @@
     "If true, use weighted distribution when choosing the corpus element "
     "to mutate. This flag is mostly for Centipede developers.")
 CENTIPEDE_FLAG(
+    bool, exec_time_weight_scaling, true,
+    "If true, scale the corpus weight by the execution time of each input.")
+CENTIPEDE_FLAG(
     bool, use_coverage_frontier, false,
     "If true, use coverage frontier when choosing the corpus element to "
     "mutate. This flag is mostly for Centipede developers.")
diff --git a/centipede/centipede_test.cc b/centipede/centipede_test.cc
index deb06af..e41b171 100644
--- a/centipede/centipede_test.cc
+++ b/centipede/centipede_test.cc
@@ -620,6 +620,7 @@
   Environment env;
   env.workdir = tmp_dir.path();
   env.seed = 1;  // make the runs predictable.
+  env.exec_time_weight_scaling = false;
   env.num_runs = 100;
   env.batch_size = 10;
   env.binary = GetDataDependencyFilepath("centipede/testing/test_fuzz_target");
diff --git a/centipede/corpus.cc b/centipede/corpus.cc
index 9aaf927..4363b24 100644
--- a/centipede/corpus.cc
+++ b/centipede/corpus.cc
@@ -77,20 +77,94 @@
   return {max, total / records_.size()};
 }

+void Corpus::UpdateWeights(const FeatureSet& fs,
+                           const CoverageFrontier& coverage_frontier,
+                           bool scale_by_exec_time) {
+  std::vector<double> weights;
+  weights.resize(records_.size());
+  for (size_t i = 0, n = records_.size(); i < n; ++i) {
+    auto& record = records_[i];
+    const size_t unseen = fs.PruneFeaturesAndCountUnseen(record.features);
+    FUZZTEST_CHECK_EQ(unseen, 0);
+    weights[i] = fs.ComputeWeight(record.features);
+  }
+  if (scale_by_exec_time) {
+    double total_exec_time_usec = 0;
+    // For loaded corpus elements, we don't have the exec time recorded. Thus
+    // we neither count them when calculating the average exec time nor scale
+    // their weights.
+    size_t exec_time_divider = 0;
+    for (const auto& record : records_) {
+      if (!(record.stats == ExecutionResult::Stats{})) {
+        total_exec_time_usec += record.stats.exec_time_usec;
+        ++exec_time_divider;
+      }
+    }
+    const double avg_exec_time_usec =
+        exec_time_divider == 0 ? 0 : total_exec_time_usec / exec_time_divider;
+    for (size_t i = 0; i < records_.size(); ++i) {
+      const auto& record = records_[i];
+      if (record.stats == ExecutionResult::Stats{}) {
+        continue;
+      }
+      // Same as the scaling method from libFuzzer:
+      // https://github.com/llvm/llvm-project/blob/10bec2cd9dab796d5685fa8aadf47b912e3558fe/compiler-rt/lib/fuzzer/FuzzerCorpus.h#L101
+      if (record.stats.exec_time_usec > avg_exec_time_usec * 10) {
+        weights[i] *= 0.1;
+      } else if (record.stats.exec_time_usec > avg_exec_time_usec * 4) {
+        weights[i] *= 0.25;
+      } else if (record.stats.exec_time_usec > avg_exec_time_usec * 2) {
+        weights[i] *= 0.5;
+      } else if (record.stats.exec_time_usec * 3 > avg_exec_time_usec * 4) {
+        weights[i] *= 0.75;
+      } else if (record.stats.exec_time_usec * 4 < avg_exec_time_usec) {
+        weights[i] *= 3;
+      } else if (record.stats.exec_time_usec * 3 < avg_exec_time_usec) {
+        weights[i] *= 2;
+      } else if (record.stats.exec_time_usec * 2 < avg_exec_time_usec) {
+        weights[i] *= 1.5;
+      }
+    }
+  }
+  // Normalize weights into integers in [0, 2^16].
+  double highest_weight = 0;
+  double lowest_weight = 0;
+  double weight_sum = 0;
+  for (size_t i = 0; i < records_.size(); ++i) {
+    if (i == 0 || weights[i] > highest_weight) {
+      highest_weight = weights[i];
+    }
+    if (i == 0 || weights[i] < lowest_weight) {
+      lowest_weight = weights[i];
+    }
+    weight_sum += weights[i];
+  }
+  FUZZTEST_VLOG(1) << "Recomputed weight with average: "
+                   << weight_sum / records_.size()
+                   << " highest: " << highest_weight
+                   << " lowest: " << lowest_weight;
+  FUZZTEST_CHECK(lowest_weight >= 0) << "Must not have negative corpus weight!";
+  for (size_t i = 0; i < records_.size(); ++i) {
+    // If all weights are zero, fall back to prioritizing recent corpus
+    // elements.
+    const double normalized_weight = highest_weight > 0
+                                         ? (weights[i] / highest_weight)
+                                         : ((i + 1.0) / records_.size());
+    weighted_distribution_.ChangeWeight(i, normalized_weight * (1 << 16));
+  }
+  weighted_distribution_.RecomputeInternalState();
+}
+
 size_t Corpus::Prune(const FeatureSet &fs,
                      const CoverageFrontier &coverage_frontier,
                      size_t max_corpus_size, Rng &rng) {
   // TODO(kcc): use coverage_frontier.
   FUZZTEST_CHECK(max_corpus_size);
   if (records_.size() < 2UL) return 0;
-  // Recompute the weights.
+  size_t num_zero_weights = 0;
-  for (size_t i = 0, n = records_.size(); i < n; ++i) {
-    fs.PruneFeaturesAndCountUnseen(records_[i].features);
-    auto new_weight =
-        ComputeWeight(records_[i].features, fs, coverage_frontier);
-    weighted_distribution_.ChangeWeight(i, new_weight);
-    if (new_weight == 0) ++num_zero_weights;
+  for (size_t i = 0; i < records_.size(); ++i) {
+    if (weighted_distribution_.weights()[i] == 0) {
+      ++num_zero_weights;
+    }
   }
   // Remove zero weights and the corresponding corpus record.
diff --git a/centipede/corpus.h b/centipede/corpus.h
index 89bcd6d..0716466 100644
--- a/centipede/corpus.h
+++ b/centipede/corpus.h
@@ -44,6 +44,8 @@
   // Removes the last weight and returns it.
   // Precondition: size() > 0.
   uint64_t PopBack();
+  // Read-only weight accessor.
+  const std::vector<uint64_t>& weights() const { return weights_; }
   // Changes the existing idx-th weight to new_weight.
   void ChangeWeight(size_t idx, uint64_t new_weight);
   // Returns a random number in [0,size()), using a random number `random`.
@@ -118,6 +120,12 @@
   // Returns the number of removed elements.
   size_t Prune(const FeatureSet &fs, const CoverageFrontier &coverage_frontier,
                size_t max_corpus_size, Rng &rng);
+  // Updates the corpus weights according to `fs` and `coverage_frontier`. If
+  // `scale_by_exec_time` is set, scales the weights by each element's
+  // execution time relative to the corpus average.
+  void UpdateWeights(const FeatureSet& fs,
+                     const CoverageFrontier& coverage_frontier,
+                     bool scale_by_exec_time);

   // Accessors.
diff --git a/centipede/corpus_test.cc b/centipede/corpus_test.cc
index 220239d..a75ddb0 100644
--- a/centipede/corpus_test.cc
+++ b/centipede/corpus_test.cc
@@ -28,6 +28,7 @@
 #include "./centipede/feature.h"
 #include "./centipede/feature_set.h"
 #include "./centipede/pc_info.h"
+#include "./centipede/runner_result.h"
 #include "./centipede/util.h"
 #include "./common/defs.h"
 #include "./common/test_util.h"
@@ -113,6 +114,7 @@
   Add({{2}, {30, 40}});
   Add({{3}, {40, 50}});
   Add({{4}, {10, 20}});
+  corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/false);

   // Prune. Features 20 and 40 are frequent => input {0} will be removed.
   EXPECT_EQ(corpus.NumActive(), 5);
@@ -122,6 +124,8 @@
   VerifyActiveInputs({{1}, {2}, {3}, {4}});

   Add({{5}, {30, 60}});
+  corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/false);
+
   EXPECT_EQ(corpus.NumTotal(), 6);
   // Prune. Feature 30 is now frequent => inputs {1} and {2} will be removed.
   EXPECT_EQ(corpus.NumActive(), 5);
@@ -141,6 +145,55 @@
   EXPECT_EQ(corpus.NumTotal(), 6);
 }

+TEST(Corpus, ScalesWeightsWithExecTime) {
+  PCTable pc_table(100);
+  CFTable cf_table(100);
+  BinaryInfo bin_info{pc_table, {}, cf_table, {}, {}, {}};
+  CoverageFrontier coverage_frontier(bin_info);
+  FeatureSet fs(2, {});
+  Corpus corpus;
+
+  auto Add = [&](const CorpusRecord& record, uint64_t exec_time_usec) {
+    fs.MergeFeatures(record.features);
+    ExecutionResult::Stats stats = {};
+    stats.exec_time_usec = exec_time_usec;
+    corpus.Add(record.data, record.features, /*metadata=*/{}, stats, fs,
+               coverage_frontier);
+  };
+
+  Add({/*data=*/{0}, /*features=*/{10}}, /*exec_time_usec=*/1);
+  Add({/*data=*/{1}, /*features=*/{20}}, /*exec_time_usec=*/5);
+  Add({/*data=*/{2}, /*features=*/{30}}, /*exec_time_usec=*/9);
+
+  constexpr int kNumIter = 10000;
+  std::vector<uint64_t> freq;
+
+  Rng rng;
+  auto ComputeFreq = [&]() {
+    freq.clear();
+    freq.resize(corpus.NumActive());
+    for (int i = 0; i < kNumIter; i++) {
+      const auto& record = corpus.WeightedRandom(rng);
+      const auto id = record.data[0];
+      ASSERT_LT(id, freq.size());
+      freq[id]++;
+    }
+  };
+
+  // The weights should be equal without exec time scaling.
+  corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/false);
+  ComputeFreq();
+  EXPECT_NEAR(freq[0], kNumIter / 3, 100);
+  EXPECT_NEAR(freq[1], kNumIter / 3, 100);
+  EXPECT_NEAR(freq[2], kNumIter / 3, 100);
+
+  // The weights should favor {0} over {1} over {2} with exec time scaling.
+  corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/true);
+  ComputeFreq();
+  EXPECT_GT(freq[0], freq[1] + 100);
+  EXPECT_GT(freq[1], freq[2] + 100);
+}
+
 // Regression test for a crash in Corpus::Prune().
 TEST(Corpus, PruneRegressionTest1) {
   PCTable pc_table(100);
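The scaling is on by default via the new exec_time_weight_scaling flag. Assuming the usual Centipede flag plumbing, a run can opt out on the command line (the binary path below is a placeholder):

    ./centipede --binary=./my_fuzz_target --exec_time_weight_scaling=false

The centipede_test.cc change above does the equivalent in-process by setting env.exec_time_weight_scaling = false, presumably because per-run execution times vary and would otherwise make the seeded, weighted input selection in that test nondeterministic.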