Add Setup/Teardown option on Benchmark. (#1269)
* Add Setup/Teardown option on Benchmark.
Motivations:
- feature parity with our internal library. (which has ~718 callers)
- more flexible than coordinating setup/teardown inside the benchmark routine.
* change Setup/Teardown callback type to raw function pointers
* add test file to cmake file
* move b.Teardown() up
* add const to param of Setup/Teardown callbacks
* fix comment and add doc to user_guide
* fix typo
* fix doc, fix test and add bindings to python/benchmark.cc
* fix binding again
* remove explicit C cast - that was wrong
* change policy to reference_internal
* try removing the bindinds ...
* clean up
* add more tests with repetitions and fixtures
* more comments
* init setup/teardown callbacks to NULL
* s/nullptr/NULL
* removed unused var
* change assertion on fixture_interaction::fixture_setup
* move NULL init to .cc file
diff --git a/docs/user_guide.md b/docs/user_guide.md
index e08ed5b..d1a4e1e 100644
--- a/docs/user_guide.md
+++ b/docs/user_guide.md
@@ -18,6 +18,8 @@
[Runtime and Reporting Considerations](#runtime-and-reporting-considerations)
+[Setup/Teardown](#setup-teardown)
+
[Passing Arguments](#passing-arguments)
[Custom Benchmark Name](#custom-benchmark-name)
@@ -238,6 +240,38 @@
As well as the per-benchmark entries, a preamble in the report will include
information about the machine on which the benchmarks are run.
+<a name="setup-teardown" />
+
+## Setup/Teardown
+
+Global setup/teardown specific to each benchmark can be done by
+passing a callback to Setup/Teardown:
+
+The setup/teardown callbacks will be invoked once for each benchmark.
+If the benchmark is multi-threaded (will run in k threads), they will be invoked exactly once before
+each run with k threads.
+If the benchmark uses different size groups of threads, the above will be true for each size group.
+
+E.g.,
+
+```c++
+static void DoSetup(const benchmark::State& state) {
+}
+
+static void DoTeardown(const benchmark::State& state) {
+}
+
+static void BM_func(benchmark::State& state) {...}
+
+BENCHMARK(BM_func)->Arg(1)->Arg(3)->Threads(16)->Threads(32)->Setup(DoSetup)->Teardown(DoTeardown);
+
+```
+
+In this example, `DoSetup` and `DoTeardown` will be invoked 4 times each,
+specifically, once for each member of this family:
+ - BM_func_Arg_1_Threads_16, BM_func_Arg_1_Threads_32
+ - BM_func_Arg_3_Threads_16, BM_func_Arg_3_Threads_32
+
<a name="passing-arguments" />
## Passing Arguments
diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h
index 31f6173..4fdb545 100644
--- a/include/benchmark/benchmark.h
+++ b/include/benchmark/benchmark.h
@@ -969,6 +969,23 @@
return Ranges(ranges);
}
+ // Have "setup" and/or "teardown" invoked once for every benchmark run.
+ // If the benchmark is multi-threaded (will run in k threads concurrently),
+ // the setup callback will be invoked exactly once (not k times) before
+ // each run with k threads. Time allowing (e.g. for a short benchmark), there
+ // may be multiple such runs per benchmark, each run with its own
+ // "setup"/"teardown".
+ //
+ // If the benchmark uses different size groups of threads (e.g. via
+ // ThreadRange), the above will be true for each size group.
+ //
+ // The callback will be passed a State object, which includes the number
+ // of threads, thread-index, benchmark arguments, etc.
+ //
+ // The callback must not be NULL or self-deleting.
+ Benchmark* Setup(void (*setup)(const benchmark::State&));
+ Benchmark* Teardown(void (*teardown)(const benchmark::State&));
+
// Pass this benchmark object to *func, which can customize
// the benchmark by calling various methods like Arg, Args,
// Threads, etc.
@@ -1100,6 +1117,10 @@
std::vector<Statistics> statistics_;
std::vector<int> thread_counts_;
+ typedef void (*callback_function)(const benchmark::State&);
+ callback_function setup_;
+ callback_function teardown_;
+
Benchmark& operator=(Benchmark const&);
};
diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc
index 89da519..4de36e3 100644
--- a/src/benchmark_api_internal.cc
+++ b/src/benchmark_api_internal.cc
@@ -78,6 +78,9 @@
if (!benchmark_.thread_counts_.empty()) {
name_.threads = StrFormat("threads:%d", threads_);
}
+
+ setup_ = benchmark_.setup_;
+ teardown_ = benchmark_.teardown_;
}
State BenchmarkInstance::Run(
@@ -90,5 +93,20 @@
return st;
}
+void BenchmarkInstance::Setup() const {
+ if (setup_) {
+ State st(/*iters*/ 1, args_, /*thread_id*/ 0, threads_, nullptr, nullptr,
+ nullptr);
+ setup_(st);
+ }
+}
+
+void BenchmarkInstance::Teardown() const {
+ if (teardown_) {
+ State st(/*iters*/ 1, args_, /*thread_id*/ 0, threads_, nullptr, nullptr,
+ nullptr);
+ teardown_(st);
+ }
+}
} // namespace internal
} // namespace benchmark
diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h
index 592dd46..94c2b29 100644
--- a/src/benchmark_api_internal.h
+++ b/src/benchmark_api_internal.h
@@ -38,6 +38,8 @@
double min_time() const { return min_time_; }
IterationCount iterations() const { return iterations_; }
int threads() const { return threads_; }
+ void Setup() const;
+ void Teardown() const;
State Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer,
internal::ThreadManager* manager,
@@ -62,6 +64,10 @@
double min_time_;
IterationCount iterations_;
int threads_; // Number of concurrent threads to us
+
+ typedef void (*callback_function)(const benchmark::State&);
+ callback_function setup_ = nullptr;
+ callback_function teardown_ = nullptr;
};
bool FindBenchmarksInternal(const std::string& re,
diff --git a/src/benchmark_register.cc b/src/benchmark_register.cc
index 0b148e2..b3f85dc 100644
--- a/src/benchmark_register.cc
+++ b/src/benchmark_register.cc
@@ -211,7 +211,9 @@
use_real_time_(false),
use_manual_time_(false),
complexity_(oNone),
- complexity_lambda_(nullptr) {
+ complexity_lambda_(nullptr),
+ setup_(nullptr),
+ teardown_(nullptr) {
ComputeStatistics("mean", StatisticsMean);
ComputeStatistics("median", StatisticsMedian);
ComputeStatistics("stddev", StatisticsStdDev);
@@ -321,6 +323,18 @@
return this;
}
+Benchmark* Benchmark::Setup(void (*setup)(const benchmark::State&)) {
+ BM_CHECK(setup != nullptr);
+ setup_ = setup;
+ return this;
+}
+
+Benchmark* Benchmark::Teardown(void (*teardown)(const benchmark::State&)) {
+ BM_CHECK(teardown != nullptr);
+ teardown_ = teardown;
+ return this;
+}
+
Benchmark* Benchmark::RangeMultiplier(int multiplier) {
BM_CHECK(multiplier > 1);
range_multiplier_ = multiplier;
diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc
index fd23d46..eac807b 100644
--- a/src/benchmark_runner.cc
+++ b/src/benchmark_runner.cc
@@ -279,7 +279,9 @@
// is *only* calculated for the *first* repetition, and other repetitions
// simply use that precomputed iteration count.
for (;;) {
+ b.Setup();
i = DoNIterations();
+ b.Teardown();
// Do we consider the results to be significant?
// If we are doing repetitions, and the first repetition was already done,
@@ -316,10 +318,12 @@
memory_manager->Start();
std::unique_ptr<internal::ThreadManager> manager;
manager.reset(new internal::ThreadManager(1));
+ b.Setup();
RunInThread(&b, memory_iterations, 0, manager.get(),
perf_counters_measurement_ptr);
manager->WaitForAllThreads();
manager.reset();
+ b.Teardown();
BENCHMARK_DISABLE_DEPRECATED_WARNING
memory_manager->Stop(memory_result);
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 0f7b265..162af53 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -59,6 +59,9 @@
compile_benchmark_test(spec_arg_test)
add_test(NAME spec_arg COMMAND spec_arg_test --benchmark_filter=BM_NotChosen)
+compile_benchmark_test(benchmark_setup_teardown_test)
+add_test(NAME benchmark_setup_teardown COMMAND benchmark_setup_teardown_test)
+
compile_benchmark_test(filter_test)
macro(add_filter_test name filter expect)
add_test(NAME ${name} COMMAND filter_test --benchmark_min_time=0.01 --benchmark_filter=${filter} ${expect})
diff --git a/test/benchmark_setup_teardown_test.cc b/test/benchmark_setup_teardown_test.cc
new file mode 100644
index 0000000..f18c570
--- /dev/null
+++ b/test/benchmark_setup_teardown_test.cc
@@ -0,0 +1,157 @@
+#include <atomic>
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <limits>
+#include <string>
+
+#include "benchmark/benchmark.h"
+
+// Test that Setup() and Teardown() are called exactly once
+// for each benchmark run (single-threaded).
+namespace single {
+static int setup_call = 0;
+static int teardown_call = 0;
+} // namespace single
+static void DoSetup1(const benchmark::State& state) {
+ ++single::setup_call;
+
+ // Setup/Teardown should never be called with any thread_idx != 0.
+ assert(state.thread_index() == 0);
+}
+
+static void DoTeardown1(const benchmark::State& state) {
+ ++single::teardown_call;
+ assert(state.thread_index() == 0);
+}
+
+static void BM_with_setup(benchmark::State& state) {
+ for (auto s : state) {
+ }
+}
+BENCHMARK(BM_with_setup)
+ ->Arg(1)
+ ->Arg(3)
+ ->Arg(5)
+ ->Arg(7)
+ ->Iterations(100)
+ ->Setup(DoSetup1)
+ ->Teardown(DoTeardown1);
+
+// Test that Setup() and Teardown() are called once for each group of threads.
+namespace concurrent {
+static std::atomic<int> setup_call(0);
+static std::atomic<int> teardown_call(0);
+static std::atomic<int> func_call(0);
+} // namespace concurrent
+
+static void DoSetup2(const benchmark::State& state) {
+ concurrent::setup_call.fetch_add(1, std::memory_order_acquire);
+ assert(state.thread_index() == 0);
+}
+
+static void DoTeardown2(const benchmark::State& state) {
+ concurrent::teardown_call.fetch_add(1, std::memory_order_acquire);
+ assert(state.thread_index() == 0);
+}
+
+static void BM_concurrent(benchmark::State& state) {
+ for (auto s : state) {
+ }
+ concurrent::func_call.fetch_add(1, std::memory_order_acquire);
+}
+
+BENCHMARK(BM_concurrent)
+ ->Setup(DoSetup2)
+ ->Teardown(DoTeardown2)
+ ->Iterations(100)
+ ->Threads(5)
+ ->Threads(10)
+ ->Threads(15);
+
+// Testing interaction with Fixture::Setup/Teardown
+namespace fixture_interaction {
+int setup = 0;
+int fixture_setup = 0;
+} // namespace fixture_interaction
+
+#define FIXTURE_BENCHMARK_NAME MyFixture
+
+class FIXTURE_BENCHMARK_NAME : public ::benchmark::Fixture {
+ public:
+  void SetUp(const ::benchmark::State& state) BENCHMARK_OVERRIDE {
+    fixture_interaction::fixture_setup++;
+  }
+
+  ~FIXTURE_BENCHMARK_NAME() {}
+};
+
+BENCHMARK_F(FIXTURE_BENCHMARK_NAME, BM_WithFixture)(benchmark::State& st) {
+  for (auto _ : st) {
+  }
+}
+
+static void DoSetupWithFixture(const benchmark::State& state) {
+  fixture_interaction::setup++;
+}
+
+BENCHMARK_REGISTER_F(FIXTURE_BENCHMARK_NAME, BM_WithFixture)
+ ->Arg(1)
+ ->Arg(3)
+ ->Arg(5)
+ ->Arg(7)
+ ->Setup(DoSetupWithFixture)
+ ->Repetitions(1)
+ ->Iterations(100);
+
+// Testing repetitions.
+namespace repetitions {
+int setup = 0;
+}
+
+static void DoSetupWithRepetitions(const benchmark::State& state) {
+ repetitions::setup++;
+}
+static void BM_WithRep(benchmark::State& state) {
+ for (auto _ : state) {
+ }
+}
+
+BENCHMARK(BM_WithRep)
+ ->Arg(1)
+ ->Arg(3)
+ ->Arg(5)
+ ->Arg(7)
+ ->Setup(DoSetupWithRepetitions)
+ ->Iterations(100)
+ ->Repetitions(4);
+
+int main(int argc, char** argv) {
+ benchmark::Initialize(&argc, argv);
+
+ size_t ret = benchmark::RunSpecifiedBenchmarks(".");
+ assert(ret > 0);
+
+  // Setup/Teardown are called once for each arg group (1,3,5,7).
+ assert(single::setup_call == 4);
+ assert(single::teardown_call == 4);
+
+  // 3 groups of threads calling this function (5,10,15).
+ assert(concurrent::setup_call.load(std::memory_order_relaxed) == 3);
+ assert(concurrent::teardown_call.load(std::memory_order_relaxed) == 3);
+ assert((5 + 10 + 15) ==
+ concurrent::func_call.load(std::memory_order_relaxed));
+
+ // Setup is called 4 times, once for each arg group (1,3,5,7)
+ assert(fixture_interaction::setup == 4);
+  // Fixture::Setup is called every time the benchmark routine is run.
+  // The exact number is nondeterministic, so we just assert that
+  // it's more than setup.
+ assert(fixture_interaction::fixture_setup > fixture_interaction::setup);
+
+  // Setup is called once for each repetition * num_arg = 4 * 4 = 16.
+ assert(repetitions::setup == 16);
+
+ return 0;
+}