Forcibly terminate the runner if the previous watchdog abort attempt has timed out. The watchdog initially sends `SIGABRT`, and the registered signal handler produces a crash report. However, the handler may occasionally deadlock if the crash happens within tcmalloc. To prevent this, the watchdog thread forcibly terminates the runner process after the force-abort timeout via `std::_Exit()`, without a crash report (`std::abort()` would raise `SIGABRT` again and re-enter the stuck handler). PiperOrigin-RevId: 735739776
diff --git a/centipede/BUILD b/centipede/BUILD index 15356cd..6721830 100644 --- a/centipede/BUILD +++ b/centipede/BUILD
@@ -1829,6 +1829,7 @@ "@com_google_fuzztest//centipede/testing:expensive_startup_fuzz_target", "@com_google_fuzztest//centipede/testing:fuzz_target_with_config", "@com_google_fuzztest//centipede/testing:fuzz_target_with_custom_mutator", + "@com_google_fuzztest//centipede/testing:hanging_fuzz_target", "@com_google_fuzztest//centipede/testing:seeded_fuzz_target", "@com_google_fuzztest//centipede/testing:test_fuzz_target", "@com_google_fuzztest//centipede/testing:test_input_filter", @@ -1848,6 +1849,7 @@ "@com_google_absl//absl/log", "@com_google_absl//absl/log:check", "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", "@com_google_fuzztest//common:defs", "@com_google_fuzztest//common:hash", "@com_google_fuzztest//common:logging",
diff --git a/centipede/centipede_callbacks.cc b/centipede/centipede_callbacks.cc index 4b6ba0a..23269e8 100644 --- a/centipede/centipede_callbacks.cc +++ b/centipede/centipede_callbacks.cc
@@ -16,6 +16,7 @@ #include <algorithm> #include <cstddef> +#include <cstdint> #include <cstdlib> #include <filesystem> // NOLINT #include <string> @@ -110,10 +111,21 @@ std::string CentipedeCallbacks::ConstructRunnerFlags( std::string_view extra_flags, bool disable_coverage) { + int64_t force_abort_timeout = 0; + if (env_.force_abort_timeout == absl::InfiniteDuration() || + env_.force_abort_timeout <= absl::ZeroDuration()) { + LOG(INFO) << "Centipede's force abort feature is disabled because force " + "abort timeout is set to " + << env_.force_abort_timeout << "."; + force_abort_timeout = 0; + } else { + force_abort_timeout = absl::ToInt64Seconds(env_.force_abort_timeout); + } std::vector<std::string> flags = { "CENTIPEDE_RUNNER_FLAGS=", absl::StrCat("timeout_per_input=", env_.timeout_per_input), absl::StrCat("timeout_per_batch=", env_.timeout_per_batch), + absl::StrCat("force_abort_timeout=", force_abort_timeout), absl::StrCat("address_space_limit_mb=", env_.address_space_limit_mb), absl::StrCat("rss_limit_mb=", env_.rss_limit_mb), absl::StrCat("stack_limit_kb=", env_.stack_limit_kb),
diff --git a/centipede/centipede_test.cc b/centipede/centipede_test.cc index fa0ff71..a39bd52 100644 --- a/centipede/centipede_test.cc +++ b/centipede/centipede_test.cc
@@ -34,6 +34,7 @@ #include "absl/log/check.h" #include "absl/log/log.h" #include "absl/strings/str_cat.h" +#include "absl/time/time.h" #include "./centipede/centipede_callbacks.h" #include "./centipede/centipede_default_callbacks.h" #include "./centipede/centipede_interface.h" @@ -1095,5 +1096,21 @@ EXPECT_NE(inputs, mutants); } +TEST_F(CentipedeWithTemporaryLocalDir, HangingFuzzTargetExitsAfterTimeout) { + Environment env; + env.binary = + GetDataDependencyFilepath("centipede/testing/hanging_fuzz_target"); + BatchResult batch_result; + const std::vector<ByteArray> inputs = {{0}}; + CentipedeDefaultCallbacks callbacks(env); + + env.force_abort_timeout = absl::Seconds(1); + env.timeout_per_batch = 1; + env.fork_server = false; + + // Test that the process does not get stuck and exits promptly. + EXPECT_FALSE(callbacks.Execute(env.binary, {{0}}, batch_result)); +} + } // namespace } // namespace centipede
diff --git a/centipede/environment.h b/centipede/environment.h index aadc666..22d27b8 100644 --- a/centipede/environment.h +++ b/centipede/environment.h
@@ -67,6 +67,7 @@ size_t stack_limit_kb = 0; size_t timeout_per_input = 60; size_t timeout_per_batch = 0; + absl::Duration force_abort_timeout = absl::Minutes(15); absl::Time stop_at = absl::InfiniteFuture(); bool fork_server = true; bool full_sync = false;
diff --git a/centipede/environment_flags.cc b/centipede/environment_flags.cc index f5b5ed2..325bac2 100644 --- a/centipede/environment_flags.cc +++ b/centipede/environment_flags.cc
@@ -24,6 +24,7 @@ #include "absl/log/check.h" #include "absl/log/log.h" #include "absl/strings/match.h" +#include "absl/strings/str_cat.h" #include "absl/strings/str_split.h" #include "absl/time/clock.h" #include "absl/time/time.h" @@ -157,6 +158,14 @@ "finish within --timeout_per_batch seconds. The default is computed " "as a function of --timeout_per_input * --batch_size. Support may " "vary depending on the runner."); +ABSL_FLAG( + absl::Duration, force_abort_timeout, + Environment::Default().force_abort_timeout, + absl::StrCat("The timeout to forcefully exit the test binary if it is " + "still running after a SIGABRT. This is useful to prevent " + "hangs (e.g., during stacktrace dumps). The default value is ", + Environment::Default().force_abort_timeout, + "; use 'inf' to disable.")); ABSL_FLAG(absl::Time, stop_at, Environment::Default().stop_at, "Stop fuzzing in all shards (--total_shards) at approximately this " "time in ISO-8601/RFC-3339 format, e.g. 2023-04-06T23:35:02Z. " @@ -462,6 +471,7 @@ /*stack_limit_kb=*/absl::GetFlag(FLAGS_stack_limit_kb), /*timeout_per_input=*/absl::GetFlag(FLAGS_timeout_per_input), /*timeout_per_batch=*/absl::GetFlag(FLAGS_timeout_per_batch), + /*force_abort_timeout=*/absl::GetFlag(FLAGS_force_abort_timeout), /*stop_at=*/ GetStopAtTime(absl::GetFlag(FLAGS_stop_at), absl::GetFlag(FLAGS_stop_after)),
diff --git a/centipede/runner.cc b/centipede/runner.cc index 59ddbd9..91e76ce 100644 --- a/centipede/runner.cc +++ b/centipede/runner.cc
@@ -211,6 +211,28 @@ return tv.tv_sec * kUsecInSec + tv.tv_usec; } +static void CheckForceAbortTimeout() { + // No timeout is set, ignore. + if (state.run_time_flags.force_abort_timeout == 0) return; + // Watchdog did not invoke abort yet, ignore. + if (state.force_abort_deadline == 0) return; + // The runner is still running unexpectedly long after we started aborting. + if (time(nullptr) > state.force_abort_deadline) { + fprintf(stderr, + "========= Force Abort timer exceeded; " + "the program is still running after %" PRIu64 + " seconds after raising SIGABRT. Forcefully aborting the runner in " + "the watchdog thread.\n", + state.run_time_flags.force_abort_timeout); + // std::_Exit() is preferred over std::abort() and std::exit(). + // std::abort() would raise a SIGABRT and trigger the same crash handler + // that is currently stuck; std::exit() would trigger the onexit handlers, + // which may or may not be problematic. To be safe, we use _exit() to bypass + // these failure handlers and terminate the process immediately. + std::_Exit(EXIT_FAILURE); + } +} + static void CheckWatchdogLimits() { const uint64_t curr_time = time(nullptr); struct Resource { @@ -261,6 +283,8 @@ pthread_mutex_lock(&state.runner_main_thread_mu); if (state.runner_main_thread.has_value()) { fprintf(stderr, "Sending SIGABRT to the runner main thread.\n"); + state.force_abort_deadline = + time(nullptr) + state.run_time_flags.force_abort_timeout; pthread_kill(*state.runner_main_thread, SIGABRT); pthread_mutex_unlock(&state.runner_main_thread_mu); return; @@ -281,6 +305,7 @@ state.watchdog_thread_started = true; while (true) { sleep(1); + CheckForceAbortTimeout(); // No calls to ResetInputTimer() yet: input execution hasn't started. 
if (state.input_start_time == 0) continue; @@ -322,10 +347,12 @@ void GlobalRunnerState::StartWatchdogThread() { fprintf(stderr, "Starting watchdog thread: timeout_per_input: %" PRIu64 - " sec; timeout_per_batch: %" PRIu64 " sec; rss_limit_mb: %" PRIu64 + " sec; timeout_per_batch: %" PRIu64 + " sec; force_abort_timeout: %" PRIu64 " sec; rss_limit_mb: %" PRIu64 " MB; stack_limit_kb: %" PRIu64 " KB\n", state.run_time_flags.timeout_per_input.load(), state.run_time_flags.timeout_per_batch, + state.run_time_flags.force_abort_timeout, state.run_time_flags.rss_limit_mb.load(), state.run_time_flags.stack_limit_kb.load()); pthread_t watchdog_thread;
diff --git a/centipede/runner.h b/centipede/runner.h index 3233b19..fc30e1f 100644 --- a/centipede/runner.h +++ b/centipede/runner.h
@@ -67,6 +67,7 @@ uint64_t use_auto_dictionary : 1; std::atomic<uint64_t> timeout_per_input; uint64_t timeout_per_batch; + uint64_t force_abort_timeout; std::atomic<uint64_t> stack_limit_kb; std::atomic<uint64_t> rss_limit_mb; uint64_t crossover_level; @@ -172,6 +173,7 @@ /*use_auto_dictionary=*/HasFlag(":use_auto_dictionary:"), /*timeout_per_input=*/HasIntFlag(":timeout_per_input=", 0), /*timeout_per_batch=*/HasIntFlag(":timeout_per_batch=", 0), + /*force_abort_timeout=*/HasIntFlag(":force_abort_timeout=", 0), /*stack_limit_kb=*/HasIntFlag(":stack_limit_kb=", 0), /*rss_limit_mb=*/HasIntFlag(":rss_limit_mb=", 0), /*crossover_level=*/HasIntFlag(":crossover_level=", 50), @@ -345,6 +347,9 @@ // Per-batch timer. Initially, zero. ResetInputTimer() sets it to the current // time before the first input and never resets it. std::atomic<time_t> batch_start_time; + // Initially, zero. Watchdog thread sets it based on the provided timeout when + // it attempts to abort the runner main thread. + std::atomic<time_t> force_abort_deadline; // The Watchdog thread sets this to true. std::atomic<bool> watchdog_thread_started;
diff --git a/centipede/testing/BUILD b/centipede/testing/BUILD index 543fb8b..4d6f25f 100644 --- a/centipede/testing/BUILD +++ b/centipede/testing/BUILD
@@ -293,6 +293,10 @@ deps = ["@com_google_absl//absl/base:nullability"], ) +centipede_fuzz_target( + name = "hanging_fuzz_target", +) + ################################################################################ # This fuzz target is not currently used by any automated tests and is here for # manual tests only.
diff --git a/centipede/testing/hanging_fuzz_target.cc b/centipede/testing/hanging_fuzz_target.cc new file mode 100644 index 0000000..fe60757 --- /dev/null +++ b/centipede/testing/hanging_fuzz_target.cc
@@ -0,0 +1,30 @@
+// Copyright 2025 The Centipede Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <csignal>
+#include <cstddef>
+#include <cstdint>
+
+// A fuzz target that hangs even during abort: it ignores SIGABRT and then
+// spins forever, so delivering SIGABRT does not stop it.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  signal(SIGABRT, SIG_IGN);  // Ignore abort signals and let the process hang.
+  // Spin on a volatile flag: a side-effect-free `while (true) {}` violates the
+  // C++ forward-progress guarantee (UB before C++26), so an optimizing
+  // compiler may delete the loop and let the target return instead of hang.
+  volatile bool keep_hanging = true;
+  while (keep_hanging) {
+  }
+  return 0;
+}