Forcibly terminate the runner via `std::_Exit()` if the previous watchdog abort attempt has timed out.

The watchdog will send `SIGABRT` initially, and the registered signal handler will produce a crash report. However, this may occasionally deadlock if the crash happens within tcmalloc. To prevent this, the watchdog thread will forcibly terminate the runner after a timeout via `std::_Exit()`, without producing a crash report.

PiperOrigin-RevId: 735739776
diff --git a/centipede/BUILD b/centipede/BUILD
index 15356cd..6721830 100644
--- a/centipede/BUILD
+++ b/centipede/BUILD
@@ -1829,6 +1829,7 @@
         "@com_google_fuzztest//centipede/testing:expensive_startup_fuzz_target",
         "@com_google_fuzztest//centipede/testing:fuzz_target_with_config",
         "@com_google_fuzztest//centipede/testing:fuzz_target_with_custom_mutator",
+        "@com_google_fuzztest//centipede/testing:hanging_fuzz_target",
         "@com_google_fuzztest//centipede/testing:seeded_fuzz_target",
         "@com_google_fuzztest//centipede/testing:test_fuzz_target",
         "@com_google_fuzztest//centipede/testing:test_input_filter",
@@ -1848,6 +1849,7 @@
         "@com_google_absl//absl/log",
         "@com_google_absl//absl/log:check",
         "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/time",
         "@com_google_fuzztest//common:defs",
         "@com_google_fuzztest//common:hash",
         "@com_google_fuzztest//common:logging",
diff --git a/centipede/centipede_callbacks.cc b/centipede/centipede_callbacks.cc
index 4b6ba0a..23269e8 100644
--- a/centipede/centipede_callbacks.cc
+++ b/centipede/centipede_callbacks.cc
@@ -16,6 +16,7 @@
 
 #include <algorithm>
 #include <cstddef>
+#include <cstdint>
 #include <cstdlib>
 #include <filesystem>  // NOLINT
 #include <string>
@@ -110,10 +111,21 @@
 
 std::string CentipedeCallbacks::ConstructRunnerFlags(
     std::string_view extra_flags, bool disable_coverage) {
+  int64_t force_abort_timeout = 0;
+  if (env_.force_abort_timeout == absl::InfiniteDuration() ||
+      env_.force_abort_timeout <= absl::ZeroDuration()) {
+    LOG(INFO) << "Centipede's force abort feature is disabled because force "
+                 "abort timeout is set to "
+              << env_.force_abort_timeout << ".";
+    force_abort_timeout = 0;
+  } else {
+    force_abort_timeout = absl::ToInt64Seconds(env_.force_abort_timeout);
+  }
   std::vector<std::string> flags = {
       "CENTIPEDE_RUNNER_FLAGS=",
       absl::StrCat("timeout_per_input=", env_.timeout_per_input),
       absl::StrCat("timeout_per_batch=", env_.timeout_per_batch),
+      absl::StrCat("force_abort_timeout=", force_abort_timeout),
       absl::StrCat("address_space_limit_mb=", env_.address_space_limit_mb),
       absl::StrCat("rss_limit_mb=", env_.rss_limit_mb),
       absl::StrCat("stack_limit_kb=", env_.stack_limit_kb),
diff --git a/centipede/centipede_test.cc b/centipede/centipede_test.cc
index fa0ff71..a39bd52 100644
--- a/centipede/centipede_test.cc
+++ b/centipede/centipede_test.cc
@@ -34,6 +34,7 @@
 #include "absl/log/check.h"
 #include "absl/log/log.h"
 #include "absl/strings/str_cat.h"
+#include "absl/time/time.h"
 #include "./centipede/centipede_callbacks.h"
 #include "./centipede/centipede_default_callbacks.h"
 #include "./centipede/centipede_interface.h"
@@ -1095,5 +1096,21 @@
   EXPECT_NE(inputs, mutants);
 }
 
+TEST_F(CentipedeWithTemporaryLocalDir, HangingFuzzTargetExitsAfterTimeout) {
+  Environment env;
+  env.binary =
+      GetDataDependencyFilepath("centipede/testing/hanging_fuzz_target");
+  // Finish configuring `env` before the callbacks are constructed from it.
+  env.force_abort_timeout = absl::Seconds(1);
+  env.timeout_per_batch = 1;
+  env.fork_server = false;
+  CentipedeDefaultCallbacks callbacks(env);
+
+  const std::vector<ByteArray> inputs = {{0}};
+  BatchResult batch_result;
+  // Test that the process does not get stuck and exits promptly.
+  EXPECT_FALSE(callbacks.Execute(env.binary, inputs, batch_result));
+}
+
 }  // namespace
 }  // namespace centipede
diff --git a/centipede/environment.h b/centipede/environment.h
index aadc666..22d27b8 100644
--- a/centipede/environment.h
+++ b/centipede/environment.h
@@ -67,6 +67,7 @@
   size_t stack_limit_kb = 0;
   size_t timeout_per_input = 60;
   size_t timeout_per_batch = 0;
+  absl::Duration force_abort_timeout = absl::Minutes(15);
   absl::Time stop_at = absl::InfiniteFuture();
   bool fork_server = true;
   bool full_sync = false;
diff --git a/centipede/environment_flags.cc b/centipede/environment_flags.cc
index f5b5ed2..325bac2 100644
--- a/centipede/environment_flags.cc
+++ b/centipede/environment_flags.cc
@@ -24,6 +24,7 @@
 #include "absl/log/check.h"
 #include "absl/log/log.h"
 #include "absl/strings/match.h"
+#include "absl/strings/str_cat.h"
 #include "absl/strings/str_split.h"
 #include "absl/time/clock.h"
 #include "absl/time/time.h"
@@ -157,6 +158,14 @@
           "finish within --timeout_per_batch seconds. The default is computed "
           "as a function of --timeout_per_input * --batch_size. Support may "
           "vary depending on the runner.");
+ABSL_FLAG(
+    absl::Duration, force_abort_timeout,
+    Environment::Default().force_abort_timeout,
+    absl::StrCat("The timeout to forcefully exit the test binary if it is "
+                 "still running after a SIGABRT. This is useful to prevent "
+                 "hangs (e.g., during stacktrace dumps). The default value is ",
+                 Environment::Default().force_abort_timeout,
+                 "; use 'inf' or a non-positive duration to disable."));
 ABSL_FLAG(absl::Time, stop_at, Environment::Default().stop_at,
           "Stop fuzzing in all shards (--total_shards) at approximately this "
           "time in ISO-8601/RFC-3339 format, e.g. 2023-04-06T23:35:02Z. "
@@ -462,6 +471,7 @@
       /*stack_limit_kb=*/absl::GetFlag(FLAGS_stack_limit_kb),
       /*timeout_per_input=*/absl::GetFlag(FLAGS_timeout_per_input),
       /*timeout_per_batch=*/absl::GetFlag(FLAGS_timeout_per_batch),
+      /*force_abort_timeout=*/absl::GetFlag(FLAGS_force_abort_timeout),
       /*stop_at=*/
       GetStopAtTime(absl::GetFlag(FLAGS_stop_at),
                     absl::GetFlag(FLAGS_stop_after)),
diff --git a/centipede/runner.cc b/centipede/runner.cc
index 59ddbd9..91e76ce 100644
--- a/centipede/runner.cc
+++ b/centipede/runner.cc
@@ -211,6 +211,28 @@
   return tv.tv_sec * kUsecInSec + tv.tv_usec;
 }
 
+static void CheckForceAbortTimeout() {
+  // A force_abort_timeout of 0 means the feature is disabled: nothing to do.
+  if (state.run_time_flags.force_abort_timeout == 0) return;
+  // A zero deadline means no force-abort deadline has been set yet: ignore.
+  if (state.force_abort_deadline == 0) return;
+  // The deadline has passed and this process is still alive: force termination.
+  if (time(nullptr) > state.force_abort_deadline) {
+    fprintf(stderr,
+            "========= Force Abort timer exceeded; "
+            "the program is still running after %" PRIu64
+            " seconds after raising SIGABRT. Forcefully aborting the runner in "
+            "the watchdog thread.\n",
+            state.run_time_flags.force_abort_timeout);
+    // std::_Exit() is preferred over std::abort() and std::exit().
+    // std::abort() would raise a SIGABRT and trigger the same crash handler
+    // that is currently stuck; std::exit() would trigger the onexit handlers,
+    // which may or may not be problematic. To be safe, we use std::_Exit() to
+    // bypass these handlers and terminate the process immediately.
+    std::_Exit(EXIT_FAILURE);
+  }
+}
+
 static void CheckWatchdogLimits() {
   const uint64_t curr_time = time(nullptr);
   struct Resource {
@@ -261,6 +283,8 @@
         pthread_mutex_lock(&state.runner_main_thread_mu);
         if (state.runner_main_thread.has_value()) {
           fprintf(stderr, "Sending SIGABRT to the runner main thread.\n");
+          state.force_abort_deadline =
+              time(nullptr) + state.run_time_flags.force_abort_timeout;
           pthread_kill(*state.runner_main_thread, SIGABRT);
           pthread_mutex_unlock(&state.runner_main_thread_mu);
           return;
@@ -281,6 +305,7 @@
   state.watchdog_thread_started = true;
   while (true) {
     sleep(1);
+    CheckForceAbortTimeout();
 
     // No calls to ResetInputTimer() yet: input execution hasn't started.
     if (state.input_start_time == 0) continue;
@@ -322,10 +347,12 @@
 void GlobalRunnerState::StartWatchdogThread() {
   fprintf(stderr,
           "Starting watchdog thread: timeout_per_input: %" PRIu64
-          " sec; timeout_per_batch: %" PRIu64 " sec; rss_limit_mb: %" PRIu64
+          " sec; timeout_per_batch: %" PRIu64
+          " sec; force_abort_timeout: %" PRIu64 " sec; rss_limit_mb: %" PRIu64
           " MB; stack_limit_kb: %" PRIu64 " KB\n",
           state.run_time_flags.timeout_per_input.load(),
           state.run_time_flags.timeout_per_batch,
+          state.run_time_flags.force_abort_timeout,
           state.run_time_flags.rss_limit_mb.load(),
           state.run_time_flags.stack_limit_kb.load());
   pthread_t watchdog_thread;
diff --git a/centipede/runner.h b/centipede/runner.h
index 3233b19..fc30e1f 100644
--- a/centipede/runner.h
+++ b/centipede/runner.h
@@ -67,6 +67,7 @@
   uint64_t use_auto_dictionary : 1;
   std::atomic<uint64_t> timeout_per_input;
   uint64_t timeout_per_batch;
+  uint64_t force_abort_timeout;
   std::atomic<uint64_t> stack_limit_kb;
   std::atomic<uint64_t> rss_limit_mb;
   uint64_t crossover_level;
@@ -172,6 +173,7 @@
       /*use_auto_dictionary=*/HasFlag(":use_auto_dictionary:"),
       /*timeout_per_input=*/HasIntFlag(":timeout_per_input=", 0),
       /*timeout_per_batch=*/HasIntFlag(":timeout_per_batch=", 0),
+      /*force_abort_timeout=*/HasIntFlag(":force_abort_timeout=", 0),
       /*stack_limit_kb=*/HasIntFlag(":stack_limit_kb=", 0),
       /*rss_limit_mb=*/HasIntFlag(":rss_limit_mb=", 0),
       /*crossover_level=*/HasIntFlag(":crossover_level=", 50),
@@ -345,6 +347,9 @@
   // Per-batch timer. Initially, zero. ResetInputTimer() sets it to the current
   // time before the first input and never resets it.
   std::atomic<time_t> batch_start_time;
+  // Initially, zero. Watchdog thread sets it based on the provided timeout when
+  // it attempts to abort the runner main thread.
+  std::atomic<time_t> force_abort_deadline;
 
   // The Watchdog thread sets this to true.
   std::atomic<bool> watchdog_thread_started;
diff --git a/centipede/testing/BUILD b/centipede/testing/BUILD
index 543fb8b..4d6f25f 100644
--- a/centipede/testing/BUILD
+++ b/centipede/testing/BUILD
@@ -293,6 +293,10 @@
     deps = ["@com_google_absl//absl/base:nullability"],
 )
 
+centipede_fuzz_target(
+    name = "hanging_fuzz_target",
+)
+
 ################################################################################
 # This fuzz target is not currently used by any automated tests and is here for
 # manual tests only.
diff --git a/centipede/testing/hanging_fuzz_target.cc b/centipede/testing/hanging_fuzz_target.cc
new file mode 100644
index 0000000..fe60757
--- /dev/null
+++ b/centipede/testing/hanging_fuzz_target.cc
@@ -0,0 +1,25 @@
+// Copyright 2025 The Centipede Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <csignal>
+#include <cstddef>
+#include <cstdint>
+
+// A fuzz target that hangs even during abort.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  signal(SIGABRT, SIG_IGN);  // Ignore abort signals and let the process hang.
+  // Spin on a volatile read: a side-effect-free infinite loop is UB in C++.
+  for (volatile bool hang = true; hang;) {}
+  return 0;
+}