Enable stack depth tracking via -fsanitize-coverage=stack-depth.

When combined with -fsanitize-coverage-stack-depth-callback-min=, the stack depth is tracked via the __sanitizer_cov_stack_depth() callback. However, this can leave binary data dependencies with unresolved references to the callback, because the sanitizers do not come with a default definition for it. Tracking without the callback is therefore implemented as well: it checks __sancov_lowest_stack at the end of each test.

PiperOrigin-RevId: 885099833
diff --git a/centipede/runner.cc b/centipede/runner.cc
index 3cf4888..e085b54 100644
--- a/centipede/runner.cc
+++ b/centipede/runner.cc
@@ -216,20 +216,21 @@
   }
 }
 
-__attribute__((noinline)) void CheckStackLimit(uintptr_t sp) {
+__attribute__((noinline)) void CheckStackLimit(size_t stack_usage,
+                                               bool is_current_stack) {
   static std::atomic_flag stack_limit_exceeded = ATOMIC_FLAG_INIT;
   const size_t stack_limit = state->run_time_flags.stack_limit_kb.load() << 10;
   // Check for the stack limit only if sp is inside the stack region.
-  if (stack_limit > 0 && tls.stack_region_low &&
-      tls.top_frame_sp - sp > stack_limit) {
+  if (stack_limit > 0 && stack_usage > stack_limit) {
     const bool test_not_running = state->input_start_time == 0;
-    if (test_not_running) return;
+    if (test_not_running && is_current_stack) return;
     if (stack_limit_exceeded.test_and_set()) return;
     fprintf(stderr,
-            "========= Stack limit exceeded: %" PRIuPTR
+            "========= Stack limit exceeded: %zu"
             " > %zu"
-            " (byte); aborting\n",
-            tls.top_frame_sp - sp, stack_limit);
+            " (byte) in %s; aborting\n",
+            stack_usage, stack_limit,
+            is_current_stack ? "the current stack" : "a previous stack");
     CentipedeSetFailureDescription(
         fuzztest::internal::kExecutionFailureStackLimitExceeded.data());
     std::abort();
diff --git a/centipede/runner_utils.h b/centipede/runner_utils.h
index 20fa40e..71b3005 100644
--- a/centipede/runner_utils.h
+++ b/centipede/runner_utils.h
@@ -24,6 +24,10 @@
 
 #include "absl/base/nullability.h"
 
+// Use this attribute for functions that must not be instrumented even if
+// the library is built with sanitizers (asan, etc).
+#define FUZZTEST_NO_SANITIZE __attribute__((no_sanitize("all")))
+
 namespace fuzztest::internal {
 
 // If `condition` prints `error` and calls exit(1).
diff --git a/centipede/sancov_callbacks.cc b/centipede/sancov_callbacks.cc
index 7e7fd15..3e5d0c5 100644
--- a/centipede/sancov_callbacks.cc
+++ b/centipede/sancov_callbacks.cc
@@ -29,6 +29,7 @@
 #include "./centipede/pc_info.h"
 #include "./centipede/reverse_pc_table.h"
 #include "./centipede/runner_dl_info.h"
+#include "./centipede/runner_utils.h"
 #include "./centipede/sancov_state.h"
 
 namespace fuzztest::internal {
@@ -61,10 +62,6 @@
 // https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html.
 #define ENFORCE_INLINE __attribute__((always_inline)) inline
 
-// Use this attribute for functions that must not be instrumented even if
-// the runner is built with sanitizers (asan, etc).
-#define NO_SANITIZE __attribute__((no_sanitize("all")))
-
 // NOTE: Enforce inlining so that `__builtin_return_address` works.
 ENFORCE_INLINE static void TraceLoad(void *addr) {
   if (ABSL_PREDICT_FALSE(!tls.traced) ||
@@ -127,55 +124,65 @@
 //------------------------------------------------------------------------------
 
 extern "C" {
-NO_SANITIZE void __sanitizer_cov_load1(uint8_t *addr) { TraceLoad(addr); }
-NO_SANITIZE void __sanitizer_cov_load2(uint16_t *addr) { TraceLoad(addr); }
-NO_SANITIZE void __sanitizer_cov_load4(uint32_t *addr) { TraceLoad(addr); }
-NO_SANITIZE void __sanitizer_cov_load8(uint64_t *addr) { TraceLoad(addr); }
-NO_SANITIZE void __sanitizer_cov_load16(__uint128_t *addr) { TraceLoad(addr); }
+FUZZTEST_NO_SANITIZE void __sanitizer_cov_load1(uint8_t* addr) {
+  TraceLoad(addr);
+}
+FUZZTEST_NO_SANITIZE void __sanitizer_cov_load2(uint16_t* addr) {
+  TraceLoad(addr);
+}
+FUZZTEST_NO_SANITIZE void __sanitizer_cov_load4(uint32_t* addr) {
+  TraceLoad(addr);
+}
+FUZZTEST_NO_SANITIZE void __sanitizer_cov_load8(uint64_t* addr) {
+  TraceLoad(addr);
+}
+FUZZTEST_NO_SANITIZE void __sanitizer_cov_load16(__uint128_t* addr) {
+  TraceLoad(addr);
+}
 
-NO_SANITIZE
+FUZZTEST_NO_SANITIZE
 void __sanitizer_cov_trace_const_cmp1(uint8_t Arg1, uint8_t Arg2) {
   if (ABSL_PREDICT_FALSE(!tls.traced)) return;
   TraceCmp(Arg1, Arg2,
            reinterpret_cast<uintptr_t>(__builtin_return_address(0)));
 }
-NO_SANITIZE
+FUZZTEST_NO_SANITIZE
 void __sanitizer_cov_trace_const_cmp2(uint16_t Arg1, uint16_t Arg2) {
   if (ABSL_PREDICT_FALSE(!tls.traced)) return;
   TraceCmp(Arg1, Arg2,
            reinterpret_cast<uintptr_t>(__builtin_return_address(0)));
 }
-NO_SANITIZE
+FUZZTEST_NO_SANITIZE
 void __sanitizer_cov_trace_const_cmp4(uint32_t Arg1, uint32_t Arg2) {
   if (ABSL_PREDICT_FALSE(!tls.traced)) return;
   TraceCmp(Arg1, Arg2,
            reinterpret_cast<uintptr_t>(__builtin_return_address(0)));
 }
-NO_SANITIZE
+FUZZTEST_NO_SANITIZE
 void __sanitizer_cov_trace_const_cmp8(uint64_t Arg1, uint64_t Arg2) {
   if (ABSL_PREDICT_FALSE(!tls.traced)) return;
   TraceCmp(Arg1, Arg2,
            reinterpret_cast<uintptr_t>(__builtin_return_address(0)));
 }
-NO_SANITIZE
+FUZZTEST_NO_SANITIZE
 void __sanitizer_cov_trace_cmp1(uint8_t Arg1, uint8_t Arg2) {
   if (ABSL_PREDICT_FALSE(!tls.traced)) return;
   TraceCmp(Arg1, Arg2,
            reinterpret_cast<uintptr_t>(__builtin_return_address(0)));
 }
-NO_SANITIZE
+FUZZTEST_NO_SANITIZE
 void __sanitizer_cov_trace_cmp2(uint16_t Arg1, uint16_t Arg2) {
   if (ABSL_PREDICT_FALSE(!tls.traced)) return;
   TraceCmp(Arg1, Arg2,
            reinterpret_cast<uintptr_t>(__builtin_return_address(0)));
 }
-NO_SANITIZE
+FUZZTEST_NO_SANITIZE
 void __sanitizer_cov_trace_cmp4(uint32_t Arg1, uint32_t Arg2) {
   if (ABSL_PREDICT_FALSE(!tls.traced)) return;
   TraceCmp(Arg1, Arg2,
            reinterpret_cast<uintptr_t>(__builtin_return_address(0)));
 }
-NO_SANITIZE
+FUZZTEST_NO_SANITIZE
 void __sanitizer_cov_trace_cmp8(uint64_t Arg1, uint64_t Arg2) {
   if (ABSL_PREDICT_FALSE(!tls.traced)) return;
   TraceCmp(Arg1, Arg2,
@@ -188,7 +195,7 @@
 // LLVM/libFuzzer implementation).
 //
 // Source: https://clang.llvm.org/docs/SanitizerCoverage.html
-NO_SANITIZE
+FUZZTEST_NO_SANITIZE
 void __sanitizer_cov_trace_switch(uint64_t val, uint64_t* cases) {
   if (ABSL_PREDICT_FALSE(!tls.traced)) return;
   const auto num_cases = cases[0];
@@ -262,6 +269,23 @@
   sancov_state->path_feature_set.set(hash);
 }
 
+// Updates the lowest stack using the current stack pointer `sp` and checks
+// against the stack limit if needed.
+static ENFORCE_INLINE void UpdateLowestStackAndCheckLimit(uintptr_t sp) {
+  // It should be rare for the stack pointer to be valid and exceed the previous
+  // record.
+  if (ABSL_PREDICT_FALSE(sp < tls.lowest_sp && sp <= tls.top_frame_sp &&
+                         sp >= tls.stack_region_low &&
+                         tls.stack_region_low > 0)) {
+    tls.lowest_sp = sp;
+    if (fuzztest::internal::CheckStackLimit == nullptr) {
+      return;
+    }
+    fuzztest::internal::CheckStackLimit(tls.top_frame_sp - sp,
+                                        /*is_current_stack=*/true);
+  }
+}
+
 // Handles one observed PC.
 // `normalized_pc` is an integer representation of PC that is stable between
 // the executions.
@@ -278,18 +302,7 @@
 
   if (pc_guard.is_function_entry) {
     uintptr_t sp = reinterpret_cast<uintptr_t>(__builtin_frame_address(0));
-    // It should be rare for the stack depth to exceed the previous record.
-    if (__builtin_expect(
-            sp < tls.lowest_sp &&
-                // And ignore the stack pointer when it is not in the known
-                // region (e.g. for signal handling with an alternative stack).
-                (tls.stack_region_low == 0 || sp >= tls.stack_region_low),
-            0)) {
-      tls.lowest_sp = sp;
-      if (fuzztest::internal::CheckStackLimit != nullptr) {
-        fuzztest::internal::CheckStackLimit(sp);
-      }
-    }
+    UpdateLowestStackAndCheckLimit(sp);
     if (sancov_state->flags.callstack_level != 0) {
       tls.call_stack.OnFunctionEntry(pc_guard.pc_index, sp);
       sancov_state->callstack_set.set(tls.call_stack.Hash());
@@ -361,6 +374,7 @@
 // This instrumentation is redundant if other instrumentation
 // (e.g. trace-pc-guard) is available, but GCC as of 2022-04 only supports
 // this variant.
+FUZZTEST_NO_SANITIZE
 void __sanitizer_cov_trace_pc() {
   if (ABSL_PREDICT_FALSE(!tls.traced)) return;
   uintptr_t pc = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
@@ -386,7 +400,7 @@
 }
 
 // This function is called on every instrumented edge.
-NO_SANITIZE
+FUZZTEST_NO_SANITIZE
 void __sanitizer_cov_trace_pc_guard(PCGuard *absl_nonnull guard) {
   if (ABSL_PREDICT_FALSE(!tls.traced)) return;
   // This function may be called very early during the DSO initialization,
@@ -397,4 +411,12 @@
   HandleOnePc(*guard);
 }
 
+// This callback is called by the compiler on every function entry when enabled.
+// https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-stack-depth
+FUZZTEST_NO_SANITIZE void __sanitizer_cov_stack_depth() {
+  if (ABSL_PREDICT_FALSE(!tls.traced)) return;
+  UpdateLowestStackAndCheckLimit(
+      reinterpret_cast<uintptr_t>(__builtin_frame_address(0)));
+}
+
 }  // extern "C"
diff --git a/centipede/sancov_interceptors.cc b/centipede/sancov_interceptors.cc
index 304d949..cf1e9ac 100644
--- a/centipede/sancov_interceptors.cc
+++ b/centipede/sancov_interceptors.cc
@@ -22,6 +22,7 @@
 
 #include "absl/base/nullability.h"
 #include "absl/base/optimization.h"
+#include "./centipede/runner_utils.h"
 #include "./centipede/sancov_state.h"
 
 using fuzztest::internal::tls;
@@ -30,7 +31,6 @@
 // before or during the sanitizer initialization. Instead, we check if the
 // current thread is marked as started by the runner as the proxy of sanitizier
 // initialization. If not, we skip the interception logic.
-#define NO_SANITIZE __attribute__((no_sanitize("all")))
 
 namespace {
 
@@ -131,8 +131,8 @@
 
 // Fallback for the case *cmp_orig is null.
 // Will be executed several times at process startup, if at all.
-static NO_SANITIZE int memcmp_fallback(const void *s1, const void *s2,
-                                       size_t n) {
+static FUZZTEST_NO_SANITIZE int memcmp_fallback(const void* s1, const void* s2,
+                                                size_t n) {
   const auto *p1 = static_cast<const uint8_t *>(s1);
   const auto *p2 = static_cast<const uint8_t *>(s2);
   for (size_t i = 0; i < n; ++i) {
@@ -143,8 +143,8 @@
 }
 
 // Fallback for case insensitive comparison.
-static NO_SANITIZE int memcasecmp_fallback(const void* s1, const void* s2,
-                                           size_t n) {
+static FUZZTEST_NO_SANITIZE int memcasecmp_fallback(const void* s1,
+                                                    const void* s2, size_t n) {
   static uint8_t to_lower[256];
   [[maybe_unused]] static bool initialize_to_lower = [&] {
     for (size_t i = 0; i < sizeof(to_lower); ++i) {
@@ -166,7 +166,8 @@
 
 // memcmp interceptor.
 // Calls the real memcmp() and possibly modifies state.cmp_feature_set.
-extern "C" NO_SANITIZE int memcmp(const void *s1, const void *s2, size_t n) {
+extern "C" FUZZTEST_NO_SANITIZE int memcmp(const void* s1, const void* s2,
+                                           size_t n) {
   const int result =
       memcmp_orig ? memcmp_orig(s1, s2, n) : memcmp_fallback(s1, s2, n);
   if (ABSL_PREDICT_FALSE(!tls.traced)) {
@@ -183,7 +184,7 @@
 
 // strcmp interceptor.
 // Calls the real strcmp() and possibly modifies state.cmp_feature_set.
-extern "C" NO_SANITIZE int strcmp(const char *s1, const char *s2) {
+extern "C" FUZZTEST_NO_SANITIZE int strcmp(const char* s1, const char* s2) {
   // Find the length of the shorter string, as this determines the actual number
   // of bytes that are compared. Note that this is needed even if we call
   // `strcmp_orig` because we're passing it to `TraceMemCmp()`.
@@ -205,7 +206,8 @@
 
 // strncmp interceptor.
 // Calls the real strncmp() and possibly modifies state.cmp_feature_set.
-extern "C" NO_SANITIZE int strncmp(const char *s1, const char *s2, size_t n) {
+extern "C" FUZZTEST_NO_SANITIZE int strncmp(const char* s1, const char* s2,
+                                            size_t n) {
   // Find the length of the shorter string, as this determines the actual number
   // of bytes that are compared. Note that this is needed even if we call
   // `strncmp_orig` because we're passing it to `TraceMemCmp()`.
@@ -228,7 +230,7 @@
 
 // strcasecmp interceptor.
 // Calls the real strcasecmp() and possibly modifies state.cmp_feature_set.
-extern "C" NO_SANITIZE int strcasecmp(const char* s1, const char* s2) {
+extern "C" FUZZTEST_NO_SANITIZE int strcasecmp(const char* s1, const char* s2) {
   // Find the length of the shorter string, as this determines the actual number
   // of bytes that are compared. Note that this is needed even if we call
   // `strcasecmp_orig` because we're passing it to `TraceMemCmp()`.
@@ -251,8 +253,8 @@
 
 // strncasecmp interceptor.
 // Calls the real strncasecmp() and possibly modifies state.cmp_feature_set.
-extern "C" NO_SANITIZE int strncasecmp(const char* s1, const char* s2,
-                                       size_t n) {
+extern "C" FUZZTEST_NO_SANITIZE int strncasecmp(const char* s1, const char* s2,
+                                                size_t n) {
   // Find the length of the shorter string, as this determines the actual number
   // of bytes that are compared. Note that this is needed even if we call
   // `strncasecmp_orig` because we're passing it to `TraceMemCmp()`.
diff --git a/centipede/sancov_state.cc b/centipede/sancov_state.cc
index 25bcd73..19570d6 100644
--- a/centipede/sancov_state.cc
+++ b/centipede/sancov_state.cc
@@ -38,6 +38,10 @@
 __attribute__((weak)) extern fuzztest::internal::feature_t
     __stop___centipede_extra_features;
 
+// May be updated by sancov with -fsanitize-coverage=stack-depth.
+__attribute__((visibility("default")))
+__attribute__((weak)) thread_local uintptr_t __sancov_lowest_stack;
+
 namespace fuzztest::internal {
 
 ExplicitLifetime<SancovState> sancov_state;
@@ -56,9 +60,8 @@
 //
 // Must not be sanitized because sanitizers may trigger this on unsanitized
 // data, causing false positives and nested failures.
-__attribute__((no_sanitize("all"))) size_t LengthOfCommonPrefix(const void* s1,
-                                                                const void* s2,
-                                                                size_t n) {
+FUZZTEST_NO_SANITIZE size_t LengthOfCommonPrefix(const void* s1, const void* s2,
+                                                 size_t n) {
   const auto *p1 = static_cast<const uint8_t *>(s1);
   const auto *p2 = static_cast<const uint8_t *>(s2);
   static constexpr size_t kMaxLen = feature_domains::kCMPScoreBitmask;
@@ -122,7 +125,8 @@
   // Always trace threads by default. Internal threads that do not want tracing
   // will set this to false later.
   tls.traced = true;
-  tls.lowest_sp = tls.top_frame_sp =
+  tls.sancov_lowest_sp = &__sancov_lowest_stack;
+  *tls.sancov_lowest_sp = tls.lowest_sp = tls.top_frame_sp =
       reinterpret_cast<uintptr_t>(__builtin_frame_address(0));
   tls.stack_region_low = GetCurrentThreadStackRegionLow();
   if (tls.stack_region_low == 0) {
@@ -142,6 +146,13 @@
 void ThreadLocalSancovState::OnThreadStop() {
   tls.traced = false;
   LockGuard lock(sancov_state->tls_list_mu);
+  const size_t sancov_lowest_sp = *tls.sancov_lowest_sp;
+  tls.sancov_lowest_sp = nullptr;
+  if (sancov_lowest_sp <= tls.top_frame_sp &&
+      sancov_lowest_sp < tls.lowest_sp &&
+      sancov_lowest_sp >= tls.stack_region_low && tls.stack_region_low > 0) {
+    tls.lowest_sp = sancov_lowest_sp;
+  }
   // Remove myself from state.tls_list. The list never
   // becomes empty because the main thread does not call OnThreadStop().
   if (&tls == sancov_state->tls_list) {
@@ -297,13 +308,17 @@
 
 void CleanUpSancovTls() {
   sancov_state->CleanUpDetachedTls();
-  if (sancov_state->flags.path_level != 0) {
-    sancov_state->ForEachTls([](ThreadLocalSancovState& tls) {
+  sancov_state->ForEachTls([](ThreadLocalSancovState& tls) {
+    if (sancov_state->flags.path_level != 0) {
       tls.path_ring_buffer.Reset(sancov_state->flags.path_level);
+    }
+    if (sancov_state->flags.callstack_level != 0) {
       tls.call_stack.Reset(sancov_state->flags.callstack_level);
-      tls.lowest_sp = tls.top_frame_sp;
-    });
-  }
+    }
+    RunnerCheck(tls.sancov_lowest_sp != nullptr,
+                "sancov_lowest_sp is null for a live thread");
+    *tls.sancov_lowest_sp = tls.lowest_sp = tls.top_frame_sp;
+  });
 }
 
 void PrepareSancov(bool full_clear) {
@@ -439,10 +454,23 @@
 
   // Iterate all threads and get features from TLS data.
   sancov_state->ForEachTls([&feature_handler](ThreadLocalSancovState& tls) {
+    RunnerCheck(tls.top_frame_sp >= tls.lowest_sp,
+                "bad values of tls.top_frame_sp and tls.lowest_sp");
+    uintptr_t lowest_sp = tls.lowest_sp;
+    if (tls.sancov_lowest_sp != nullptr) {
+      const uintptr_t sancov_lowest_sp = *tls.sancov_lowest_sp;
+      if (sancov_lowest_sp <= tls.top_frame_sp &&
+          sancov_lowest_sp <= lowest_sp &&
+          sancov_lowest_sp >= tls.stack_region_low &&
+          tls.stack_region_low > 0) {
+        lowest_sp = sancov_lowest_sp;
+      }
+    }
+    const size_t sp_diff = tls.top_frame_sp - lowest_sp;
+    if (CheckStackLimit != nullptr) {
+      CheckStackLimit(sp_diff, /*is_current_stack=*/false);
+    }
     if (sancov_state->flags.callstack_level != 0) {
-      RunnerCheck(tls.top_frame_sp >= tls.lowest_sp,
-                  "bad values of tls.top_frame_sp and tls.lowest_sp");
-      size_t sp_diff = tls.top_frame_sp - tls.lowest_sp;
       feature_handler(feature_domains::kCallStack.ConvertToMe(sp_diff));
     }
   });
diff --git a/centipede/sancov_state.h b/centipede/sancov_state.h
index 3afb6c4..85407e3 100644
--- a/centipede/sancov_state.h
+++ b/centipede/sancov_state.h
@@ -110,8 +110,22 @@
   uintptr_t top_frame_sp;
   // The lower bound of the stack region of this thread. 0 means unknown.
   uintptr_t stack_region_low;
-  // Lowest observed value of SP.
+
+  // `lowest_sp` and `*sancov_lowest_sp` are read and written by both
+  // the current thread and the sancov processing thread. Thus race conditions
+  // may happen. We don't use mutex/atomic because they are slow (and it is not
+  // possible on sancov_lowest_sp). Instead we let race conditions happen and
+  // tolerate them with the best effort. An SP value is valid if and only if
+  // `stack_region_low <= SP <= top_frame_sp && stack_region_low > 0`.
+  // This should reject most bad values caused by race conditions.
+  //
+  // Lowest sp observed by this library.
   uintptr_t lowest_sp;
+  // A pointer to the lowest sp updated by sancov (if enabled). It is constant
+  // and non-null when the thread is alive, and set to null when the thread
+  // is terminated, guarded by `state.tls_list_mu`. So no race conditions on
+  // the pointer itself.
+  uintptr_t* sancov_lowest_sp;
 
   // The (imprecise) call stack is updated by the PC callback.
   CallStack<> call_stack;
@@ -296,8 +310,10 @@
 // Gets the execution metadata gathered in `PostProcessSancov`.
 const ExecutionMetadata& SanCovRuntimeGetExecutionMetadata();
 
-// Check for stack limit for the stack pointer `sp` in the current thread.
-__attribute__((weak)) void CheckStackLimit(uintptr_t sp);
+// Checks `stack_usage` (in bytes) against the stack limit. `is_current_stack`
+// is true if the usage is for the current calling stack.
+__attribute__((weak)) void CheckStackLimit(size_t stack_usage,
+                                           bool is_current_stack);
 
 extern ExplicitLifetime<SancovState> sancov_state;
 extern __thread ThreadLocalSancovState tls;
diff --git a/centipede/weak_sancov_stubs.cc b/centipede/weak_sancov_stubs.cc
index c9bfa63..66f78a6 100644
--- a/centipede/weak_sancov_stubs.cc
+++ b/centipede/weak_sancov_stubs.cc
@@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <cstdint>
+
 #define WEAK_SANCOV_DEF(return_type, name, ...)                           \
   extern "C" __attribute__((visibility("default"))) __attribute__((weak)) \
   return_type                                                             \
@@ -42,3 +44,7 @@
 WEAK_SANCOV_DEF(void, __sanitizer_cov_load4, void) {}
 WEAK_SANCOV_DEF(void, __sanitizer_cov_load8, void) {}
 WEAK_SANCOV_DEF(void, __sanitizer_cov_load16, void) {}
+
+WEAK_SANCOV_DEF(void, __sanitizer_cov_stack_depth, void) {}
+extern "C" __attribute__((visibility("default")))
+__attribute__((weak)) thread_local uintptr_t __sancov_lowest_stack;