refactor and simplify capturing backtraces from mmap hooks

The case of mmap and sbrk hooks is simple enough that we can use a
simpler "skip the right number of frames" approach, instead of relying
on the less portable and brittle attribute-section trick.
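
For reference, a hook that wants backtraces now registers an extra
"need_backtrace" callback via HookMMapEventsWithBacktrace (see
src/mmap_hook.h in this patch). A minimal sketch of the new API; the
hook function names below are illustrative, not part of the patch:

    #include "mmap_hook.h"

    static tcmalloc::MappingHookSpace hook_space;

    // Queried before the event is dispatched. Returning N > 0 asks
    // the mmap hook machinery to capture up to N frames (capped at
    // 32) into evt.stack.
    static int NeedBacktrace(const tcmalloc::MappingEvent& evt) {
      return 32;
    }

    // Regular event callback. evt.stack[0 .. evt.stack_depth) holds
    // the captured frames when a backtrace was requested.
    static void HandleEvent(const tcmalloc::MappingEvent& evt) {
      for (int i = 0; i < evt.stack_depth; i++) {
        // inspect evt.stack[i]
      }
    }

    void InstallHook() {
      tcmalloc::HookMMapEventsWithBacktrace(&hook_space, HandleEvent,
                                            NeedBacktrace);
    }
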
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b921f8b..51664e7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -614,12 +614,15 @@
   target_link_libraries(malloc_hook_test common gtest)
   add_test(malloc_hook_test malloc_hook_test)
 
-  if(NOT MINGW AND NOT MSVC)
-    add_executable(mmap_hook_test
-            src/tests/mmap_hook_test.cc
-            src/mmap_hook.cc)
-    target_link_libraries(mmap_hook_test common gtest)
-    add_test(mmap_hook_test mmap_hook_test)
+  if(GPERFTOOLS_BUILD_HEAP_CHECKER OR GPERFTOOLS_BUILD_HEAP_PROFILER)
+    if(NOT MINGW AND NOT MSVC)
+      add_executable(mmap_hook_test
+              src/tests/mmap_hook_test.cc
+              src/mmap_hook.cc
+              src/malloc_backtrace.cc)
+      target_link_libraries(mmap_hook_test stacktrace common gtest)
+      add_test(mmap_hook_test mmap_hook_test)
+    endif()
   endif()
 
   add_executable(malloc_extension_test src/tests/malloc_extension_test.cc)
diff --git a/Makefile.am b/Makefile.am
index 4b19d1c..35ff4e1 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -361,9 +361,10 @@
 if !MINGW
 TESTS += mmap_hook_test
 mmap_hook_test_SOURCES = src/tests/mmap_hook_test.cc \
-                         src/mmap_hook.cc
+                         src/mmap_hook.cc \
+                         src/malloc_backtrace.cc
 mmap_hook_test_CPPFLAGS = $(gtest_CPPFLAGS)
-mmap_hook_test_LDADD = libcommon.la libgtest.la
+mmap_hook_test_LDADD = libstacktrace.la libcommon.la libgtest.la
 endif !MINGW
 endif WITH_HEAP_PROFILER_OR_CHECKER
 
diff --git a/src/memory_region_map.cc b/src/memory_region_map.cc
index 57fcfa3..2728cc5 100644
--- a/src/memory_region_map.cc
+++ b/src/memory_region_map.cc
@@ -122,7 +122,6 @@
 #include "mmap_hook.h"
 
 #include <gperftools/stacktrace.h>
-#include <gperftools/malloc_hook.h> // For MallocHook::GetCallerStackTrace
 
 using std::max;
 
@@ -175,7 +174,7 @@
   }
 
   // Set our hooks and make sure they were installed:
-  tcmalloc::HookMMapEvents(&mapping_hook_space_, HandleMappingEvent);
+  tcmalloc::HookMMapEventsWithBacktrace(&mapping_hook_space_, HandleMappingEvent, NeedBacktrace);
 
   // We need to set recursive_insert since the NewArena call itself
   // will already do some allocations with mmap which our hooks will catch
@@ -540,45 +539,16 @@
   }
 }
 
-// We strip out different number of stack frames in debug mode
-// because less inlining happens in that case
-#ifdef NDEBUG
-static const int kStripFrames = 1;
-#else
-static const int kStripFrames = 3;
-#endif
-
-void MemoryRegionMap::RecordRegionAddition(const void* start, size_t size) {
+void MemoryRegionMap::RecordRegionAddition(const void* start, size_t size,
+                                           int stack_depth, void** stack) {
   // Record start/end info about this memory acquisition call in a new region:
   Region region;
   region.Create(start, size);
-  // First get the call stack info into the local varible 'region':
-  int depth = 0;
-  // NOTE: libunwind also does mmap and very much likely while holding
-  // it's own lock(s). So some threads may first take libunwind lock,
-  // and then take region map lock (necessary to record mmap done from
-  // inside libunwind). On the other hand other thread(s) may do
-  // normal mmap. Which would call this method to record it. Which
-  // would then proceed with installing that record to region map
-  // while holding region map lock. That may cause mmap from our own
-  // internal allocators, so attempt to unwind in this case may cause
-  // reverse order of taking libuwind and region map locks. Which is
-  // obvious deadlock.
-  //
-  // Thankfully, we can easily detect if we're holding region map lock
-  // and avoid recording backtrace in this (rare and largely
-  // irrelevant) case. By doing this we "declare" that thread needing
-  // both locks must take region map lock last. In other words we do
-  // not allow taking libuwind lock when we already have region map
-  // lock. Note, this is generally impossible when somebody tries to
-  // mix cpu profiling and heap checking/profiling, because cpu
-  // profiler grabs backtraces at arbitrary places. But at least such
-  // combination is rarer and less relevant.
-  if (max_stack_depth_ > 0 && !LockIsHeld()) {
-    depth = MallocHook::GetCallerStackTrace(const_cast<void**>(region.call_stack),
-                                            max_stack_depth_, kStripFrames + 1);
+  stack_depth = std::min(stack_depth, max_stack_depth_);
+  if (stack_depth) {
+    memcpy(region.call_stack, stack, sizeof(*stack)*stack_depth);
   }
-  region.set_call_stack_depth(depth);  // record stack info fully
+  region.set_call_stack_depth(stack_depth);
   RAW_VLOG(10, "New global region %p..%p from %p",
               reinterpret_cast<void*>(region.start_addr),
               reinterpret_cast<void*>(region.end_addr),
@@ -590,7 +560,7 @@
     // This will (eventually) allocate storage for and copy over the stack data
     // from region.call_stack_data_ that is pointed by region.call_stack().
   if (bucket_table_ != NULL) {
-    HeapProfileBucket* b = GetBucket(depth, region.call_stack);
+    HeapProfileBucket* b = GetBucket(stack_depth, region.call_stack);
     ++b->allocs;
     b->alloc_size += size;
     if (!recursive_insert) {
@@ -730,6 +700,10 @@
   b->free_size += size;
 }
 
+static bool HasAddition(const tcmalloc::MappingEvent& evt) {
+  return evt.after_valid && (evt.after_length != 0);
+}
+
 void MemoryRegionMap::HandleMappingEvent(const tcmalloc::MappingEvent& evt) {
   RAW_VLOG(10, "MMap: before: %p, +%zu; after: %p, +%zu; fd: %d, off: %lld, sbrk: %s",
            evt.before_address, evt.before_valid ? evt.before_length : 0,
@@ -739,11 +713,48 @@
   if (evt.before_valid && evt.before_length != 0) {
     RecordRegionRemoval(evt.before_address, evt.before_length);
   }
-  if (evt.after_valid && evt.after_length != 0) {
-    RecordRegionAddition(evt.after_address, evt.after_length);
+  if (HasAddition(evt)) {
+    RecordRegionAddition(evt.after_address, evt.after_length,
+                         evt.stack_depth, evt.stack);
   }
 }
 
+int MemoryRegionMap::NeedBacktrace(const tcmalloc::MappingEvent& evt) {
+  // We only use backtraces when recording additions (see
+  // above). Otherwise, no backtrace is needed.
+  if (!HasAddition(evt)) {
+    return 0;
+  }
+
+  // NOTE: libunwind also does mmap, most likely while holding its
+  // own lock(s). So some threads may first take the libunwind lock
+  // and then take the region map lock (necessary to record mmap done
+  // from inside libunwind). On the other hand, other thread(s) may
+  // do a normal mmap, which calls this method to record it and then
+  // proceeds to install that record into the region map while
+  // holding the region map lock. That may cause mmap from our own
+  // internal allocators, so attempting to unwind in this case may
+  // take the libunwind and region map locks in reverse order, which
+  // is an obvious deadlock.
+  //
+  // Thankfully, we can easily detect if we're holding the region map
+  // lock and avoid recording a backtrace in this (rare and largely
+  // irrelevant) case. By doing this we "declare" that a thread needing
+  // both locks must take the region map lock last. In other words we
+  // do not allow taking the libunwind lock when we already hold the
+  // region map lock.
+  //
+  // Note, such a rule is in general impossible to enforce when
+  // somebody mixes CPU profiling and heap checking/profiling, because
+  // the CPU profiler grabs backtraces at arbitrary places. But at
+  // least such a combination is rarer and less relevant.
+  if (LockIsHeld()) {
+    return 0;
+  }
+
+  return max_stack_depth_;
+}
+
 void MemoryRegionMap::LogAllLocked() {
   RAW_CHECK(LockIsHeld(), "should be held (by this thread)");
   RAW_LOG(INFO, "List of regions:");
diff --git a/src/memory_region_map.h b/src/memory_region_map.h
index dae1c53..0d2aa89 100644
--- a/src/memory_region_map.h
+++ b/src/memory_region_map.h
@@ -368,11 +368,13 @@
 
   // Record addition of a memory region at address "start" of size "size"
   // (called from our mmap/mremap/sbrk hook).
-  static void RecordRegionAddition(const void* start, size_t size);
+  static void RecordRegionAddition(const void* start, size_t size, int stack_depth, void** stack);
   // Record deletion of a memory region at address "start" of size "size"
   // (called from our munmap/mremap/sbrk hook).
   static void RecordRegionRemoval(const void* start, size_t size);
 
+  static int NeedBacktrace(const tcmalloc::MappingEvent& evt);
+
   // Record deletion of a memory region of size "size" in a bucket whose
   // caller stack trace is "key".  The stack trace is used to a depth of
   // "depth" at most.
diff --git a/src/mmap_hook.cc b/src/mmap_hook.cc
index e329bb9..e375ad2 100644
--- a/src/mmap_hook.cc
+++ b/src/mmap_hook.cc
@@ -44,15 +44,17 @@
 
 #include "mmap_hook.h"
 
-#include "base/spinlock.h"
-#include "base/logging.h"
-
-#include <atomic>
-
 #if HAVE_SYS_SYSCALL_H
 #include <sys/syscall.h>
 #endif
 
+#include <algorithm>
+#include <atomic>
+
+#include "base/logging.h"
+#include "base/spinlock.h"
+#include "malloc_backtrace.h"
+
 // Disable the glibc prototype of mremap(), as older versions of the
 // system headers define this function with only four arguments,
 // whereas newer versions allow an optional fifth argument:
@@ -83,9 +85,12 @@
 namespace {
 
 struct MappingHookDescriptor {
-  MappingHookDescriptor(MMapEventFn fn) : fn(fn) {}
+  MappingHookDescriptor(MMapEventFn fn,
+                        MMapEventNeedBacktraceFn need_backtrace)
+    : fn(fn), need_backtrace(need_backtrace) {}
 
   const MMapEventFn fn;
+  const MMapEventNeedBacktraceFn need_backtrace;
 
   std::atomic<bool> inactive{false};
   std::atomic<MappingHookDescriptor*> next;
@@ -103,7 +108,7 @@
     return reinterpret_cast<MappingHookDescriptor*>(space->storage);
   }
 
-  void Add(MappingHookSpace *space, MMapEventFn fn) {
+  void Add(MappingHookSpace *space, MMapEventFn fn, MMapEventNeedBacktraceFn need_backtrace) {
     MappingHookDescriptor* desc = SpaceToDesc(space);
     if (space->initialized) {
       desc->inactive.store(false);
@@ -111,7 +116,7 @@
     }
 
     space->initialized = true;
-    new (desc) MappingHookDescriptor(fn);
+    new (desc) MappingHookDescriptor(fn, need_backtrace);
 
     MappingHookDescriptor* next_candidate = list_head_.load(std::memory_order_relaxed);
     do {
@@ -124,7 +129,7 @@
     SpaceToDesc(space)->inactive.store(true);
   }
 
-  void InvokeAll(const MappingEvent& evt) {
+  int PreInvokeAll(const MappingEvent& evt) {
     if (!ran_initial_hooks_.load(std::memory_order_relaxed)) {
       bool already_ran = ran_initial_hooks_.exchange(true, std::memory_order_seq_cst);
       if (!already_ran) {
@@ -132,6 +137,21 @@
       }
     }
 
+    int stack_depth = 0;
+
+    std::atomic<MappingHookDescriptor*> *place = &list_head_;
+    while (MappingHookDescriptor* desc = place->load(std::memory_order_acquire)) {
+      place = &desc->next;
+      if (!desc->inactive && desc->need_backtrace) {
+        int need = desc->need_backtrace(evt);
+        stack_depth = std::max(stack_depth, need);
+      }
+    }
+
+    return stack_depth;
+  }
+
+  void InvokeAll(const MappingEvent& evt) {
     std::atomic<MappingHookDescriptor*> *place = &list_head_;
     while (MappingHookDescriptor* desc = place->load(std::memory_order_acquire)) {
       place = &desc->next;
@@ -141,7 +161,7 @@
     }
   }
 
-  void InvokeSbrk(void* result, intptr_t increment) {
+  static MappingEvent FillSbrk(void* result, intptr_t increment) {
     MappingEvent evt;
     evt.is_sbrk = 1;
     if (increment > 0) {
@@ -155,8 +175,7 @@
       evt.before_length = -increment;
       evt.before_valid = 1;
     }
-
-    InvokeAll(evt);
+    return evt;
   }
 
 private:
@@ -168,7 +187,11 @@
 }  // namespace
 
 void HookMMapEvents(MappingHookSpace* place, MMapEventFn callback) {
-  mapping_hooks.Add(place, callback);
+  mapping_hooks.Add(place, callback, nullptr);
+}
+
+void HookMMapEventsWithBacktrace(MappingHookSpace* place, MMapEventFn callback, MMapEventNeedBacktraceFn need_backtrace) {
+  mapping_hooks.Add(place, callback, need_backtrace);
 }
 
 void UnHookMMapEvents(MappingHookSpace* place) {
@@ -236,8 +259,31 @@
 #define DEFINED_DO_MMAP
 #endif  // 64-bit FreeBSD
 
+namespace {
+
+struct BacktraceHelper {
+  static inline constexpr int kDepth = 32;
+  void* backtrace[kDepth];
+
+  int PreInvoke(tcmalloc::MappingEvent* evt) {
+    int want_stack = tcmalloc::mapping_hooks.PreInvokeAll(*evt);
+    if (want_stack) {
+      want_stack = std::min(want_stack, kDepth);
+      evt->stack = backtrace;
+    }
+    return want_stack;
+  }
+};
+
+}  // namespace
+
 #ifdef DEFINED_DO_MMAP
 
+// Note, this code gets built by gcc or gcc-compatible compilers
+// (e.g. clang), so we can rely on ALWAYS_INLINE to actually work even
+// when built with -O0 -fno-inline. This matters because we're
+// carefully controlling the backtrace skip count, so that the mmap
+// hook sees the backtrace just "up to" the mmap call.
 static ALWAYS_INLINE
 void* do_mmap_with_hooks(void* start, size_t length, int prot, int flags, int fd, int64_t offset) {
   void* result = do_mmap(start, length, prot, flags, fd, offset);
@@ -256,6 +302,12 @@
   evt.flags = flags;
   evt.prot = prot;
 
+  BacktraceHelper helper;
+  int want_stack = helper.PreInvoke(&evt);
+  if (want_stack) {
+    evt.stack_depth = tcmalloc::GrabBacktrace(evt.stack, want_stack, 1);
+  }
+
   tcmalloc::mapping_hooks.InvokeAll(evt);
 
   return result;
@@ -300,10 +352,13 @@
 #undef mmap64
 #undef mmap
 
-extern "C" PERFTOOLS_DLL_DECL void* mmap64(void* start, size_t length, int prot, int flags, int fd, off_t off)
-  __THROW ATTRIBUTE_SECTION(malloc_hook);
-extern "C" PERFTOOLS_DLL_DECL void* mmap(void* start, size_t length, int prot, int flags, int fd, off_t off)
-  __THROW ATTRIBUTE_SECTION(malloc_hook);
+extern "C" PERFTOOLS_DLL_DECL ATTRIBUTE_NOINLINE
+void* mmap64(void* start, size_t length, int prot, int flags, int fd, off_t off)
+  __THROW;
+
+extern "C" PERFTOOLS_DLL_DECL ATTRIBUTE_NOINLINE
+void* mmap(void* start, size_t length, int prot, int flags, int fd, off_t off)
+  __THROW;
 
 void* mmap64(void* start, size_t length, int prot, int flags, int fd, off_t off) __THROW {
   return do_mmap_with_hooks(start, length, prot, flags, fd, off);
@@ -320,10 +375,13 @@
 
 static_assert(sizeof(int32_t) == sizeof(off_t), "");
 
-extern "C" PERFTOOLS_DLL_DECL void* mmap64(void* start, size_t length, int prot, int flags, int fd, int64_t off)
-  __THROW ATTRIBUTE_SECTION(malloc_hook);
-extern "C" PERFTOOLS_DLL_DECL void* mmap(void* start, size_t length, int prot, int flags, int fd, off_t off)
-  __THROW ATTRIBUTE_SECTION(malloc_hook);
+extern "C" PERFTOOLS_DLL_DECL ATTRIBUTE_NOINLINE
+void* mmap64(void* start, size_t length, int prot, int flags, int fd, int64_t off)
+  __THROW;
+
+extern "C" PERFTOOLS_DLL_DECL ATTRIBUTE_NOINLINE
+void* mmap(void* start, size_t length, int prot, int flags, int fd, off_t off)
+  __THROW;
 
 void* mmap(void *start, size_t length, int prot, int flags, int fd, off_t off) __THROW {
   return do_mmap_with_hooks(start, length, prot, flags, fd, off);
@@ -340,7 +398,8 @@
 
 #ifdef HOOKED_MMAP
 
-extern "C" PERFTOOLS_DLL_DECL int munmap(void* start, size_t length) __THROW ATTRIBUTE_SECTION(malloc_hook);
+extern "C" PERFTOOLS_DLL_DECL ATTRIBUTE_NOINLINE int munmap(void* start, size_t length) __THROW;
+
 int munmap(void* start, size_t length) __THROW {
   int result = tcmalloc::DirectMUnMap(/* invoke_hooks=*/ false, start, length);
   if (result < 0) {
@@ -352,6 +411,12 @@
   evt.before_length = length;
   evt.before_valid = 1;
 
+  BacktraceHelper helper;
+  int want_stack = helper.PreInvoke(&evt);
+  if (want_stack) {
+    evt.stack_depth = tcmalloc::GrabBacktrace(evt.stack, want_stack, 1);
+  }
+
   tcmalloc::mapping_hooks.InvokeAll(evt);
 
   return result;
@@ -382,9 +447,9 @@
 }
 
 #if __linux__
-extern "C" PERFTOOLS_DLL_DECL
+extern "C" PERFTOOLS_DLL_DECL ATTRIBUTE_NOINLINE
 void* mremap(void* old_addr, size_t old_size, size_t new_size,
-             int flags, ...) __THROW ATTRIBUTE_SECTION(malloc_hook);
+             int flags, ...) __THROW;
 // We only handle mremap on Linux so far.
 void* mremap(void* old_addr, size_t old_size, size_t new_size,
              int flags, ...) __THROW {
@@ -405,6 +470,12 @@
     evt.after_valid = 1;
     evt.flags = flags;
 
+    BacktraceHelper helper;
+    int want_stack = helper.PreInvoke(&evt);
+    if (want_stack) {
+      evt.stack_depth = tcmalloc::GrabBacktrace(evt.stack, want_stack, 1);
+    }
+
     tcmalloc::mapping_hooks.InvokeAll(evt);
   }
 
@@ -417,27 +488,13 @@
 // glibc's version:
 extern "C" void* __sbrk(intptr_t increment);
 
-extern "C" PERFTOOLS_DLL_DECL void* sbrk(intptr_t increment) __THROW ATTRIBUTE_SECTION(malloc_hook);
-
-void* sbrk(intptr_t increment) __THROW {
-  void *result = __sbrk(increment);
-  if (increment == 0 || result == reinterpret_cast<void*>(static_cast<intptr_t>(-1))) {
-    return result;
-  }
-
-  tcmalloc::mapping_hooks.InvokeSbrk(result, increment);
-
-  return result;
-}
+#define do_sbrk(i) __sbrk(i)
 
 #define HOOKED_SBRK
-
-#endif
+#endif  // linux and __sbrk
 
 #if defined(__FreeBSD__) && defined(_LP64) && defined(HAVE_SBRK)
-extern "C" PERFTOOLS_DLL_DECL void* sbrk(intptr_t increment) __THROW ATTRIBUTE_SECTION(malloc_hook);
-
-void* sbrk(intptr_t increment) __THROW {
+static void* do_sbrk(intptr_t increment) {
   uintptr_t curbrk = __syscall(SYS_break, nullptr);
   uintptr_t badbrk = static_cast<uintptr_t>(static_cast<intptr_t>(-1));
   if (curbrk == badbrk) {
@@ -464,15 +521,32 @@
     goto nomem;
   }
 
-  auto result = reinterpret_cast<void*>(curbrk);
-  tcmalloc::mapping_hooks.InvokeSbrk(result, increment);
-
-  return result;
+  return reinterpret_cast<void*>(curbrk);
 }
 
 #define HOOKED_SBRK
+#endif  // FreeBSD
 
-#endif
+#ifdef HOOKED_SBRK
+extern "C" PERFTOOLS_DLL_DECL ATTRIBUTE_NOINLINE void* sbrk(intptr_t increment) __THROW;
+
+void* sbrk(intptr_t increment) __THROW {
+  void *result = do_sbrk(increment);
+  if (increment == 0 || result == reinterpret_cast<void*>(static_cast<intptr_t>(-1))) {
+    return result;
+  }
+
+  tcmalloc::MappingEvent evt = tcmalloc::MappingHooks::FillSbrk(result, increment);
+  BacktraceHelper helper;
+  int want_stack = helper.PreInvoke(&evt);
+  if (want_stack) {
+    evt.stack_depth = tcmalloc::GrabBacktrace(evt.stack, want_stack, 1);
+  }
+  tcmalloc::mapping_hooks.InvokeAll(evt);
+
+  return result;
+}
+#endif  // HOOKED_SBRK
 
 namespace tcmalloc {
 #ifdef HOOKED_MMAP
diff --git a/src/mmap_hook.h b/src/mmap_hook.h
index d434cf3..546dc5b 100644
--- a/src/mmap_hook.h
+++ b/src/mmap_hook.h
@@ -86,6 +86,11 @@
   unsigned before_valid:1;
   unsigned file_valid:1;
   unsigned is_sbrk:1;
+
+  // NOTE: in order to get a mapping event backtrace you need to
+  // request it via the need_backtrace callback.
+  int stack_depth;
+  void** stack;
 };
 
 // Pass this to Hook/Unhook function below. Note, nature of
@@ -100,11 +105,12 @@
 
   bool initialized = false;
 
-  static constexpr size_t kSize = sizeof(void*) * 3;
+  static constexpr size_t kSize = sizeof(void*) * 4;
   alignas(alignof(void*)) char storage[kSize] = {};
 };
 
 using MMapEventFn = void (*)(const MappingEvent& evt);
+using MMapEventNeedBacktraceFn = int (*)(const MappingEvent& evt);
 
 // HookMMapEvents address hook for mmap events, using given place to
 // store relevant metadata (linked list membership etc).
@@ -112,6 +118,9 @@
 // It does no memory allocation and is safe to be called from hooks of all kinds.
 ATTRIBUTE_VISIBILITY_HIDDEN void HookMMapEvents(MappingHookSpace* place, MMapEventFn callback);
 
+ATTRIBUTE_VISIBILITY_HIDDEN void HookMMapEventsWithBacktrace(MappingHookSpace* place, MMapEventFn callback,
+                                                             MMapEventNeedBacktraceFn need_backtrace);
+
 // UnHookMMapEvents undoes effect of HookMMapEvents. This one is also
 // entirely safe to be called from out of anywhere. Including from
 // inside MMapEventFn invokations.
diff --git a/src/tests/mmap_hook_test.cc b/src/tests/mmap_hook_test.cc
index e89b1dd..a6e205f 100644
--- a/src/tests/mmap_hook_test.cc
+++ b/src/tests/mmap_hook_test.cc
@@ -42,18 +42,29 @@
 
 #include "mmap_hook.h"
 
+#include "base/function_ref.h"
+#include "gperftools/stacktrace.h"
+
+#include "tests/testutil.h"
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
 #include "gtest/gtest.h"
 
+static bool got_first_allocation;
+
 extern "C" int MallocHook_InitAtFirstAllocation_HeapLeakChecker() {
 #ifndef __FreeBSD__
   // Effing, FreeBSD. Super-annoying with broken everything when it is
   // early.
   printf("first mmap!\n");
 #endif
+  if (got_first_allocation) {
+    abort();
+  }
+  got_first_allocation = true;
   return 1;
 }
 
@@ -73,10 +84,20 @@
     assert(!have_last_evt_);
     memcpy(&last_evt_, &evt, sizeof(evt));
     have_last_evt_ = true;
+    assert(evt.stack_depth == 1);
+    backtrace_address_ = evt.stack[0];
+  }
+
+  void SetUp() {
+    have_last_evt_ = false;
+    backtrace_address_ = nullptr;
   }
 
   static void SetUpTestSuite() {
-    tcmalloc::HookMMapEvents(&hook_space_, &HandleMappingEvent);
+    tcmalloc::HookMMapEventsWithBacktrace(&hook_space_, &HandleMappingEvent,
+                                          [] (const tcmalloc::MappingEvent& evt) {
+                                            return 1;
+                                          });
   }
   static void TearDownTestSuite() {
     tcmalloc::UnHookMMapEvents(&hook_space_);
@@ -84,47 +105,11 @@
 
 protected:
   static inline tcmalloc::MappingEvent last_evt_;
+  static inline void* backtrace_address_;
   static inline bool have_last_evt_;
   static inline tcmalloc::MappingHookSpace hook_space_;
 };
 
-TEST_F(MMapHookTest, Sbrk) {
-  if (!tcmalloc::sbrk_hook_works) {
-    puts("sbrk test SKIPPED");
-    return;
-  }
-
-  void* addr = sbrk(8);
-
-  EXPECT_TRUE(last_evt_.is_sbrk);
-  EXPECT_TRUE(!last_evt_.before_valid && !last_evt_.file_valid && last_evt_.after_valid);
-  EXPECT_EQ(last_evt_.after_address, addr);
-  EXPECT_EQ(last_evt_.after_length, 8);
-
-  ASSERT_FALSE(HasFatalFailure());
-  have_last_evt_ = false;
-
-  void* addr2 = sbrk(16);
-
-  EXPECT_TRUE(last_evt_.is_sbrk);
-  EXPECT_TRUE(!last_evt_.before_valid && !last_evt_.file_valid && last_evt_.after_valid);
-  EXPECT_EQ(last_evt_.after_address, addr2);
-  EXPECT_EQ(last_evt_.after_length, 16);
-
-  ASSERT_FALSE(HasFatalFailure());
-  have_last_evt_ = false;
-
-  char* addr3 = static_cast<char*>(sbrk(-13));
-
-  EXPECT_TRUE(last_evt_.is_sbrk);
-  EXPECT_TRUE(last_evt_.before_valid && !last_evt_.file_valid && !last_evt_.after_valid);
-  EXPECT_EQ(last_evt_.before_address, addr3-13);
-  EXPECT_EQ(last_evt_.before_length, 13);
-
-  ASSERT_FALSE(HasFatalFailure());
-  have_last_evt_ = false;
-}
-
 TEST_F(MMapHookTest, MMap) {
   if (!tcmalloc::mmap_hook_works) {
     puts("mmap test SKIPPED");
@@ -162,6 +147,8 @@
   have_last_evt_ = false;
   ASSERT_FALSE(HasFatalFailure());
 
+  ASSERT_TRUE(got_first_allocation);
+
 #ifdef __linux__
   void* reserve = mmap(nullptr, pagesz * 2, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
   ASSERT_NE(reserve, MAP_FAILED) << "errno: " << strerror(errno);
@@ -232,4 +219,117 @@
   ASSERT_FALSE(HasFatalFailure());
 }
 
+TEST_F(MMapHookTest, MMapBacktrace) {
+  if (!tcmalloc::mmap_hook_works) {
+    puts("mmap backtrace test SKIPPED");
+    return;
+  }
+
+  using mmap_fn = void* (*)(void*, size_t, int, int, int, off_t);
+
+  static void* expected_address;
+
+  struct Helper {
+    // noinline ensures that all trampoline invocations will call fn
+    // with the same return address (inside trampoline). We use that
+    // to test backtrace accuracy.
+    static ATTRIBUTE_NOINLINE
+    void trampoline(void** res, mmap_fn fn) {
+      *res = noopt(fn)(nullptr, getpagesize(), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+    }
+    static void* prepare(void* hint, size_t sz, int prot, int flags, int fd, off_t off) {
+      EXPECT_EQ(1, GetStackTrace(&expected_address, 1, 1));
+      return nullptr;
+    }
+  };
+
+  void* addr;
+  Helper::trampoline(&addr, Helper::prepare);
+  ASSERT_NE(nullptr, expected_address);
+  ASSERT_EQ(nullptr, addr);
+
+  Helper::trampoline(&addr, mmap);
+  ASSERT_NE(nullptr, addr);
+  ASSERT_EQ(backtrace_address_, expected_address);
+}
+
+#ifdef HAVE_SBRK
+
+TEST_F(MMapHookTest, Sbrk) {
+  if (!tcmalloc::sbrk_hook_works) {
+    puts("sbrk test SKIPPED");
+    return;
+  }
+
+  void* addr = sbrk(8);
+
+  ASSERT_TRUE(got_first_allocation);
+
+  EXPECT_TRUE(last_evt_.is_sbrk);
+  EXPECT_TRUE(!last_evt_.before_valid && !last_evt_.file_valid && last_evt_.after_valid);
+  EXPECT_EQ(last_evt_.after_address, addr);
+  EXPECT_EQ(last_evt_.after_length, 8);
+
+  ASSERT_FALSE(HasFatalFailure());
+  have_last_evt_ = false;
+
+  void* addr2 = sbrk(16);
+
+  EXPECT_TRUE(last_evt_.is_sbrk);
+  EXPECT_TRUE(!last_evt_.before_valid && !last_evt_.file_valid && last_evt_.after_valid);
+  EXPECT_EQ(last_evt_.after_address, addr2);
+  EXPECT_EQ(last_evt_.after_length, 16);
+
+  ASSERT_FALSE(HasFatalFailure());
+  have_last_evt_ = false;
+
+  char* addr3 = static_cast<char*>(sbrk(-13));
+
+  EXPECT_TRUE(last_evt_.is_sbrk);
+  EXPECT_TRUE(last_evt_.before_valid && !last_evt_.file_valid && !last_evt_.after_valid);
+  EXPECT_EQ(last_evt_.before_address, addr3-13);
+  EXPECT_EQ(last_evt_.before_length, 13);
+
+  ASSERT_FALSE(HasFatalFailure());
+  have_last_evt_ = false;
+}
+
+TEST_F(MMapHookTest, SbrkBacktrace) {
+  if (!tcmalloc::sbrk_hook_works) {
+    puts("sbrk backtrace test SKIPPED");
+    return;
+  }
+
+  static void* expected_address;
+
+  struct Helper {
+    // noinline ensures that all trampoline invocations will call fn
+    // with the same return address (inside trampoline). We use that
+    // to test backtrace accuracy.
+    static ATTRIBUTE_NOINLINE
+    void trampoline(void** res, void* (*fn)(intptr_t increment)) {
+      *res = noopt(fn)(32);
+    }
+    static void* prepare(intptr_t increment) {
+      EXPECT_EQ(1, GetStackTrace(&expected_address, 1, 1));
+      return nullptr;
+    }
+  };
+
+  void* addr;
+  Helper::trampoline(&addr, Helper::prepare);
+  ASSERT_NE(nullptr, expected_address);
+  ASSERT_EQ(nullptr, addr);
+
+  printf("expected_address: %p, &trampoline: %p\n",
+         expected_address, reinterpret_cast<void*>(&Helper::trampoline));
+
+  // Why cast? Because some OSes define sbrk as accepting long.
+  Helper::trampoline(&addr, reinterpret_cast<void*(*)(intptr_t)>(sbrk));
+  ASSERT_NE(nullptr, addr);
+  ASSERT_EQ(backtrace_address_, expected_address);
+}
+
+#endif // HAVE_SBRK
+
 #endif // HAVE_MMAP