Revert "[heap profiler] Make use of thread_local instead of base::TLS"
This reverts commit 684f41852a1ac8403b6c2447db684a3ee4002619.
Reason for revert: It broke the ios-simulator-cronet builder:
https://ci.chromium.org/p/chromium/builders/luci.chromium.try/ios-simulator-cronet
The ios-simulator-cronet compiler says:
../../base/sampling_heap_profiler/poisson_allocation_sampler.cc:101:1: error: thread-local storage is not supported for the current target
thread_local bool g_internal_reentry_guard;
^
../../base/sampling_heap_profiler/poisson_allocation_sampler.cc:104:1: error: thread-local storage is not supported for the current target
thread_local intptr_t g_accumulated_bytes_tls;
^
../../base/sampling_heap_profiler/poisson_allocation_sampler.cc:113:1: error: thread-local storage is not supported for the current target
thread_local bool g_sampling_interval_initialized_tls;
^
3 errors generated.
This blocks CLs that trigger that builder, including
https://chromium-review.googlesource.com/c/chromium/src/+/1475156
https://chromium-review.googlesource.com/c/chromium/src/+/1430089
Original change's description:
> [heap profiler] Make use of thread_local instead of base::TLS
>
> The C++ thread_local is slightly faster while making the code clear.
> Besides that calls to TlsGetValue on Windows may alter the result of
> GetLastError, thus changing behavior of the underlying code.
>
> BUG=920440
>
> Change-Id: Ic89632f4a54f35d58b93cdecfffc68fc1a94dac1
> Reviewed-on: https://chromium-review.googlesource.com/c/1461681
> Reviewed-by: Vlad Tsyrklevich <vtsyrklevich@chromium.org>
> Reviewed-by: Erik Chen <erikchen@chromium.org>
> Commit-Queue: Alexei Filippov <alph@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#632819}
TBR=alph@chromium.org,erikchen@chromium.org,vtsyrklevich@chromium.org
# Not skipping CQ checks because original CL landed > 1 day ago.
Bug: 920440
Change-Id: Iad4158b406c1b28d14382a8a711a0304c094389c
Reviewed-on: https://chromium-review.googlesource.com/c/1477076
Reviewed-by: Daniel Bratell <bratell@opera.com>
Commit-Queue: Daniel Bratell <bratell@opera.com>
Cr-Commit-Position: refs/heads/master@{#633092}
diff --git a/base/sampling_heap_profiler/poisson_allocation_sampler.cc b/base/sampling_heap_profiler/poisson_allocation_sampler.cc
index 3522e328..1f393456 100644
--- a/base/sampling_heap_profiler/poisson_allocation_sampler.cc
+++ b/base/sampling_heap_profiler/poisson_allocation_sampler.cc
@@ -19,49 +19,87 @@
#include "base/sampling_heap_profiler/lock_free_address_hash_set.h"
#include "build/build_config.h"
-#if defined(OS_MACOSX) || defined(OS_ANDROID)
+#if defined(OS_POSIX)
#include <pthread.h>
#endif
+#if defined(OS_WIN)
+#include <windows.h>
+#endif
+
namespace base {
using allocator::AllocatorDispatch;
namespace {
-#if defined(OS_MACOSX) || defined(OS_ANDROID)
+// PoissonAllocationSampler cannot use ThreadLocalStorage, as during thread
+// exiting when TLS storage is already released, there might be a call to
+// |free| which would trigger the profiler hook and would make it access TLS.
+// It instead uses OS primitives directly. As it only stores POD types it
+// does not need thread exit callbacks.
+#if defined(OS_WIN)
-// The macOS implementation of libmalloc sometimes calls malloc recursively,
+using TLSKey = DWORD;
+
+void TLSInit(TLSKey* key) {
+ *key = ::TlsAlloc();
+ CHECK_NE(TLS_OUT_OF_INDEXES, *key);
+}
+
+uintptr_t TLSGetValue(const TLSKey& key) {
+ return reinterpret_cast<uintptr_t>(::TlsGetValue(key));
+}
+
+void TLSSetValue(const TLSKey& key, uintptr_t value) {
+ ::TlsSetValue(key, reinterpret_cast<LPVOID>(value));
+}
+
+#else // defined(OS_WIN)
+
+using TLSKey = pthread_key_t;
+
+void TLSInit(TLSKey* key) {
+ int result = pthread_key_create(key, nullptr);
+ CHECK_EQ(0, result);
+}
+
+uintptr_t TLSGetValue(const TLSKey& key) {
+ return reinterpret_cast<uintptr_t>(pthread_getspecific(key));
+}
+
+void TLSSetValue(const TLSKey& key, uintptr_t value) {
+ pthread_setspecific(key, reinterpret_cast<void*>(value));
+}
+
+#endif
+
+// On MacOS the implementation of libmalloc sometimes calls malloc recursively,
// delegating allocations between zones. That causes our hooks being called
// twice. The scoped guard allows us to detect that.
-//
-// Besides that the implementations of thread_local on macOS and Android
-// seem to allocate memory lazily on the first access to thread_local variables.
-// Make use of pthread TLS instead of C++ thread_local there.
+#if defined(OS_MACOSX)
+
class ReentryGuard {
public:
- ReentryGuard() : allowed_(!pthread_getspecific(entered_key_)) {
- pthread_setspecific(entered_key_, reinterpret_cast<void*>(true));
+ ReentryGuard() : allowed_(!TLSGetValue(entered_key_)) {
+ TLSSetValue(entered_key_, true);
}
~ReentryGuard() {
if (LIKELY(allowed_))
- pthread_setspecific(entered_key_, nullptr);
+ TLSSetValue(entered_key_, false);
}
operator bool() { return allowed_; }
- static void Init() {
- int error = pthread_key_create(&entered_key_, nullptr);
- CHECK(!error);
- }
+ static void Init() { TLSInit(&entered_key_); }
private:
bool allowed_;
- static pthread_key_t entered_key_;
+ static TLSKey entered_key_;
};
-pthread_key_t ReentryGuard::entered_key_;
+TLSKey ReentryGuard::entered_key_;
#else
@@ -73,44 +111,21 @@
#endif
+TLSKey g_internal_reentry_guard;
+
const size_t kDefaultSamplingIntervalBytes = 128 * 1024;
-// Notes on TLS usage:
-//
-// * There's no safe way to use TLS in malloc() as both C++ thread_local and
-// pthread do not pose any guarantees on whether they allocate or not.
-// * We think that we can safely use thread_local w/o re-entrancy guard because
-// the compiler will use "tls static access model" for static builds of
-// Chrome [https://www.uclibc.org/docs/tls.pdf].
-// But there's no guarantee that this will stay true, and in practice
-// it seems to have problems on macOS/Android. These platforms do allocate
-// on the very first access to a thread_local on each thread.
-// * Directly using/warming-up platform TLS seems to work on all platforms,
-// but is also not guaranteed to stay true. We make use of it for reentrancy
-// guards on macOS/Android.
-// * We cannot use Windows Tls[GS]etValue API as it modifies the result of
-// GetLastError.
-//
-// Android thread_local seems to be using __emutls_get_address from libgcc:
-// https://github.com/gcc-mirror/gcc/blob/master/libgcc/emutls.c
-// macOS version is based on _tlv_get_addr from dyld:
-// https://opensource.apple.com/source/dyld/dyld-635.2/src/threadLocalHelpers.s.auto.html
-
-// The guard protects against reentering on platforms other the macOS and
-// Android.
-thread_local bool g_internal_reentry_guard;
-
// Accumulated bytes towards sample thread local key.
-thread_local intptr_t g_accumulated_bytes_tls;
+TLSKey g_accumulated_bytes_tls;
-// A boolean used to distinguish first allocation on a thread:
-// false - first allocation on the thread;
-// true - otherwise.
+// A boolean used to distinguish first allocation on a thread.
+// false - first allocation on the thread.
+// true - otherwise
// Since g_accumulated_bytes_tls is initialized with zero the very first
// allocation on a thread would always trigger the sample, thus skewing the
// profile towards such allocations. To mitigate that we use the flag to
// ensure the first allocation is properly accounted.
-thread_local bool g_sampling_interval_initialized_tls;
+TLSKey g_sampling_interval_initialized_tls;
// Controls if sample intervals should not be randomized. Used for testing.
bool g_deterministic;
@@ -299,18 +314,18 @@
} // namespace
PoissonAllocationSampler::ScopedMuteThreadSamples::ScopedMuteThreadSamples() {
- DCHECK(!g_internal_reentry_guard);
- g_internal_reentry_guard = true;
+ DCHECK(!TLSGetValue(g_internal_reentry_guard));
+ TLSSetValue(g_internal_reentry_guard, true);
}
PoissonAllocationSampler::ScopedMuteThreadSamples::~ScopedMuteThreadSamples() {
- DCHECK(g_internal_reentry_guard);
- g_internal_reentry_guard = false;
+ DCHECK(TLSGetValue(g_internal_reentry_guard));
+ TLSSetValue(g_internal_reentry_guard, false);
}
// static
bool PoissonAllocationSampler::ScopedMuteThreadSamples::IsMuted() {
- return g_internal_reentry_guard;
+ return TLSGetValue(g_internal_reentry_guard);
}
PoissonAllocationSampler* PoissonAllocationSampler::instance_;
@@ -328,6 +343,9 @@
void PoissonAllocationSampler::Init() {
static bool init_once = []() {
ReentryGuard::Init();
+ TLSInit(&g_internal_reentry_guard);
+ TLSInit(&g_accumulated_bytes_tls);
+ TLSInit(&g_sampling_interval_initialized_tls);
return true;
}();
ignore_result(init_once);
@@ -409,11 +427,11 @@
const char* context) {
if (UNLIKELY(!g_running.load(std::memory_order_relaxed)))
return;
- g_accumulated_bytes_tls += size;
- intptr_t accumulated_bytes = g_accumulated_bytes_tls;
+ intptr_t accumulated_bytes = TLSGetValue(g_accumulated_bytes_tls) + size;
if (LIKELY(accumulated_bytes < 0))
- return;
- instance_->DoRecordAlloc(accumulated_bytes, size, address, type, context);
+ TLSSetValue(g_accumulated_bytes_tls, accumulated_bytes);
+ else
+ instance_->DoRecordAlloc(accumulated_bytes, size, address, type, context);
}
void PoissonAllocationSampler::DoRecordAlloc(intptr_t accumulated_bytes,
@@ -434,10 +452,10 @@
++samples;
} while (accumulated_bytes >= 0);
- g_accumulated_bytes_tls = accumulated_bytes;
+ TLSSetValue(g_accumulated_bytes_tls, accumulated_bytes);
- if (UNLIKELY(!g_sampling_interval_initialized_tls)) {
- g_sampling_interval_initialized_tls = true;
+ if (UNLIKELY(!TLSGetValue(g_sampling_interval_initialized_tls))) {
+ TLSSetValue(g_sampling_interval_initialized_tls, true);
// This is the very first allocation on the thread. It always produces an
// extra sample because g_accumulated_bytes_tls is initialized with zero
// due to TLS semantics.
@@ -446,7 +464,7 @@
return;
}
- if (UNLIKELY(ScopedMuteThreadSamples::IsMuted()))
+ if (UNLIKELY(TLSGetValue(g_internal_reentry_guard)))
return;
ScopedMuteThreadSamples no_reentrancy_scope;
@@ -473,7 +491,7 @@
}
void PoissonAllocationSampler::DoRecordFree(void* address) {
- if (UNLIKELY(ScopedMuteThreadSamples::IsMuted()))
+ if (UNLIKELY(TLSGetValue(g_internal_reentry_guard)))
return;
ScopedMuteThreadSamples no_reentrancy_scope;
AutoLock lock(mutex_);
diff --git a/base/sampling_heap_profiler/sampling_heap_profiler_unittest.cc b/base/sampling_heap_profiler/sampling_heap_profiler_unittest.cc
index 8e08e01..110d8256 100644
--- a/base/sampling_heap_profiler/sampling_heap_profiler_unittest.cc
+++ b/base/sampling_heap_profiler/sampling_heap_profiler_unittest.cc
@@ -9,7 +9,6 @@
#include "base/allocator/allocator_shim.h"
#include "base/debug/alias.h"
-#include "base/rand_util.h"
#include "base/threading/simple_thread.h"
#include "build/build_config.h"
#include "testing/gtest/include/gtest/gtest.h"
@@ -201,40 +200,4 @@
});
}
-// Platform TLS: alloc+free[ns]: 22.184 alloc[ns]: 8.910 free[ns]: 13.274
-// thread_local: alloc+free[ns]: 18.353 alloc[ns]: 5.021 free[ns]: 13.331
-TEST_F(SamplingHeapProfilerTest, MANUAL_SamplerMicroBenchmark) {
- // With the sampling interval of 100KB it happens to record ~ every 450th
- // allocation in the browser process. We model this pattern here.
- constexpr size_t sampling_interval = 100000;
- constexpr size_t allocation_size = sampling_interval / 450;
- SamplesCollector collector(0);
- auto* sampler = PoissonAllocationSampler::Get();
- sampler->SetSamplingInterval(sampling_interval);
- sampler->AddSamplesObserver(&collector);
- int kNumAllocations = 50000000;
-
- base::TimeTicks t0 = base::TimeTicks::Now();
- for (int i = 1; i <= kNumAllocations; ++i) {
- sampler->RecordAlloc(
- reinterpret_cast<void*>(static_cast<intptr_t>(i)), allocation_size,
- PoissonAllocationSampler::AllocatorType::kMalloc, nullptr);
- }
- base::TimeTicks t1 = base::TimeTicks::Now();
- for (int i = 1; i <= kNumAllocations; ++i)
- sampler->RecordFree(reinterpret_cast<void*>(static_cast<intptr_t>(i)));
- base::TimeTicks t2 = base::TimeTicks::Now();
-
- printf(
- "alloc+free[ns]: %.3f alloc[ns]: %.3f free[ns]: %.3f "
- "alloc+free[mln/s]: %.1f total[ms]: %.1f\n",
- (t2 - t0).InNanoseconds() * 1. / kNumAllocations,
- (t1 - t0).InNanoseconds() * 1. / kNumAllocations,
- (t2 - t1).InNanoseconds() * 1. / kNumAllocations,
- kNumAllocations / (t2 - t0).InMicrosecondsF(),
- (t2 - t0).InMillisecondsF());
-
- sampler->RemoveSamplesObserver(&collector);
-}
-
} // namespace base