Added support to configure a lower bound on per-thread cache size
[alkondratenko@gmail.com: removed spurious new line at thread_cache.h]
Signed-off-by: Aliaksey Kandratsenka <alkondratenko@gmail.com>
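
The new knob is exposed through the malloc extension interface wired up
below. A minimal usage sketch (the property name is the one introduced by
this patch; error handling omitted):

    #include <gperftools/malloc_extension.h>

    int main() {
      // Guarantee each thread cache at least 1 MiB, even when the overall
      // budget divided by the thread count would leave less than that.
      MallocExtension::instance()->SetNumericProperty(
          "tcmalloc.min_per_thread_cache_bytes", 1 << 20);

      size_t floor_bytes = 0;
      MallocExtension::instance()->GetNumericProperty(
          "tcmalloc.min_per_thread_cache_bytes", &floor_bytes);
      // floor_bytes is now 1 << 20.
      return 0;
    }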
diff --git a/.gitignore b/.gitignore
index 87e16a4..b3f7603 100644
--- a/.gitignore
+++ b/.gitignore
@@ -155,6 +155,8 @@
/test-driver
/thread_dealloc_unittest
/thread_dealloc_unittest.exe
+/min_per_thread_cache_size_test
+/min_per_thread_cache_size_test.exe
/unique_path_unittest
/unique_path_unittest.exe
/unwind_bench
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7e969ee..f24d3d3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -687,6 +687,10 @@
src/tests/testutil.cc)
target_link_libraries(thread_dealloc_unittest tcmalloc_minimal)
add_test(thread_dealloc_unittest thread_dealloc_unittest)
+
+ add_executable(min_per_thread_cache_size_test src/tests/min_per_thread_cache_size_test.cc)
+ target_link_libraries(min_per_thread_cache_size_test tcmalloc_minimal gtest)
+ add_test(min_per_thread_cache_size_test min_per_thread_cache_size_test)
endif()
### ------- tcmalloc_minimal_debug (thread-caching malloc with debugallocation)
diff --git a/Makefile.am b/Makefile.am
index 35ff4e1..4fa6349 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -467,6 +467,12 @@
thread_dealloc_unittest_LDFLAGS = $(TCMALLOC_FLAGS) $(AM_LDFLAGS)
thread_dealloc_unittest_LDADD = libtcmalloc_minimal.la
+TESTS += min_per_thread_cache_size_test
+min_per_thread_cache_size_test_SOURCES = src/tests/min_per_thread_cache_size_test.cc
+min_per_thread_cache_size_test_LDFLAGS = $(TCMALLOC_FLAGS) $(AM_LDFLAGS)
+min_per_thread_cache_size_test_CPPFLAGS = $(gtest_CPPFLAGS)
+min_per_thread_cache_size_test_LDADD = libtcmalloc_minimal.la libgtest.la
+
### Documentation
dist_doc_DATA += docs/tcmalloc.html \
docs/overview.gif \
diff --git a/docs/tcmalloc.html b/docs/tcmalloc.html
index 33b8cc5..2096560 100644
--- a/docs/tcmalloc.html
+++ b/docs/tcmalloc.html
@@ -403,7 +403,7 @@
</table>
-<ul>
+<ul>
<li> TCMalloc is much more consistently scalable than PTMalloc2 - for
all thread counts >1 it achieves ~7-9 million ops/sec for small
allocations, falling to ~2 million ops/sec for larger
@@ -517,7 +517,7 @@
<tr valign=top>
<td><code>TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES</code></td>
- <td>default: 16777216</td>
+ <td>default: 33554432</td>
<td>
Bound on the total amount of bytes allocated to thread caches. This
bound is not strict, so it is possible for the cache to go over this
@@ -746,6 +746,16 @@
</td>
</tr>
+<tr valign=top>
+ <td><code>tcmalloc.min_per_thread_cache_bytes</code></td>
+ <td>
+ A lower bound on how much memory TCMalloc dedicates to small objects in
+ each thread cache. Note that this property only takes effect when the
+ per-thread cache size computed from tcmalloc.max_total_thread_cache_bytes
+ would otherwise fall below tcmalloc.min_per_thread_cache_bytes.
+ </td>
+</tr>
+
</table>
<h2><A NAME="caveats">Caveats</A></h2>
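
To make the sizing rule concrete: each thread's budget is roughly
max_total_thread_cache_bytes divided by the number of thread caches,
clipped from below by the new property and from above by the compiled-in
per-thread maximum (see the ComputeThreadCacheSize() change further down).
A condensed restatement, assuming the stock 4 MiB upper clamp
(kMaxThreadCacheSize; the exact value is a build-configuration assumption):

    #include <algorithm>
    #include <cstddef>

    // Not the actual tcmalloc code; a sketch of the sizing rule.
    size_t PerThreadBudget(size_t max_total, size_t num_threads,
                           size_t min_per_thread) {
      size_t space = max_total / (num_threads > 0 ? num_threads : 1);
      space = std::max(space, min_per_thread);   // new lower bound
      space = std::min(space, size_t{4} << 20);  // kMaxThreadCacheSize
      return space;
    }

For example, with the 33554432-byte default budget and 128 threads, each
thread's computed share is 256 kB, so the 512 kB default floor overrides
it. When the computed share already exceeds the floor, the property has
no effect.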
diff --git a/src/gperftools/malloc_extension.h b/src/gperftools/malloc_extension.h
index cf55939..3f3f0b0 100644
--- a/src/gperftools/malloc_extension.h
+++ b/src/gperftools/malloc_extension.h
@@ -175,7 +175,14 @@
// --------
// "tcmalloc.max_total_thread_cache_bytes"
// Upper limit on total number of bytes stored across all
- // per-thread caches. Default: 16MB.
+ // per-thread caches. Default: 32MB.
+ //
+ // "tcmalloc.min_per_thread_cache_bytes"
+ // Lower bound on the number of bytes stored in each per-thread cache.
+ // Default: 512kB.
+ // Note that this property only takes effect when the per-thread cache
+ // size computed from tcmalloc.max_total_thread_cache_bytes would
+ // otherwise fall below this value.
//
// "tcmalloc.current_total_thread_cache_bytes"
// Number of bytes used across all thread caches.
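
The same properties are also reachable through the C shim in
<gperftools/malloc_extension_c.h> (which the new test below includes).
A sketch; both calls return non-zero on success:

    #include <gperftools/malloc_extension_c.h>
    #include <stdio.h>

    int main(void) {
      size_t floor_bytes = 0;
      // Raise the floor to 256 kB, then read it back.
      MallocExtension_SetNumericProperty(
          "tcmalloc.min_per_thread_cache_bytes", 256 << 10);
      MallocExtension_GetNumericProperty(
          "tcmalloc.min_per_thread_cache_bytes", &floor_bytes);
      printf("floor: %zu bytes\n", floor_bytes);
      return 0;
    }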
diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc
index 98bae87..c51a351 100644
--- a/src/tcmalloc.cc
+++ b/src/tcmalloc.cc
@@ -833,6 +833,11 @@
return true;
}
+ if (strcmp(name, "tcmalloc.min_per_thread_cache_bytes") == 0) {
+ *value = ThreadCache::min_per_thread_cache_size();
+ return true;
+ }
+
if (strcmp(name, "tcmalloc.current_total_thread_cache_bytes") == 0) {
TCMallocStats stats;
ExtractStats(&stats, NULL, NULL, NULL);
@@ -876,6 +881,11 @@
return true;
}
+ if (strcmp(name, "tcmalloc.min_per_thread_cache_bytes") == 0) {
+ ThreadCache::set_min_per_thread_cache_size(value);
+ return true;
+ }
+
if (strcmp(name, "tcmalloc.aggressive_memory_decommit") == 0) {
SpinLockHolder l(Static::pageheap_lock());
Static::pageheap()->SetAggressiveDecommit(value != 0);
diff --git a/src/tests/min_per_thread_cache_size_test.cc b/src/tests/min_per_thread_cache_size_test.cc
new file mode 100644
index 0000000..a0c6d24
--- /dev/null
+++ b/src/tests/min_per_thread_cache_size_test.cc
@@ -0,0 +1,127 @@
+/* -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+ * Copyright (c) 2024, gperftools Contributors
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config_for_unittests.h"
+
+#include <algorithm>
+#include <condition_variable>
+#include <mutex>
+#include <new>
+#include <thread>
+#include <vector>
+
+#include <gperftools/malloc_extension.h>
+#include <gperftools/malloc_extension_c.h>
+
+#include "base/logging.h"
+#include "gtest/gtest.h"
+
+// Number of allocations per thread.
+static const int kAllocationsPerThread = 10000;
+
+// Number of threads to create.
+static const int kNumThreads = 50;
+
+// Per thread cache size to set.
+static const size_t kPerThreadCacheSize = 64 << 10;
+
+// Number of passes to run.
+static const int kNumPasses = 10;
+
+// Get current total thread-cache size.
+static size_t CurrentThreadCacheSize() {
+ size_t result = 0;
+ EXPECT_TRUE(MallocExtension::instance()->GetNumericProperty(
+ "tcmalloc.current_total_thread_cache_bytes",
+ &result));
+ return result;
+}
+
+// Maximum cache size seen so far.
+static size_t max_cache_size;
+
+// Mutex and condition variable to synchronize threads.
+std::mutex filler_mtx;
+std::condition_variable filler_cv;
+int current_thread = 0;
+
+// A thread that cycles through allocating lots of objects of varying
+// size, in an attempt to fill up its thread cache.
+void Filler(int thread_id, int num_threads) {
+ std::unique_lock<std::mutex> filler_lock(filler_mtx);
+ for (int i = 0; i < kNumPasses; i++) {
+ // Wait for the current thread to be the one that should run.
+ filler_cv.wait(filler_lock, [thread_id] { return thread_id == current_thread; });
+
+ // Fill the cache by allocating and deallocating objects of varying sizes.
+ int size = 0;
+ for (int j = 0; j < kAllocationsPerThread; j++) {
+ void* p = ::operator new(size);
+ ::operator delete(p);
+ size += 64;
+ if (size > (32 << 10)) size = 0;
+ }
+
+ // Update the maximum total cache size seen so far.
+ const size_t cache_size = CurrentThreadCacheSize();
+ max_cache_size = std::max(max_cache_size, cache_size);
+
+ // Move to the next thread.
+ current_thread = (current_thread + 1) % num_threads;
+ filler_cv.notify_all();
+ }
+}
+
+TEST(MinPerThreadCacheSizeTest, Basics) {
+ // Start all threads.
+ std::vector<std::thread> threads;
+ threads.reserve(kNumThreads);
+
+ // Set the lower bound on per-thread cache size.
+ CHECK(MallocExtension::instance()->SetNumericProperty(
+ "tcmalloc.min_per_thread_cache_bytes", kPerThreadCacheSize));
+
+ // Set the max total thread cache size to 0 so that the per-thread
+ // cache size is clamped to the lower bound.
+ CHECK(MallocExtension::instance()->SetNumericProperty(
+ "tcmalloc.max_total_thread_cache_bytes", 0));
+
+ for (int i = 0; i < kNumThreads; i++) {
+ threads.emplace_back(Filler, i, kNumThreads);
+ }
+
+ // Wait for all threads to finish.
+ for (auto& t : threads) { t.join(); }
+
+ // Check that the maximum cache size does not exceed the limit set.
+ ASSERT_LT(max_cache_size, kPerThreadCacheSize * kNumThreads);
+}
+
diff --git a/src/thread_cache.cc b/src/thread_cache.cc
index ba40e61..3ac1019 100644
--- a/src/thread_cache.cc
+++ b/src/thread_cache.cc
@@ -67,6 +67,8 @@
static bool phinited = false;
volatile size_t ThreadCache::per_thread_cache_size_ = kMaxThreadCacheSize;
+
+std::atomic<size_t> ThreadCache::min_per_thread_cache_size_ = kMinThreadCacheSize;
size_t ThreadCache::overall_thread_cache_size_ = kDefaultOverallThreadCacheSize;
ssize_t ThreadCache::unclaimed_cache_space_ = kDefaultOverallThreadCacheSize;
PageHeapAllocator<ThreadCache> threadcache_allocator;
@@ -84,10 +86,11 @@
if (max_size_ == 0) {
// There isn't enough memory to go around. Just give the minimum to
// this thread.
- SetMaxSize(kMinThreadCacheSize);
+ size_t min_size = min_per_thread_cache_size_.load(std::memory_order_relaxed);
+ SetMaxSize(min_size);
// Take unclaimed_cache_space_ negative.
- unclaimed_cache_space_ -= kMinThreadCacheSize;
+ unclaimed_cache_space_ -= min_size;
ASSERT(unclaimed_cache_space_ < 0);
}
@@ -267,7 +270,8 @@
next_memory_steal_ = thread_heaps_;
}
if (next_memory_steal_ == this ||
- next_memory_steal_->max_size_ <= kMinThreadCacheSize) {
+ next_memory_steal_->max_size_
+ <= min_per_thread_cache_size_.load(std::memory_order_relaxed)) {
continue;
}
next_memory_steal_->SetMaxSize(next_memory_steal_->max_size_ - kStealAmount);
@@ -352,8 +356,9 @@
int n = thread_heap_count_ > 0 ? thread_heap_count_ : 1;
size_t space = overall_thread_cache_size_ / n;
+ size_t min_size = min_per_thread_cache_size_.load(std::memory_order_relaxed);
// Limit to allowed range
- if (space < kMinThreadCacheSize) space = kMinThreadCacheSize;
+ if (space < min_size) space = min_size;
if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize;
double ratio = space / max<double>(1, per_thread_cache_size_);
@@ -383,7 +388,10 @@
void ThreadCache::set_overall_thread_cache_size(size_t new_size) {
// Clip the value to a reasonable range
- if (new_size < kMinThreadCacheSize) new_size = kMinThreadCacheSize;
+ size_t min_size = min_per_thread_cache_size_.load(std::memory_order_relaxed);
+ if (new_size < min_size) {
+ new_size = min_size;
+ }
if (new_size > (1<<30)) new_size = (1<<30); // Limit to 1GB
overall_thread_cache_size_ = new_size;
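
A note on the memory_order_relaxed choice above: the floor is a standalone
tuning value through which no other data is published, so loads only need
to observe some recently stored value; no acquire/release pairing is
required. The pattern in isolation (hypothetical names, not part of this
patch):

    #include <atomic>
    #include <cstddef>

    // A lone tuning knob: readers tolerate slightly stale values and the
    // knob does not guard any other memory, so relaxed ordering suffices.
    static std::atomic<size_t> cache_floor{512 << 10};

    void SetFloor(size_t v) { cache_floor.store(v, std::memory_order_relaxed); }
    size_t GetFloor() { return cache_floor.load(std::memory_order_relaxed); }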
diff --git a/src/thread_cache.h b/src/thread_cache.h
index 5231e1a..7454688 100644
--- a/src/thread_cache.h
+++ b/src/thread_cache.h
@@ -35,6 +35,7 @@
#define TCMALLOC_THREAD_CACHE_H_
#include <config.h>
+#include <atomic>
#include <stddef.h> // for size_t, NULL
#include <stdint.h> // for uint32_t, uint64_t
#include <sys/types.h> // for ssize_t
@@ -114,6 +115,15 @@
return overall_thread_cache_size_;
}
+ // Sets the lower bound on per-thread cache size to new_size.
+ static void set_min_per_thread_cache_size(size_t new_size) {
+ min_per_thread_cache_size_.store(new_size, std::memory_order_relaxed);
+ }
+
+ static size_t min_per_thread_cache_size() {
+ return min_per_thread_cache_size_.load(std::memory_order_relaxed);
+ }
+
static int thread_heap_count() {
return thread_heap_count_;
}
@@ -263,6 +273,9 @@
// thread_heaps_. Protected by Static::pageheap_lock.
static ThreadCache* next_memory_steal_;
+ // Lower bound on per-thread cache size. Default value is 512 kB.
+ static std::atomic<size_t> min_per_thread_cache_size_;
+
// Overall thread cache size. Protected by Static::pageheap_lock.
static size_t overall_thread_cache_size_;