| // -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- |
| // Copyright (c) 2008, Google Inc. |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are |
| // met: |
| // |
| // * Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above |
| // copyright notice, this list of conditions and the following disclaimer |
| // in the documentation and/or other materials provided with the |
| // distribution. |
| // * Neither the name of Google Inc. nor the names of its |
| // contributors may be used to endorse or promote products derived from |
| // this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| // --- |
| // Author: Sanjay Ghemawat <opensource@google.com> |
| |
| #ifndef TCMALLOC_THREAD_CACHE_H_ |
| #define TCMALLOC_THREAD_CACHE_H_ |
| |
| #include <config.h> |
| #include <atomic> |
| #include <stddef.h> // for size_t, NULL |
| #include <stdint.h> // for uint32_t, uint64_t |
| #include <sys/types.h> // for ssize_t |
| #include "base/commandlineflags.h" |
| #include "base/threading.h" |
| #include "common.h" |
| #include "linked_list.h" |
| #include "page_heap_allocator.h" |
| #include "sampler.h" |
| #include "static_vars.h" |
| |
| #include "common.h" // for SizeMap, kMaxSize, etc |
| #include "internal_logging.h" // for ASSERT, etc |
| #include "linked_list.h" // for SLL_Pop, SLL_PopRange, etc |
| #include "page_heap_allocator.h" // for PageHeapAllocator |
| #include "sampler.h" // for Sampler |
| #include "static_vars.h" // for Static |
| |
| DECLARE_int64(tcmalloc_sample_parameter); |
| |
| namespace tcmalloc { |
| |
| //------------------------------------------------------------------- |
| // Data kept per thread |
| //------------------------------------------------------------------- |
| |
| class ThreadCache { |
| public: |
| // Allocate a new heap. REQUIRES: Static::pageheap_lock is not held. |
| static ThreadCache* NewHeap(); |
| // REQUIRES: Static::pageheap_lock is not held. |
| static void DeleteCache(ThreadCache* heap); |
| |
| // REQUIRES: Static::pageheap_lock is held |
| ThreadCache(); |
| // REQUIRES: Static::pageheap_lock is not held |
| ~ThreadCache(); |
| |
| // Accessors (mostly just for printing stats) |
| int freelist_length(uint32_t cl) const { return list_[cl].length(); } |
| |
| // Total byte size in cache |
| size_t Size() const { return size_; } |
| |
| // Allocate an object of the given size and class. The size given |
| // must be the same as the size of the class in the size map. |
| void* Allocate(size_t size, uint32_t cl, void *(*oom_handler)(size_t size)); |
| void Deallocate(void* ptr, uint32_t size_class); |
| |
| void Scavenge(); |
| |
| int GetSamplePeriod(); |
| |
| // Record allocation of "k" bytes. Return true iff allocation |
| // should be sampled |
| bool SampleAllocation(size_t k); |
| |
| bool TryRecordAllocationFast(size_t k); |
| |
| static void InitModule(); |
| |
| // Return the number of thread heaps in use. |
| static inline int HeapsInUse(); |
| |
| // Adds to *total_bytes the total number of bytes used by all thread heaps. |
| // Also, if class_count is not NULL, it must be an array of size kNumClasses, |
| // and this function will increment each element of class_count by the number |
| // of items in all thread-local freelists of the corresponding size class. |
| // REQUIRES: Static::pageheap_lock is held. |
| static void GetThreadStats(uint64_t* total_bytes, uint64_t* class_count); |
| |
| // Sets the total thread cache size to new_size, recomputing the |
| // individual thread cache sizes as necessary. |
| // REQUIRES: Static::pageheap lock is held. |
| static void set_overall_thread_cache_size(size_t new_size); |
| static size_t overall_thread_cache_size() { |
| return overall_thread_cache_size_; |
| } |
| |
| // Sets the lower bound on per-thread cache size to new_size. |
| static void set_min_per_thread_cache_size(size_t new_size) { |
| min_per_thread_cache_size_.store(new_size, std::memory_order_relaxed); |
| } |
| |
| static size_t min_per_thread_cache_size() { |
| return min_per_thread_cache_size_.load(std::memory_order_relaxed); |
| } |
| |
| static int thread_heap_count() { |
| return thread_heap_count_; |
| } |
| |
| private: |
| class FreeList { |
| private: |
| void* list_; // Linked list of nodes |
| |
| #ifdef _LP64 |
| // On 64-bit hardware, manipulating 16-bit values may be slightly slow. |
| uint32_t length_; // Current length. |
| uint32_t lowater_; // Low water mark for list length. |
| uint32_t max_length_; // Dynamic max list length based on usage. |
| // Tracks the number of times a deallocation has caused |
| // length_ > max_length_. After the kMaxOverages'th time, max_length_ |
| // shrinks and length_overages_ is reset to zero. |
| uint32_t length_overages_; |
| #else |
| // If we aren't using 64-bit pointers then pack these into less space. |
| uint16_t length_; |
| uint16_t lowater_; |
| uint16_t max_length_; |
| uint16_t length_overages_; |
| #endif |
| |
| int32_t size_; |
| |
| public: |
| void Init(size_t size) { |
| list_ = NULL; |
| length_ = 0; |
| lowater_ = 0; |
| max_length_ = 1; |
| length_overages_ = 0; |
| size_ = size; |
| } |
| |
| // Return current length of list |
| size_t length() const { |
| return length_; |
| } |
| |
| int32_t object_size() const { |
| return size_; |
| } |
| |
| // Return the maximum length of the list. |
| size_t max_length() const { |
| return max_length_; |
| } |
| |
| // Set the maximum length of the list. If 'new_max' > length(), the |
| // client is responsible for removing objects from the list. |
| void set_max_length(size_t new_max) { |
| max_length_ = new_max; |
| } |
| |
| // Return the number of times that length() has gone over max_length(). |
| size_t length_overages() const { |
| return length_overages_; |
| } |
| |
| void set_length_overages(size_t new_count) { |
| length_overages_ = new_count; |
| } |
| |
| // Is list empty? |
| bool empty() const { |
| return list_ == NULL; |
| } |
| |
| // Low-water mark management |
| int lowwatermark() const { return lowater_; } |
| void clear_lowwatermark() { lowater_ = length_; } |
| |
| uint32_t Push(void* ptr) { |
| uint32_t length = length_ + 1; |
| SLL_Push(&list_, ptr); |
| length_ = length; |
| return length; |
| } |
| |
| void* Pop() { |
| ASSERT(list_ != NULL); |
| length_--; |
| if (length_ < lowater_) lowater_ = length_; |
| return SLL_Pop(&list_); |
| } |
| |
| bool TryPop(void **rv) { |
| if (SLL_TryPop(&list_, rv)) { |
| length_--; |
| if (PREDICT_FALSE(length_ < lowater_)) lowater_ = length_; |
| return true; |
| } |
| return false; |
| } |
| |
| void* Next() { |
| return SLL_Next(&list_); |
| } |
| |
| void PushRange(int N, void *start, void *end) { |
| SLL_PushRange(&list_, start, end); |
| length_ += N; |
| } |
| |
| void PopRange(int N, void **start, void **end) { |
| SLL_PopRange(&list_, N, start, end); |
| ASSERT(length_ >= N); |
| length_ -= N; |
| if (length_ < lowater_) lowater_ = length_; |
| } |
| }; |
| |
| // Gets and returns an object from the central cache, and, if possible, |
| // also adds some objects of that size class to this thread cache. |
| void* FetchFromCentralCache(uint32_t cl, int32_t byte_size, |
| void *(*oom_handler)(size_t size)); |
| |
| void ListTooLong(void* ptr, uint32_t cl); |
| |
| // Releases some number of items from src. Adjusts the list's max_length |
| // to eventually converge on num_objects_to_move(cl). |
| void ListTooLong(FreeList* src, uint32_t cl); |
| |
| // Releases N items from this thread cache. |
| void ReleaseToCentralCache(FreeList* src, uint32_t cl, int N); |
| |
| void SetMaxSize(int32_t new_max_size); |
| |
| // Increase max_size_ by reducing unclaimed_cache_space_ or by |
| // reducing the max_size_ of some other thread. In both cases, |
| // the delta is kStealAmount. |
| void IncreaseCacheLimit(); |
| // Same as above but requires Static::pageheap_lock() is held. |
| void IncreaseCacheLimitLocked(); |
| |
| // Linked list of heap objects. Protected by Static::pageheap_lock. |
| static ThreadCache* thread_heaps_; |
| static int thread_heap_count_; |
| |
| // A pointer to one of the objects in thread_heaps_. Represents |
| // the next ThreadCache from which a thread over its max_size_ should |
| // steal memory limit. Round-robin through all of the objects in |
| // thread_heaps_. Protected by Static::pageheap_lock. |
| static ThreadCache* next_memory_steal_; |
| |
| // Lower bound on per thread cache size. Default value is 512 KBs. |
| static std::atomic<size_t> min_per_thread_cache_size_; |
| |
| // Overall thread cache size. Protected by Static::pageheap_lock. |
| static size_t overall_thread_cache_size_; |
| |
| // Global per-thread cache size. Writes are protected by |
| // Static::pageheap_lock. Reads are done without any locking, which should be |
| // fine as long as size_t can be written atomically and we don't place |
| // invariants between this variable and other pieces of state. |
| static volatile size_t per_thread_cache_size_; |
| |
| // Represents overall_thread_cache_size_ minus the sum of max_size_ |
| // across all ThreadCaches. Protected by Static::pageheap_lock. |
| static ssize_t unclaimed_cache_space_; |
| |
| // This class is laid out with the most frequently used fields |
| // first so that hot elements are placed on the same cache line. |
| |
| FreeList list_[kClassSizesMax]; // Array indexed by size-class |
| |
| int32_t size_; // Combined size of data |
| int32_t max_size_; // size_ > max_size_ --> Scavenge() |
| |
| // We sample allocations, biased by the size of the allocation |
| Sampler sampler_; // A sampler |
| |
| static void RecomputePerThreadCacheSize(); |
| |
| // All ThreadCache objects are kept in a linked list (for stats collection) |
| ThreadCache* next_; |
| ThreadCache* prev_; |
| |
| // Ensure that this class is cacheline-aligned. This is critical for |
| // performance, as false sharing would negate many of the benefits |
| // of a per-thread cache. |
| } CACHELINE_ALIGNED; |
| |
| // Allocator for thread heaps |
| // This is logically part of the ThreadCache class, but MSVC, at |
| // least, does not like using ThreadCache as a template argument |
| // before the class is fully defined. So we put it outside the class. |
| extern PageHeapAllocator<ThreadCache> threadcache_allocator; |
| |
| inline int ThreadCache::HeapsInUse() { |
| return threadcache_allocator.inuse(); |
| } |
| |
| ALWAYS_INLINE void* ThreadCache::Allocate( |
| size_t size, uint32_t cl, void *(*oom_handler)(size_t size)) { |
| FreeList* list = &list_[cl]; |
| |
| #ifdef NO_TCMALLOC_SAMPLES |
| size = list->object_size(); |
| #endif |
| |
| ASSERT(size <= kMaxSize); |
| ASSERT(size != 0); |
| ASSERT(size == 0 || size == Static::sizemap()->ByteSizeForClass(cl)); |
| |
| void* rv; |
| if (!list->TryPop(&rv)) { |
| return FetchFromCentralCache(cl, size, oom_handler); |
| } |
| size_ -= size; |
| return rv; |
| } |
| |
| ALWAYS_INLINE void ThreadCache::Deallocate(void* ptr, uint32_t cl) { |
| ASSERT(list_[cl].max_length() > 0); |
| FreeList* list = &list_[cl]; |
| |
| // This catches back-to-back frees of allocs in the same size |
| // class. A more comprehensive (and expensive) test would be to walk |
| // the entire freelist. But this might be enough to find some bugs. |
| ASSERT(ptr != list->Next()); |
| |
| uint32_t length = list->Push(ptr); |
| |
| if (PREDICT_FALSE(length > list->max_length())) { |
| ListTooLong(list, cl); |
| return; |
| } |
| |
| size_ += list->object_size(); |
| if (PREDICT_FALSE(size_ > max_size_)){ |
| Scavenge(); |
| } |
| } |
| |
| inline void ThreadCache::SetMaxSize(int32_t new_max_size) { |
| max_size_ = new_max_size; |
| } |
| |
| #ifndef NO_TCMALLOC_SAMPLES |
| |
| inline bool ThreadCache::SampleAllocation(size_t k) { |
| ASSERT(Static::IsInited()); |
| return !sampler_.RecordAllocation(k); |
| } |
| |
| inline bool ThreadCache::TryRecordAllocationFast(size_t k) { |
| ASSERT(Static::IsInited()); |
| return sampler_.TryRecordAllocationFast(k); |
| } |
| |
| #else |
| |
| inline bool ThreadCache::SampleAllocation(size_t k) { |
| return false; |
| } |
| |
| inline bool ThreadCache::TryRecordAllocationFast(size_t k) { |
| return true; |
| } |
| |
| #endif |
| |
| } // namespace tcmalloc |
| |
| #endif // TCMALLOC_THREAD_CACHE_H_ |