// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_ALLOCATOR_PARTITION_ALLOCATOR_THREAD_CACHE_H_
#define BASE_ALLOCATOR_PARTITION_ALLOCATOR_THREAD_CACHE_H_

#include <atomic>
#include <memory>

#include "base/allocator/partition_allocator/partition_alloc_forward.h"
#include "base/allocator/partition_allocator/partition_cookie.h"
#include "base/allocator/partition_allocator/partition_freelist_entry.h"
#include "base/allocator/partition_allocator/partition_lock.h"
#include "base/allocator/partition_allocator/partition_stats.h"
#include "base/allocator/partition_allocator/partition_tls.h"
#include "base/base_export.h"
#include "base/gtest_prod_util.h"
#include "base/no_destructor.h"
#include "base/partition_alloc_buildflags.h"
#include "base/synchronization/lock.h"

// Need TLS support.
#if defined(OS_POSIX) || defined(OS_WIN)
#define PA_THREAD_CACHE_SUPPORTED
#endif

namespace base {

namespace internal {

class ThreadCache;

extern BASE_EXPORT PartitionTlsKey g_thread_cache_key;
// Global registry of all ThreadCache instances.
//
// This class cannot allocate in the (Un)RegisterThreadCache() functions, as
// they are called from the ThreadCache constructor, which itself runs from
// within the allocator. However, the other members can allocate.
class BASE_EXPORT ThreadCacheRegistry {
 public:
  static ThreadCacheRegistry& Instance();
  // Do not instantiate.
  //
  // Several things are surprising here:
  // - The constructor is public even though this is intended to be a
  //   singleton: we cannot use a "static local" variable in |Instance()| as
  //   this is reached too early during CRT initialization on Windows, meaning
  //   that static local variables don't work (as they call into the
  //   uninitialized runtime). To sidestep that, we use a regular global
  //   variable in the .cc, which is fine as this object's constructor is
  //   constexpr.
  // - Marked inline so that the chromium style plugin doesn't complain that a
  //   "complex constructor" has an inline body. This warning is disabled when
  //   the constructor is explicitly marked "inline". Note that this is a
  //   false positive of the plugin, since constexpr implies inline.
  inline constexpr ThreadCacheRegistry();

  void RegisterThreadCache(ThreadCache* cache);
  void UnregisterThreadCache(ThreadCache* cache);
  // Dumps statistics into |stats|: either for all thread caches, or only for
  // this thread's when |my_thread_only| is true.
  void DumpStats(bool my_thread_only, ThreadCacheStats* stats);
  // Purges this thread's cache, and asks the other ones to trigger Purge() at
  // a later point (during a deallocation).
  void PurgeAll();

  static PartitionLock& GetLock() { return Instance().lock_; }

 private:
  friend class NoDestructor<ThreadCacheRegistry>;
  // Not using base::Lock as the object's constructor must be constexpr.
  PartitionLock lock_;
  ThreadCache* list_head_ GUARDED_BY(GetLock()) = nullptr;
};

constexpr ThreadCacheRegistry::ThreadCacheRegistry() = default;
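
// Usage sketch (illustrative only; |stats| is a hypothetical local, and the
// real call sites live outside this header):
//
//   ThreadCacheStats stats{};
//   ThreadCacheRegistry::Instance().DumpStats(/* my_thread_only= */ false,
//                                             &stats);
//   ThreadCacheRegistry::Instance().PurgeAll();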

// Optional statistics collection.
#define PA_ENABLE_THREAD_CACHE_STATISTICS 1

#if defined(PA_ENABLE_THREAD_CACHE_STATISTICS)
#define INCREMENT_COUNTER(counter) ++counter
#define GET_COUNTER(counter) counter
#else
#define INCREMENT_COUNTER(counter) \
  do {                             \
  } while (0)
#define GET_COUNTER(counter) 0
#endif  // defined(PA_ENABLE_THREAD_CACHE_STATISTICS)

ALWAYS_INLINE static constexpr int ConstexprLog2(size_t n) {
  return n < 1 ? -1 : (n < 2 ? 0 : (1 + ConstexprLog2(n >> 1)));
}
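
// Compile-time sanity checks (illustrative): ConstexprLog2() computes
// floor(log2(n)), and returns -1 for n == 0.
static_assert(ConstexprLog2(0) == -1, "");
static_assert(ConstexprLog2(1) == 0, "");
static_assert(ConstexprLog2(512) == 9, "");
static_assert(ConstexprLog2(513) == 9, "");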

// Per-thread cache. *Not* thread-safe; it must only be accessed from a single
// thread.
//
// In practice, this is easily enforced as long as only the pointer returned
// by |ThreadCache::Get()| is manipulated: since it comes from this thread's
// TLS slot, any |ThreadCache::Get()->*()| call is necessarily made from a
// single thread.
class BASE_EXPORT ThreadCache {
 public:
  // Initializes the thread cache for |root|. May allocate, so should be
  // called with the thread cache disabled on the partition side, and without
  // the partition lock held.
  //
  // May only be called by a single PartitionRoot.
  static void Init(PartitionRoot<ThreadSafe>* root);
  static void Init(PartitionRoot<NotThreadSafe>* root) { IMMEDIATE_CRASH(); }

  static ThreadCache* Get() {
    return reinterpret_cast<ThreadCache*>(PartitionTlsGet(g_thread_cache_key));
  }

  // Creates a new ThreadCache associated with |root|.
  // Must be called without the partition lock held, as this may allocate.
  static ThreadCache* Create(PartitionRoot<ThreadSafe>* root);
  static ThreadCache* Create(PartitionRoot<NotThreadSafe>* root) {
    IMMEDIATE_CRASH();
  }

  ~ThreadCache();

  // Force placement new.
  void* operator new(size_t) = delete;
  void* operator new(size_t, void* buffer) { return buffer; }
  void operator delete(void* ptr) = delete;
  ThreadCache(const ThreadCache&) = delete;
  ThreadCache(const ThreadCache&&) = delete;
  ThreadCache& operator=(const ThreadCache&) = delete;

  // Tries to put a memory block at |address| into the cache.
  // The block comes from the bucket at index |bucket_index| of the partition
  // this cache is for.
  //
  // Returns true if the memory was put in the cache, and false otherwise.
  // This can happen either because the cache is full or the allocation was
  // too large.
  ALWAYS_INLINE bool MaybePutInCache(void* address, size_t bucket_index);

  // Tries to allocate memory from the cache.
  // Returns nullptr on failure.
  //
  // Has the same behavior as RawAlloc(), that is: neither cookie nor
  // ref-count handling.
  ALWAYS_INLINE void* GetFromCache(size_t bucket_index);

  // Asks this cache to trigger |Purge()| at a later point. Can be called from
  // any thread.
  void SetShouldPurge();
  // Empties the cache.
  // The partition lock must *not* be held when calling this.
  // Must be called from the thread this cache is for.
  void Purge();
  void AccumulateStats(ThreadCacheStats* stats) const;

  size_t bucket_count_for_testing(size_t index) const {
    return buckets_[index].count;
  }

 private:
  struct Bucket {
    PartitionFreelistEntry* freelist_head;
    uint16_t count;
    uint16_t limit;
  };

  explicit ThreadCache(PartitionRoot<ThreadSafe>* root);
  static void Delete(void* thread_cache_ptr);
  // Empties |bucket| until there are at most |limit| objects in it.
  void ClearBucket(Bucket& bucket, size_t limit);

  // TODO(lizeb): Optimize the threshold.
  static constexpr size_t kSizeThreshold = 512;
  static constexpr uint16_t kBucketCount =
      ((ConstexprLog2(kSizeThreshold) - kMinBucketedOrder + 1)
       << kNumBucketsPerOrderBits) +
      1;
  static_assert(
      kBucketCount < kNumBuckets,
      "Cannot have more cached buckets than what the allocator supports");
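  // The cache thus only handles "small" buckets: any |bucket_index| >=
  // kBucketCount is treated as a miss by MaybePutInCache() and GetFromCache()
  // below, so allocations above kSizeThreshold always go to the central
  // allocator.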

  std::atomic<bool> should_purge_;
  Bucket buckets_[kBucketCount];
  ThreadCacheStats stats_;
  PartitionRoot<ThreadSafe>* const root_;

  // Intrusive list since ThreadCacheRegistry::RegisterThreadCache() cannot
  // allocate.
  ThreadCache* next_ GUARDED_BY(ThreadCacheRegistry::GetLock());
  ThreadCache* prev_ GUARDED_BY(ThreadCacheRegistry::GetLock());

  friend class ThreadCacheRegistry;
  FRIEND_TEST_ALL_PREFIXES(ThreadCacheTest, LargeAllocationsAreNotCached);
  FRIEND_TEST_ALL_PREFIXES(ThreadCacheTest, MultipleThreadCaches);
  FRIEND_TEST_ALL_PREFIXES(ThreadCacheTest, RecordStats);
  FRIEND_TEST_ALL_PREFIXES(ThreadCacheTest, ThreadCacheRegistry);
  FRIEND_TEST_ALL_PREFIXES(ThreadCacheTest, MultipleThreadCachesAccounting);
};

ALWAYS_INLINE bool ThreadCache::MaybePutInCache(void* address,
                                                size_t bucket_index) {
  if (UNLIKELY(should_purge_.load(std::memory_order_relaxed)))
    Purge();

  INCREMENT_COUNTER(stats_.cache_fill_count);

  if (UNLIKELY(bucket_index >= kBucketCount)) {
    INCREMENT_COUNTER(stats_.cache_fill_misses);
    return false;
  }

  auto& bucket = buckets_[bucket_index];

  PA_DCHECK(bucket.count != 0 || bucket.freelist_head == nullptr);

  auto* entry = reinterpret_cast<PartitionFreelistEntry*>(address);
  entry->SetNextForThreadCache(bucket.freelist_head);
  bucket.freelist_head = entry;
  bucket.count++;

  INCREMENT_COUNTER(stats_.cache_fill_hits);

  // Batched deallocation, amortizing lock acquisitions.
  if (UNLIKELY(bucket.count >= bucket.limit)) {
    ClearBucket(bucket, bucket.limit >> 1);
  }

  return true;
}

ALWAYS_INLINE void* ThreadCache::GetFromCache(size_t bucket_index) {
  INCREMENT_COUNTER(stats_.alloc_count);
  // Only handle "small" allocations.
  if (UNLIKELY(bucket_index >= kBucketCount)) {
    INCREMENT_COUNTER(stats_.alloc_miss_too_large);
    INCREMENT_COUNTER(stats_.alloc_misses);
    return nullptr;
  }

  auto& bucket = buckets_[bucket_index];
  auto* result = bucket.freelist_head;
  if (UNLIKELY(!result)) {
    PA_DCHECK(bucket.count == 0);
    INCREMENT_COUNTER(stats_.alloc_miss_empty);
    INCREMENT_COUNTER(stats_.alloc_misses);
    return nullptr;
  }

  PA_DCHECK(bucket.count != 0);
  auto* next = result->GetNext();
  PA_DCHECK(result != next);
  bucket.count--;
  PA_DCHECK(bucket.count != 0 || !next);
  bucket.freelist_head = next;

  INCREMENT_COUNTER(stats_.alloc_hits);
  return result;
}
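
// Usage sketch (illustrative only): the real call sites live in
// PartitionRoot's allocation and deallocation fast paths; |bucket_index| and
// |slot_start| below are hypothetical local names.
//
//   // Allocation fast path: try the thread cache before taking the
//   // partition lock.
//   if (ThreadCache* cache = ThreadCache::Get()) {
//     if (void* slot_start = cache->GetFromCache(bucket_index))
//       return slot_start;  // Cache hit, no lock taken.
//   }
//   // ... otherwise allocate from the central allocator ...
//
//   // Deallocation fast path: try to keep the freed block in the cache.
//   if (ThreadCache* cache = ThreadCache::Get()) {
//     if (cache->MaybePutInCache(slot_start, bucket_index))
//       return;  // Kept in the cache; actually freed later.
//   }
//   // ... otherwise free it in the central allocator ...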

}  // namespace internal
}  // namespace base

#endif  // BASE_ALLOCATOR_PARTITION_ALLOCATOR_THREAD_CACHE_H_