blob: 12b447fe036874a067e2d0331ae0e739a8a74ce8 [file] [log] [blame]
// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
// Copyright (c) 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// ---
// Author: Sanjay Ghemawat <opensource@google.com>
#ifndef TCMALLOC_PAGE_HEAP_H_
#define TCMALLOC_PAGE_HEAP_H_
#include <config.h>
#include <stddef.h> // for size_t
#include <stdint.h> // for uint64_t, int64_t, uint16_t
#include <gperftools/malloc_extension.h>
#include "base/basictypes.h"
#include "base/spinlock.h"
#include "base/thread_annotations.h"
#include "common.h"
#include "packed-cache-inl.h"
#include "pagemap.h"
#include "span.h"
// We need to dllexport PageHeap just for the unittest. MSVC complains
// that we don't dllexport the PageHeap members, but we don't need to
// test those, so I just suppress this warning.
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable:4251)
#endif
// This #ifdef should almost never be set. Set NO_TCMALLOC_SAMPLES if
// you're porting to a system where you really can't get a stacktrace.
// Because we control the definition of GetStackTrace, all clients of
// GetStackTrace should #include us rather than stacktrace.h.
#ifdef NO_TCMALLOC_SAMPLES
// We use #define so code compiles even if you #include stacktrace.h somehow.
# define GetStackTrace(stack, depth, skip) (0)
#else
# include <gperftools/stacktrace.h>
#endif
namespace base {
struct MallocRange;
}
namespace tcmalloc {
// -------------------------------------------------------------------------
// Map from page-id to per-page data
// -------------------------------------------------------------------------
// We use PageMap2<> for 32-bit and PageMap3<> for 64-bit machines.
// We also use a simple one-level cache for hot PageID-to-sizeclass mappings,
// because sometimes the sizeclass is all the information we need.
// Selector class -- general selector uses 3-level map
template <int BITS> class MapSelector {
public:
typedef TCMalloc_PageMap3<BITS-kPageShift> Type;
};
#ifndef TCMALLOC_SMALL_BUT_SLOW
// x86-64 and arm64 are using 48 bits of address space. So we can use
// just two level map, but since initial ram consumption of this mode
// is a bit on the higher side, we opt-out of it in
// TCMALLOC_SMALL_BUT_SLOW mode.
template <> class MapSelector<48> {
public:
typedef TCMalloc_PageMap2<48-kPageShift> Type;
};
#endif // TCMALLOC_SMALL_BUT_SLOW
// A two-level map for 32-bit machines
template <> class MapSelector<32> {
public:
typedef TCMalloc_PageMap2<32-kPageShift> Type;
};
// -------------------------------------------------------------------------
// Page-level allocator
// * Eager coalescing
//
// Heap for page-level allocation. We allow allocating and freeing a
// contiguous runs of pages (called a "span").
// -------------------------------------------------------------------------
class PERFTOOLS_DLL_DECL PageHeap {
public:
PageHeap() : PageHeap(1) {}
PageHeap(Length smallest_span_size);
SpinLock* pageheap_lock() {
return &lock_;
}
// Aligns given size up to be multiple of smallest_span_size.
Length RoundUpSize(Length n);
// Allocate a run of "n" pages. Returns zero if out of memory.
// Caller should not pass "n == 0" -- instead, n should have
// been rounded up already.
Span* New(Length n) {
return NewWithSizeClass(n, 0);
}
Span* NewWithSizeClass(Length n, uint32 sizeclass);
// Same as above but with alignment. Requires page heap
// lock, like New above.
Span* NewAligned(Length n, Length align_pages);
// Delete the span "[p, p+n-1]".
// REQUIRES: span was returned by earlier call to New() and
// has not yet been deleted.
void Delete(Span* span);
template <typename Body>
void PrepareAndDelete(Span* span, const Body& body) LOCKS_EXCLUDED(lock_) {
SpinLockHolder h(&lock_);
body();
DeleteLocked(span);
}
// Mark an allocated span as being used for small objects of the
// specified size-class.
// REQUIRES: span was returned by an earlier call to New()
// and has not yet been deleted.
void RegisterSizeClass(Span* span, uint32 sc);
Span* SplitForTest(Span* span, Length n) {
SpinLockHolder l(&lock_);
return Split(span, n);
}
// Return the descriptor for the specified page. Returns NULL if
// this PageID was not allocated previously.
inline ATTRIBUTE_ALWAYS_INLINE
Span* GetDescriptor(PageID p) const {
return reinterpret_cast<Span*>(pagemap_.get(p));
}
// If this page heap is managing a range with starting page # >= start,
// store info about the range in *r and return true. Else return false.
bool GetNextRange(PageID start, base::MallocRange* r);
// Page heap statistics
struct Stats {
Stats() : system_bytes(0), free_bytes(0), unmapped_bytes(0), committed_bytes(0),
scavenge_count(0), commit_count(0), total_commit_bytes(0),
decommit_count(0), total_decommit_bytes(0),
reserve_count(0), total_reserve_bytes(0) {}
uint64_t system_bytes; // Total bytes allocated from system
uint64_t free_bytes; // Total bytes on normal freelists
uint64_t unmapped_bytes; // Total bytes on returned freelists
uint64_t committed_bytes; // Bytes committed, always <= system_bytes_.
uint64_t scavenge_count; // Number of times scavagened flush pages
uint64_t commit_count; // Number of virtual memory commits
uint64_t total_commit_bytes; // Bytes committed in lifetime of process
uint64_t decommit_count; // Number of virtual memory decommits
uint64_t total_decommit_bytes; // Bytes decommitted in lifetime of process
uint64_t reserve_count; // Number of virtual memory reserves
uint64_t total_reserve_bytes; // Bytes reserved in lifetime of process
};
inline Stats StatsLocked() const { return stats_; }
struct SmallSpanStats {
// For each free list of small spans, the length (in spans) of the
// normal and returned free lists for that size.
//
// NOTE: index 'i' accounts the number of spans of length 'i + 1'.
int64 normal_length[kMaxPages];
int64 returned_length[kMaxPages];
};
void GetSmallSpanStatsLocked(SmallSpanStats* result);
// Stats for free large spans (i.e., spans with more than kMaxPages pages).
struct LargeSpanStats {
int64 spans; // Number of such spans
int64 normal_pages; // Combined page length of normal large spans
int64 returned_pages; // Combined page length of unmapped spans
};
void GetLargeSpanStatsLocked(LargeSpanStats* result);
bool Check();
// Like Check() but does some more comprehensive checking.
bool CheckExpensive();
bool CheckList(Span* list, Length min_pages, Length max_pages,
int freelist); // ON_NORMAL_FREELIST or ON_RETURNED_FREELIST
bool CheckSet(SpanSet *s, Length min_pages, int freelist);
// Try to release at least num_pages for reuse by the OS. Returns
// the actual number of pages released, which may be less than
// num_pages if there weren't enough pages to release. The result
// may also be larger than num_pages since page_heap might decide to
// release one large range instead of fragmenting it into two
// smaller released and unreleased ranges.
Length ReleaseAtLeastNPages(Length num_pages);
// Reads and writes to pagemap_cache_ do not require locking.
bool TryGetSizeClass(PageID p, uint32* out) const {
return pagemap_cache_.TryGet(p, out);
}
void SetCachedSizeClass(PageID p, uint32 cl) {
ASSERT(cl != 0);
pagemap_cache_.Put(p, cl);
}
void InvalidateCachedSizeClass(PageID p) { pagemap_cache_.Invalidate(p); }
uint32 GetSizeClassOrZero(PageID p) const {
uint32 cached_value;
if (!TryGetSizeClass(p, &cached_value)) {
cached_value = 0;
}
return cached_value;
}
bool GetAggressiveDecommit(void) {return aggressive_decommit_;}
void SetAggressiveDecommit(bool aggressive_decommit) {
aggressive_decommit_ = aggressive_decommit;
}
private:
struct LockingContext;
void HandleUnlock(LockingContext* context) UNLOCK_FUNCTION(lock_) ;
// Allocates a big block of memory for the pagemap once we reach more than
// 128MB
static const size_t kPageMapBigAllocationThreshold = 128 << 20;
// Minimum number of pages to fetch from system at a time. Must be
// significantly bigger than kBlockSize to amortize system-call
// overhead, and also to reduce external fragementation. Also, we
// should keep this value big because various incarnations of Linux
// have small limits on the number of mmap() regions per
// address-space.
// REQUIRED: kMinSystemAlloc <= kMaxPages;
static const int kMinSystemAlloc = kMaxPages;
// Never delay scavenging for more than the following number of
// deallocated pages. With 4K pages, this comes to 4GB of
// deallocation.
static const int kMaxReleaseDelay = 1 << 20;
// If there is nothing to release, wait for so many pages before
// scavenging again. With 4K pages, this comes to 1GB of memory.
static const int kDefaultReleaseDelay = 1 << 18;
const Length smallest_span_size_;
SpinLock lock_;
// Pick the appropriate map and cache types based on pointer size
typedef MapSelector<kAddressBits>::Type PageMap;
typedef PackedCache<kAddressBits - kPageShift> PageMapCache;
mutable PageMapCache pagemap_cache_;
PageMap pagemap_;
// We segregate spans of a given size into two circular linked
// lists: one for normal spans, and one for spans whose memory
// has been returned to the system.
struct SpanList {
Span normal;
Span returned;
};
// Sets of spans with length > kMaxPages.
//
// Rather than using a linked list, we use sets here for efficient
// best-fit search.
SpanSet large_normal_;
SpanSet large_returned_;
// Array mapping from span length to a doubly linked list of free spans
//
// NOTE: index 'i' stores spans of length 'i + 1'.
SpanList free_[kMaxPages];
// Statistics on system, free, and unmapped bytes
Stats stats_;
Span* NewLocked(Length n, LockingContext* context) EXCLUSIVE_LOCKS_REQUIRED(lock_);
void DeleteLocked(Span* span) EXCLUSIVE_LOCKS_REQUIRED(lock_);
// Split an allocated span into two spans: one of length "n" pages
// followed by another span of length "span->length - n" pages.
// Modifies "*span" to point to the first span of length "n" pages.
// Returns a pointer to the second span.
//
// REQUIRES: "0 < n < span->length"
// REQUIRES: span->location == IN_USE
// REQUIRES: span->sizeclass == 0
Span* Split(Span* span, Length n);
Span* SearchFreeAndLargeLists(Length n);
bool GrowHeap(Length n, LockingContext* context) EXCLUSIVE_LOCKS_REQUIRED(lock_);
// REQUIRES: span->length >= n
// REQUIRES: span->location != IN_USE
// Remove span from its free list, and move any leftover part of
// span into appropriate free lists. Also update "span" to have
// length exactly "n" and mark it as non-free so it can be returned
// to the client. After all that, decrease free_pages_ by n and
// return span.
Span* Carve(Span* span, Length n);
void RecordSpan(Span* span) {
pagemap_.set(span->start, span);
if (span->length > 1) {
pagemap_.set(span->start + span->length - 1, span);
}
}
// Allocate a large span of length == n. If successful, returns a
// span of exactly the specified length. Else, returns NULL.
Span* AllocLarge(Length n);
// Coalesce span with neighboring spans if possible, prepend to
// appropriate free list, and adjust stats.
void MergeIntoFreeList(Span* span);
// Commit the span.
void CommitSpan(Span* span);
// Decommit the span.
bool DecommitSpan(Span* span);
// Prepends span to appropriate free list, and adjusts stats.
void PrependToFreeList(Span* span);
// Removes span from its free list, and adjust stats.
void RemoveFromFreeList(Span* span);
// Incrementally release some memory to the system.
// IncrementalScavenge(n) is called whenever n pages are freed.
void IncrementalScavenge(Length n);
// Attempts to decommit 's' and move it to the returned freelist.
//
// Returns the length of the Span or zero if release failed.
//
// REQUIRES: 's' must be on the NORMAL freelist.
Length ReleaseSpan(Span *s);
// Checks if we are allowed to take more memory from the system.
// If limit is reached and allowRelease is true, tries to release
// some unused spans.
bool EnsureLimit(Length n, bool allowRelease = true);
Span* CheckAndHandlePreMerge(Span *span, Span *other);
// Number of pages to deallocate before doing more scavenging
int64_t scavenge_counter_;
// Index of last free list where we released memory to the OS.
int release_index_;
bool aggressive_decommit_;
};
} // namespace tcmalloc
#ifdef _MSC_VER
#pragma warning(pop)
#endif
#endif // TCMALLOC_PAGE_HEAP_H_