/* **********************************************************
* Copyright (c) 2010-2023 Google, Inc. All rights reserved.
* Copyright (c) 2001-2010 VMware, Inc. All rights reserved.
* **********************************************************/
/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/* Copyright (c) 2003-2007 Determina Corp. */
/* Copyright (c) 2001-2003 Massachusetts Institute of Technology */
/* Copyright (c) 2001 Hewlett-Packard Company */
/*
* heap.c - heap manager
*/
#include "globals.h"
#include <limits.h>
#include "fragment.h" /* for struct sizes */
#include "link.h" /* for struct sizes */
#include "instr.h" /* for struct sizes */
#include "fcache.h" /* fcache_low_on_memory */
#ifdef UNIX
# include "memquery.h"
#endif
#ifdef DEBUG
# include "hotpatch.h" /* To handle leak for case 9593. */
#endif
#include "instrument.h"
#ifdef HEAP_ACCOUNTING
# ifndef DEBUG
# error HEAP_ACCOUNTING requires DEBUG
# endif
#endif
#ifdef DEBUG_MEMORY
/* on by default but higher than general asserts */
# define CHKLVL_MEMFILL CHKLVL_DEFAULT
#endif
extern bool vm_areas_exited;
/***************************************************************************
* we cannot use malloc in the middle of interpreting the client program
* because we could be in the middle of interpreting malloc, which is not
* always reentrant
*
* We have a virtual memory manager which makes sure memory is
* reserved within the application address space so that we don't have
* to fight with the application. We call os_heap_reserve to allocate
* virtual space in a single consecutive region. We later use
* os_heap_commit to get committed memory in large chunks and manage
* the chunks using a simple scheme of free lists of different sizes.
* The virtual memory manager has to store out of band information about
* used and free blocks, since of course there is no real memory to use.
* The chunks (heap units) store extra information in band for both
* used and free blocks. However, in the allocated blocks within a unit we
* don't need to store any information since heap_free passes in the
* size; we store the next pointers for the free lists at the start of
* the free blocks themselves. We have one large reservation for most of
* our allocations, and yet another for allocations that we do not
* plan on ever freeing up on detach - the only unavoidable tombstones
* are those for thread private code system calls that may be stuck on
* callbacks. In case we run out of reserved memory we do fall back
* on requests from the OS, but any of these may fail if we are
* competing with the application.
*
* looking at dynamo behavior as of Jan 2001, most heap_alloc requests are
* for < 128 bytes, very few for larger, so we have a bunch of fixed-size
* blocks of small sizes
*
* the UINT_MAX size is a variable-length block; we keep a size_t-sized header to store
* the size (again storing the next pointer when free at the start of
* what we pass to the user)
*/
static const uint BLOCK_SIZES[] = {
8, /* for instr bits */
#ifndef X64
/* for x64 future_fragment_t is 24 bytes (could be 20 if we could put flags last) */
sizeof(future_fragment_t), /* 12 (24 x64) */
#endif
/* we have a lot of size 16 requests for IR but they are transient */
24, /* fcache empties and vm_area_t are now 20, vm area extras still 24 */
/* 40 dbg / 36 rel: */
ALIGN_FORWARD(sizeof(fragment_t) + sizeof(indirect_linkstub_t), HEAP_ALIGNMENT),
#if defined(X64)
# ifdef DEBUG
sizeof(fragment_t) + sizeof(direct_linkstub_t) +
sizeof(cbr_fallthrough_linkstub_t), /* 112 dbg x64 / 104 rel x64 */
# else
sizeof(instr_t), /* 112 x64 */
# endif
#else
sizeof(fragment_t) + sizeof(direct_linkstub_t) +
sizeof(cbr_fallthrough_linkstub_t), /* 60 dbg / 56 rel */
# ifndef DEBUG
sizeof(instr_t), /* 72 */
# endif
#endif
/* we keep this bucket even though only 10% or so of normal bbs
* hit this.
*/
ALIGN_FORWARD(sizeof(fragment_t) + 2 * sizeof(direct_linkstub_t),
HEAP_ALIGNMENT), /* 68 dbg / 64 rel (128 x64) */
ALIGN_FORWARD(sizeof(trace_t) + 2 * sizeof(direct_linkstub_t) + sizeof(uint),
HEAP_ALIGNMENT), /* 80 dbg / 76 rel (148 x64 => 152) */
/* FIXME: measure whether should put in indirect mixes as well */
ALIGN_FORWARD(sizeof(trace_t) + 3 * sizeof(direct_linkstub_t) + sizeof(uint),
HEAP_ALIGNMENT), /* 96 dbg / 92 rel (180 x64 => 184) */
ALIGN_FORWARD(sizeof(trace_t) + 5 * sizeof(direct_linkstub_t) + sizeof(uint),
HEAP_ALIGNMENT), /* 128 dbg / 124 rel (244 x64 => 248) */
256, 512, UINT_MAX /* variable-length */
};
#define BLOCK_TYPES (sizeof(BLOCK_SIZES) / sizeof(uint))
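/* Illustrative bucket mapping (exact sizes vary by build and target): a 20-byte
 * heap_alloc request rounds up to the 24-byte bucket, with the difference
 * tracked in block_wasted[] in DEBUG builds; requests larger than the final
 * 512-byte fixed bucket fall into the UINT_MAX variable-length bucket, which
 * prepends a size header (see HEADER_SIZE below).
 */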
#ifdef DEBUG
/* FIXME: would be nice to have these stats per HEAPACCT category */
/* These are ints only b/c we used to do non-atomic adds and wanted to
* gracefully handle underflow to negative values
*/
DECLARE_NEVERPROT_VAR(static int block_total_count[BLOCK_TYPES], { 0 });
DECLARE_NEVERPROT_VAR(static int block_count[BLOCK_TYPES], { 0 });
DECLARE_NEVERPROT_VAR(static int block_peak_count[BLOCK_TYPES], { 0 });
DECLARE_NEVERPROT_VAR(static int block_wasted[BLOCK_TYPES], { 0 });
DECLARE_NEVERPROT_VAR(static int block_peak_wasted[BLOCK_TYPES], { 0 });
DECLARE_NEVERPROT_VAR(static int block_align_pad[BLOCK_TYPES], { 0 });
DECLARE_NEVERPROT_VAR(static int block_peak_align_pad[BLOCK_TYPES], { 0 });
DECLARE_NEVERPROT_VAR(static bool out_of_vmheap_once, false);
#endif
/* variable-length: we steal one size_t-sized header for the size */
#define HEADER_SIZE (sizeof(size_t))
/* VARIABLE_SIZE is assignable */
#define VARIABLE_SIZE(p) (*(size_t *)((p)-HEADER_SIZE))
#define MEMSET_HEADER(p, value) VARIABLE_SIZE(p) = HEAP_TO_PTR_UINT(value)
#define GET_VARIABLE_ALLOCATION_SIZE(p) (VARIABLE_SIZE(p) + HEADER_SIZE)
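/* Layout sketch of a variable-length allocation, per the macros above:
 *   | size_t header | user data ...             |
 *                   ^ pointer handed to the caller
 * VARIABLE_SIZE(p) reads the header just below p, and
 * GET_VARIABLE_ALLOCATION_SIZE(p) adds HEADER_SIZE back in to get the full
 * footprint charged against the unit.
 */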
/* The heap is allocated in units.
* We start out with a small unit. Then each additional unit we
* need doubles in size, up to a maximum.
* We keep the initial units small for thread-private heaps, since with
* thousands of threads the space can add up.
*/
#define HEAP_UNIT_MIN_SIZE DYNAMO_OPTION(initial_heap_unit_size)
#define HEAP_UNIT_MAX_SIZE INTERNAL_OPTION(max_heap_unit_size)
#define GLOBAL_UNIT_MIN_SIZE DYNAMO_OPTION(initial_global_heap_unit_size)
#define GUARD_PAGE_ADJUSTMENT (dynamo_options.guard_pages ? 2 * PAGE_SIZE : 0)
/* gets usable space in the unit */
#define UNITROOM(u) ((size_t)(u->end_pc - u->start_pc))
#define UNIT_RESERVED_ROOM(u) (u->reserved_end_pc - u->start_pc)
/* we keep the heap_unit_t header at the top of the unit; this macro calculates
* the committed size of the unit by adding the header size to the available size
*/
#define UNIT_COMMIT_SIZE(u) (UNITROOM(u) + sizeof(heap_unit_t))
#define UNIT_RESERVED_SIZE(u) (UNIT_RESERVED_ROOM(u) + sizeof(heap_unit_t))
#define UNIT_ALLOC_START(u) (u->start_pc - sizeof(heap_unit_t))
#define UNIT_GET_START_PC(u) (byte *)(((ptr_uint_t)u) + sizeof(heap_unit_t))
#define UNIT_COMMIT_END(u) (u->end_pc)
#define UNIT_RESERVED_END(u) (u->reserved_end_pc)
/* Gets the allocated size of the unit (reserved size; doesn't include guard pages
* as those are not considered part of the usable space).
*/
#define UNITALLOC(u) (UNIT_RESERVED_SIZE(u))
/* Gets unit overhead: includes reserved and committed (sizeof(heap_unit_t)) portions. */
#define UNITOVERHEAD sizeof(heap_unit_t)
/* any alloc request larger than this needs a special unit */
#define MAXROOM (HEAP_UNIT_MAX_SIZE - UNITOVERHEAD)
/* maximum valid allocation (to guard against internal integer overflows) */
#define MAX_VALID_HEAP_ALLOCATION INT_MAX
/* thread-local heap structure
* this struct is kept at top of unit itself, not in separate allocation
*/
typedef struct _heap_unit_t {
heap_pc start_pc; /* start address of heap storage */
heap_pc end_pc; /* open-ended end address of heap storage */
heap_pc cur_pc; /* open-ended current end of allocated storage */
heap_pc reserved_end_pc; /* open-ended end of reserved (not nec committed) memory */
bool in_vmarea_list; /* perf opt for delayed batch vmarea updating */
which_vmm_t which;
#ifdef DEBUG
int id; /* # of this unit */
#endif
struct _heap_unit_t *next_local; /* used to link thread's units */
struct _heap_unit_t *next_global; /* used to link all units */
struct _heap_unit_t *prev_global; /* used to link all units */
} heap_unit_t;
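/* Sketch of a unit's address space, as implied by the macros above (guard
 * pages, when enabled, lie outside this range):
 *   [ heap_unit_t ][ start_pc ... cur_pc ....... end_pc ) ..... reserved_end_pc )
 *   |---- committed (UNIT_COMMIT_SIZE) ----------------|
 *   |---- reserved (UNIT_RESERVED_SIZE) -------------------------------------->|
 * cur_pc is the bump pointer for fresh carves; freed blocks go onto the
 * per-size free lists in thread_units_t below rather than back to the unit.
 */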
#ifdef HEAP_ACCOUNTING
typedef struct _heap_acct_t {
size_t alloc_reuse[ACCT_LAST];
size_t alloc_new[ACCT_LAST];
size_t cur_usage[ACCT_LAST];
size_t max_usage[ACCT_LAST];
size_t max_single[ACCT_LAST];
uint num_alloc[ACCT_LAST];
} heap_acct_t;
#endif
/* FIXME (case 6336): rename to heap_t:
* a heap_t is a collection of units with the same properties
* to reflect that this is used for more than just thread-private memory.
* Also rename the "tu" vars to "h"
*/
typedef struct _thread_units_t {
heap_unit_t *top_unit; /* start of linked list of heap units */
heap_unit_t *cur_unit; /* current unit in heap list */
heap_pc free_list[BLOCK_TYPES];
#ifdef DEBUG
int num_units; /* total # of heap units */
#endif
dcontext_t *dcontext; /* back pointer to owner */
which_vmm_t which;
bool writable; /* remember state of heap protection */
#ifdef HEAP_ACCOUNTING
heap_acct_t acct;
#endif
} thread_units_t;
#define REACHABLE_HEAP() (IF_X64_ELSE(DYNAMO_OPTION(reachable_heap), true))
/* per-thread structure: */
typedef struct _thread_heap_t {
thread_units_t *local_heap;
/* We separate out heap memory used for fragments, linking, and vmarea multi-entries
* both to enable resetting memory and for safety for unlink flushing in the presence
* of clean calls out of the cache that might allocate IR memory (which does not
* use nonpersistent heap). Any client actions that involve fragments or linking
* should require couldbelinking status, which makes them safe wrt unlink flushing.
* Xref DrMi#1791.
*/
thread_units_t *nonpersistent_heap;
thread_units_t *reachable_heap; /* Only used if !REACHABLE_HEAP() */
#ifdef UNIX
/* Used for -satisfy_w_xor_x. */
heap_pc fork_copy_start;
size_t fork_copy_size;
vm_area_vector_t *fork_copy_areas;
#endif
} thread_heap_t;
/* global, unique thread-shared structure:
* FIXME: give this name to thread_units_t, and name this AllHeapUnits
*/
typedef struct _heap_t {
heap_unit_t *units; /* list of all allocated units */
heap_unit_t *dead; /* list of deleted units ready for re-allocation */
/* FIXME: num_dead duplicates d_r_stats->heap_num_free, but we want num_dead
* for release build too, so it's separate...can we do better?
*/
uint num_dead;
} heap_t;
/* no synch needed since only written once */
static bool heap_exiting = false;
#ifdef DEBUG
DECLARE_NEVERPROT_VAR(static bool ever_beyond_vmm, false);
#endif
/* Lock used only for managing heap units, not for normal thread-local alloc.
* Must be recursive due to circular dependencies between vmareas and global heap.
* Furthermore, always grab dynamo_vm_areas_lock() before grabbing this lock,
* to make DR areas update and heap alloc/free atomic!
*/
DECLARE_CXTSWPROT_VAR(static recursive_lock_t heap_unit_lock,
INIT_RECURSIVE_LOCK(heap_unit_lock));
/* N.B.: if these two locks are ever owned at the same time, the convention is
* that global_alloc_lock MUST be grabbed first, to avoid deadlocks
*/
/* separate lock for global heap access to avoid contention between local unit
* creation and global heap alloc
* must be recursive so that heap_vmareas_synch_units can hold it and heap_unit_lock
* up front to avoid deadlocks, and still allow vmareas to global_alloc --
* BUT we do NOT want global_heap_alloc() to be able to recurse!
* FIXME: either find a better solution to the heap_vmareas_synch_units deadlock
* that is as efficient, or find a way to assert that the only recursion is
* from heap_vmareas_synch_units to global_alloc
*/
DECLARE_CXTSWPROT_VAR(static recursive_lock_t global_alloc_lock,
INIT_RECURSIVE_LOCK(global_alloc_lock));
/* Used to sync low on memory event */
DECLARE_CXTSWPROT_VAR(static recursive_lock_t low_on_memory_pending_lock,
INIT_RECURSIVE_LOCK(low_on_memory_pending_lock));
/* Denotes whether or not low on memory event requires triggering. */
DECLARE_FREQPROT_VAR(bool low_on_memory_pending, false);
#if defined(DEBUG) && defined(HEAP_ACCOUNTING) && defined(HOT_PATCHING_INTERFACE)
static int
get_special_heap_header_size(void);
#endif
vm_area_vector_t *landing_pad_areas; /* PR 250294 */
#ifdef WINDOWS
/* i#939: we steal space from ntdll's +rx segment */
static app_pc lpad_temp_writable_start;
static size_t lpad_temp_writable_size;
static void
release_landing_pad_mem(void);
#endif
/* Indicates whether we should back out of a global alloc/free and grab the
* DR areas lock first, to retry
*/
static bool
safe_to_allocate_or_free_heap_units()
{
return ((!self_owns_recursive_lock(&global_alloc_lock) &&
!self_owns_recursive_lock(&heap_unit_lock)) ||
self_owns_dynamo_vm_area_lock());
}
/* indicates a dynamo vm area remove was delayed
* protected by the heap_unit_lock
*/
DECLARE_FREQPROT_VAR(static bool dynamo_areas_pending_remove, false);
#ifdef HEAP_ACCOUNTING
const char *whichheap_name[] = {
/* max length for aligned output is length of "BB Fragments" */
"BB Fragments",
"Coarse Links",
"Future Frag",
"Frag Tables",
"IBL Tables",
"Traces",
"FC Empties",
"Vm Multis",
"IR",
"RCT Tables",
"VM Areas",
"Symbols",
# ifdef SIDELINE
"Sideline",
# endif
"TH Counter",
"Tombstone",
"Hot Patching",
"Thread Mgt",
"Memory Mgt",
"Stats",
"SpecialHeap",
"Client",
"Lib Dup",
"Clean Call",
/* NOTE: Add your heap name here */
"Other",
};
/* Since using a lock for these stats adds a lot of contention, we
* follow a two-pronged strategy:
* 1) For accurate stats we add a thread's final stats to the global only
* when it is cleaned up. But, this prevents global stats from being
* available in the middle of a run or if a run is not cleaned up nicely.
* 2) We have a set of heap_accounting stats for incremental global stats
* that are available at any time, yet racy and so may be off a little.
*/
/* all set to 0 is the only initialization we need */
DECLARE_NEVERPROT_VAR(static thread_units_t global_racy_units, { 0 });
/* macro to get the type abstracted */
# define ACCOUNT_FOR_ALLOC_HELPER(type, tu, which, alloc_sz, ask_sz) \
do { \
(tu)->acct.type[which] += alloc_sz; \
(tu)->acct.num_alloc[which]++; \
(tu)->acct.cur_usage[which] += alloc_sz; \
if ((tu)->acct.cur_usage[which] > (tu)->acct.max_usage[which]) \
(tu)->acct.max_usage[which] = (tu)->acct.cur_usage[which]; \
if (ask_sz > (tu)->acct.max_single[which]) \
(tu)->acct.max_single[which] = ask_sz; \
} while (0)
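/* The "type" argument names the heap_acct_t array to credit (in practice
 * alloc_new for fresh carves and alloc_reuse for free-list hits), so the same
 * helper covers both paths for the per-thread and the racy global accounting
 * structs.
 */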
# define ACCOUNT_FOR_ALLOC(type, tu, which, alloc_sz, ask_sz) \
do { \
STATS_ADD_PEAK(heap_claimed, alloc_sz); \
ACCOUNT_FOR_ALLOC_HELPER(type, tu, which, alloc_sz, ask_sz); \
ACCOUNT_FOR_ALLOC_HELPER(type, &global_racy_units, which, alloc_sz, ask_sz); \
} while (0)
# define ACCOUNT_FOR_FREE(tu, which, size) \
do { \
STATS_SUB(heap_claimed, (size)); \
(tu)->acct.cur_usage[which] -= size; \
global_racy_units.acct.cur_usage[which] -= size; \
} while (0)
#else
# define ACCOUNT_FOR_ALLOC(type, tu, which, alloc_sz, ask_sz)
# define ACCOUNT_FOR_FREE(tu, which, size)
#endif
typedef byte *vm_addr_t;
#ifdef X64
/* designates the closed interval within which we must allocate DR heap space */
static byte *heap_allowable_region_start = (byte *)PTR_UINT_0;
static byte *heap_allowable_region_end = (byte *)POINTER_MAX;
/* In standalone mode we do not guarantee 32-bit reachability for anything.
* This lets apps grow beyond 4G of heap.
*/
# define HEAP_REACHABILITY_ENABLED() (!standalone_library)
/* Used only to protect read/write access to the must_reach_* static variables
* used in request_region_be_heap_reachable().
*/
DECLARE_CXTSWPROT_VAR(static mutex_t request_region_be_heap_reachable_lock,
INIT_LOCK_FREE(request_region_be_heap_reachable_lock));
/* Initialize so will be overridden on first call; protected by the
* request_region_be_heap_reachable_lock.
*/
static byte *must_reach_region_start = (byte *)POINTER_MAX;
static byte *must_reach_region_end = (byte *)PTR_UINT_0; /* closed */
static void
reset_heap_reachable_bounds(void)
{
heap_allowable_region_start = (byte *)PTR_UINT_0;
heap_allowable_region_end = (byte *)POINTER_MAX;
must_reach_region_start = (byte *)POINTER_MAX;
must_reach_region_end = (byte *)PTR_UINT_0; /* closed */
}
/* Request that the supplied region be reachable from the DR heap via a 32-bit offset. Should
* be called before vmm_heap_init() so we can place the DR heap to meet these constraints.
* Can also be called post vmm_heap_init() but at that point acts as an assert that the
* supplied region is reachable since the heap is already reserved.
*
* Must be called at least once up front, for the -heap_in_lower_4GB code here
* to kick in!
*/
void
request_region_be_heap_reachable(byte *start, size_t size)
{
if (!HEAP_REACHABILITY_ENABLED())
return;
LOG(GLOBAL, LOG_HEAP, 2,
"Adding must-be-reachable-from-heap region " PFX "-" PFX "\n"
"Existing must-be-reachable region " PFX "-" PFX "\n"
"Existing allowed range " PFX "-" PFX "\n",
start, start + size, must_reach_region_start, must_reach_region_end,
heap_allowable_region_start, heap_allowable_region_end);
ASSERT(!POINTER_OVERFLOW_ON_ADD(start, size));
ASSERT(size > 0);
d_r_mutex_lock(&request_region_be_heap_reachable_lock);
if (start < must_reach_region_start) {
byte *allowable_end_tmp;
SELF_UNPROTECT_DATASEC(DATASEC_RARELY_PROT);
must_reach_region_start = start;
allowable_end_tmp =
REACHABLE_32BIT_END(must_reach_region_start, must_reach_region_end);
/* PR 215395 - add in absolute address reachability */
if (DYNAMO_OPTION(heap_in_lower_4GB) &&
allowable_end_tmp > (byte *)POINTER_MAX_32BIT) {
allowable_end_tmp = (byte *)POINTER_MAX_32BIT;
}
/* Write assumed to be atomic so we don't have to hold a lock to use
* heap_allowable_region_end. */
heap_allowable_region_end = allowable_end_tmp;
SELF_PROTECT_DATASEC(DATASEC_RARELY_PROT);
}
if (start + size - 1 > must_reach_region_end) {
SELF_UNPROTECT_DATASEC(DATASEC_RARELY_PROT);
must_reach_region_end = start + size - 1; /* closed */
/* Write assumed to be atomic so we don't have to hold a lock to use
* heap_allowable_region_start. */
heap_allowable_region_start =
REACHABLE_32BIT_START(must_reach_region_start, must_reach_region_end);
SELF_PROTECT_DATASEC(DATASEC_RARELY_PROT);
}
ASSERT(must_reach_region_start <= must_reach_region_end); /* correctness check */
/* verify can be addressed absolutely (if required), correctness check */
ASSERT(!DYNAMO_OPTION(heap_in_lower_4GB) ||
heap_allowable_region_end <= (byte *)POINTER_MAX_32BIT);
d_r_mutex_unlock(&request_region_be_heap_reachable_lock);
LOG(GLOBAL, LOG_HEAP, 1,
"Added must-be-reachable-from-heap region " PFX "-" PFX "\n"
"New must-be-reachable region " PFX "-" PFX "\n"
"New allowed range " PFX "-" PFX "\n",
start, start + size, must_reach_region_start, must_reach_region_end,
heap_allowable_region_start, heap_allowable_region_end);
/* Reachability checks (xref PR 215395, note since we currently can't directly
* control where DR/client dlls are loaded these could fire if rebased). */
ASSERT(heap_allowable_region_start <= must_reach_region_start &&
"x64 reachability contraints not satisfiable");
ASSERT(must_reach_region_end <= heap_allowable_region_end &&
"x64 reachability contraints not satisfiable");
/* Handle release build failure. */
if (heap_allowable_region_start > must_reach_region_start ||
must_reach_region_end > heap_allowable_region_end) {
/* FIXME - in a released product we may want to detach or something else less
* drastic than triggering a FATAL_USAGE_ERROR. */
FATAL_USAGE_ERROR(HEAP_CONTRAINTS_UNSATISFIABLE, 2, get_application_name(),
get_application_pid());
}
}
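/* Worked example (approximate; the exact bounds come from REACHABLE_32BIT_START/END):
 * if the only must-reach region is [R_start, R_end), then any heap address in
 * roughly [R_end - 2GB, R_start + 2GB] can reach every byte of that region with
 * a signed 32-bit displacement, so heap_allowable_region_start/end shrink to
 * that window (further capped at 4GB when -heap_in_lower_4GB).
 */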
void
vmcode_get_reachable_region(byte **region_start DR_PARAM_OUT,
byte **region_end DR_PARAM_OUT)
{
/* We track sub-page for more accuracy on additional constraints, and
* align when asked about it.
*/
if (region_start != NULL)
*region_start = (byte *)ALIGN_FORWARD(heap_allowable_region_start, PAGE_SIZE);
if (region_end != NULL)
*region_end = (byte *)ALIGN_BACKWARD(heap_allowable_region_end, PAGE_SIZE);
}
#endif
/* forward declarations of static functions */
static void
threadunits_init(dcontext_t *dcontext, thread_units_t *tu, size_t size, bool reachable);
/* dcontext only used for debugging */
static void
threadunits_exit(thread_units_t *tu, dcontext_t *dcontext);
static void *
common_heap_alloc(thread_units_t *tu, size_t size HEAPACCT(which_heap_t which));
static bool
common_heap_free(thread_units_t *tu, void *p, size_t size HEAPACCT(which_heap_t which));
static void
release_real_memory(void *p, size_t size, bool remove_vm, which_vmm_t which);
static void
release_guarded_real_memory(vm_addr_t p, size_t size, bool remove_vm, bool guarded,
which_vmm_t which);
typedef enum {
/* I - Init, Interop - first allocation failed
* check for incompatible kernel drivers
*/
OOM_INIT = 0x1,
/* R - Reserve - out of virtual reservation *
* increase -vm_size to reserve more memory
*/
OOM_RESERVE = 0x2,
/* C - Commit - systemwide page file limit, or current process job limit hit
* Increase pagefile size, check for memory leak in any application.
*
* FIXME: possible automatic actions
* if systemwide failure we may want to wait if transient
* FIXME: if in a job, later we want to detect that and just die
* (though after freeing as much memory as we can)
*/
OOM_COMMIT = 0x4,
/* E - Extending Commit - same reasons as Commit
* as a possible workaround, increasing -heap_commit_increment
* may let us commit less frequently;
* on the other hand, committing smaller chunks has a higher
* chance of getting through when there is very little memory.
*
* FIXME: not much more informative than OOM_COMMIT
*/
OOM_EXTEND = 0x8,
} oom_source_t;
static void
report_low_on_memory(which_vmm_t which, oom_source_t source,
heap_error_code_t os_error_code);
#define MAX_VMCODE_SIZE (2ULL * 1024 * 1024 * 1024)
#define MAX_VMHEAP_SIZE (IF_X64_ELSE(128ULL, (4ULL - 1)) * 1024 * 1024 * 1024)
/* We should normally have only one large unit, so this is in fact
* the maximum we should count on in one process
*/
/* minimum will be used only if an invalid option is set */
#define MIN_VMM_HEAP_UNIT_SIZE DYNAMO_OPTION(vmm_block_size)
typedef struct {
vm_addr_t start_addr; /* base virtual address */
vm_addr_t end_addr; /* noninclusive virtual memory range [start,end) */
vm_addr_t alloc_start; /* base allocation virtual address */
size_t alloc_size; /* allocation size */
/* for 64-bit do we want to shift to size_t to allow a larger region?
* if so must update the bitmap_t routines
*/
uint num_blocks; /* total number of blocks in virtual allocation */
mutex_t lock; /* write access to the rest of the fields is protected */
/* We assume the bitmap_t implementation is static, so we don't grab
locks on read accesses. Currently the bitmap_t is read with no write
intent only in ASSERTs. */
uint num_free_blocks; /* currently free blocks */
const char *name;
/* We dynamically allocate the bitmap to allow for different sizes for
* vmcode and vmheap and to allow for large vmheap sizes.
* We place it at start_addr, or the writable equivalent for vmcode.
*/
bitmap_element_t *blocks;
} vm_heap_t;
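/* Each bit in blocks[] covers one -vmm_block_size slice of [start_addr, end_addr);
 * vmm_heap_reserve_blocks() and vmm_heap_free_blocks() below toggle bits under
 * the lock field above (initialization in vmm_heap_unit_init() happens before
 * any contention).
 */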
/* We keep our heap management structs on the heap for selfprot (case 8074).
* Note that we do have static structs for bootstrapping and we later move
* the data here.
*/
typedef struct _heap_management_t {
/* We split our 32-bit-displacement-reachable memory, which is mostly our
* code cache and thus is called "vmcode", from our heap which can go anywhere,
* "vmheap".
* For each, we reserve a single vm_heap_t for guaranteed allocation.
* We fall back to the OS when we run out of reservation space.
* If REACHABLE_HEAP() we do not use vmheap and put everything in
* vmcode.
*/
vm_heap_t vmheap;
/* We only need a single 32-bit-displacement-reachable region since it cannot
* be larger than 2G anyway.
* XXX i#1132: for 64-bit, we make it 2G by default so we never have to
* fall back to the OS! We'll just have to make room to load client libs inside
* the VMM-managed space.
* For 32-bit it will have to remain smaller and handle falling back to the OS.
*/
vm_heap_t vmcode;
/* A writable mirror of read-only vmcode for -satisfy_w_xor_x. */
file_t dual_map_file;
vm_addr_t vmcode_writable_base;
vm_addr_t vmcode_writable_alloc;
heap_t heap;
/* thread-shared heaps: */
thread_units_t global_units;
/* Separate non-persistent heap. See thread_heap_t.nonpersistent_heap comment. */
thread_units_t global_nonpersistent_units;
bool global_heap_writable;
thread_units_t global_unprotected_units;
thread_units_t global_reachable_units; /* Used if !REACHABLE_HEAP() */
} heap_management_t;
/* For bootstrapping until we can allocate our real heapmgt (case 8074).
* temp_heapmgt.lock is initialized in vmm_heap_unit_init().
*/
static heap_management_t temp_heapmgt;
static heap_management_t *heapmgt = &temp_heapmgt; /* initial value until alloced */
static bool vmm_heap_exited = false; /* FIXME: used only to thwart stack_free from trying,
should change the interface for the last stack
*/
#define MEMORY_FILE_NAME "dynamorio_dual_map"
static vm_addr_t
vmm_heap_reserve_blocks(vm_heap_t *vmh, size_t size_in, byte *base, which_vmm_t which);
static bool
vmm_heap_commit(vm_addr_t p, size_t size, uint prot, heap_error_code_t *error_code,
which_vmm_t which);
static inline uint
vmm_addr_to_block(vm_heap_t *vmh, vm_addr_t p)
{
ASSERT(
CHECK_TRUNCATE_TYPE_uint((p - vmh->start_addr) / DYNAMO_OPTION(vmm_block_size)));
return (uint)((p - vmh->start_addr) / DYNAMO_OPTION(vmm_block_size));
}
static inline vm_addr_t
vmm_block_to_addr(vm_heap_t *vmh, uint block)
{
ASSERT(block >= 0 && block < vmh->num_blocks);
return (vm_addr_t)(vmh->start_addr + block * DYNAMO_OPTION(vmm_block_size));
}
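/* Example (hypothetical addresses): with start_addr == 0x71000000 and
 * -vmm_block_size 64K, vmm_addr_to_block() maps 0x71030000 to block 3, and
 * vmm_block_to_addr() maps block 3 back to 0x71030000; the two are inverses
 * at block granularity.
 */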
static bool
vmm_in_same_block(vm_heap_t *vmh, vm_addr_t p1, vm_addr_t p2)
{
return vmm_addr_to_block(vmh, p1) == vmm_addr_to_block(vmh, p2);
}
#if defined(DEBUG) && defined(INTERNAL)
static void
vmm_dump_map(vm_heap_t *vmh)
{
uint i;
bitmap_element_t *b = vmh->blocks;
uint bitmap_size = vmh->num_blocks;
uint last_i = 0;
bool is_used = bitmap_test(b, 0) == 0;
LOG(GLOBAL, LOG_HEAP, 3, "vmm_dump_map(" PFX ")\n", vmh);
/* We used to do raw dumps but with the shift to 4K blocks, this is just way
* too big. We disable but leave the capability to enable one-off use.
*/
DOLOG(20, LOG_HEAP, {
dump_buffer_as_bytes(GLOBAL, b,
BITMAP_INDEX(bitmap_size) * sizeof(bitmap_element_t),
DUMP_RAW | DUMP_ADDRESS);
});
LOG(GLOBAL, LOG_HEAP, 1, "\nvmm_dump_map(" PFX ") virtual regions\n", vmh);
# define VMM_DUMP_MAP_LOG(i, last_i) \
LOG(GLOBAL, LOG_HEAP, 1, PFX "-" PFX " size=%d %s\n", \
vmm_block_to_addr(vmh, last_i), \
vmm_block_to_addr(vmh, i - 1) + DYNAMO_OPTION(vmm_block_size) - 1, \
(i - last_i) * DYNAMO_OPTION(vmm_block_size), \
is_used ? "reserved" : "free");
for (i = 0; i < bitmap_size; i++) {
/* start counting at free/used boundaries */
if (is_used != (bitmap_test(b, i) == 0)) {
VMM_DUMP_MAP_LOG(i, last_i);
is_used = (bitmap_test(b, i) == 0);
last_i = i;
}
}
VMM_DUMP_MAP_LOG(bitmap_size, last_i);
}
#endif /* DEBUG */
static inline void
print_vmh_data(vm_heap_t *vmh, file_t outf)
{
d_r_mutex_lock(&vmh->lock);
print_file(outf, "VM heap: addr range " PFX "--" PFX ", # free blocks %d\n",
vmh->start_addr, vmh->end_addr, vmh->num_free_blocks);
d_r_mutex_unlock(&vmh->lock);
}
void
print_vmm_heap_data(file_t outf)
{
if (heapmgt->vmheap.start_addr != NULL)
print_vmh_data(&heapmgt->vmheap, outf);
if (heapmgt->vmcode.start_addr != NULL)
print_vmh_data(&heapmgt->vmcode, outf);
}
static inline void
vmm_heap_initialize_unusable(vm_heap_t *vmh)
{
vmh->start_addr = vmh->end_addr = NULL;
vmh->num_free_blocks = vmh->num_blocks = 0;
}
static void
report_w_xor_x_fatal_error_and_exit(void)
{
REPORT_FATAL_ERROR_AND_EXIT(FAILED_TO_SATISFY_W_XOR_X, 2, get_application_name(),
get_application_pid());
ASSERT_NOT_REACHED();
}
static void
vmm_place_vmcode(vm_heap_t *vmh, size_t size, heap_error_code_t *error_code)
{
ptr_uint_t preferred = 0;
#ifdef X64
/* -heap_in_lower_4GB takes top priority and has already set heap_allowable_region_*.
* Next comes -vm_base_near_app. It will fail for -vm_size=2G, which we document.
*/
if (DYNAMO_OPTION(vm_base_near_app)) {
/* Required for STATIC_LIBRARY: must be near app b/c clients are there.
* Non-static: still a good idea for fewer rip-rel manglings.
* Asking for app base means we'll prefer before the app, which
* has less of an impact on its heap.
*/
app_pc app_base = get_application_base();
app_pc app_end = get_application_end();
/* To avoid ignoring -vm_base and -vm_max_offset we fall through to that
* code if the app base is near -vm_base.
*/
if (!REL32_REACHABLE(app_base, (app_pc)DYNAMO_OPTION(vm_base)) ||
!REL32_REACHABLE(app_base,
(app_pc)DYNAMO_OPTION(vm_base) +
DYNAMO_OPTION(vm_max_offset)) ||
((app_pc)DYNAMO_OPTION(vm_base) < app_end &&
(app_pc)DYNAMO_OPTION(vm_base) + DYNAMO_OPTION(vm_max_offset) > app_base)) {
byte *reach_base = MAX(REACHABLE_32BIT_START(app_base, app_end),
heap_allowable_region_start);
byte *reach_end =
MIN(REACHABLE_32BIT_END(app_base, app_end), heap_allowable_region_end);
if (reach_base < reach_end) {
size_t add_for_align = DYNAMO_OPTION(vmm_block_size);
if (DYNAMO_OPTION(vmm_block_size) == PAGE_SIZE) {
/* No need for extra space for alignment. */
add_for_align = 0;
}
vmh->alloc_start = os_heap_reserve_in_region(
(void *)ALIGN_FORWARD(reach_base, PAGE_SIZE),
(void *)ALIGN_BACKWARD(reach_end, PAGE_SIZE), size + add_for_align,
error_code, true /*+x*/);
if (vmh->alloc_start != NULL) {
vmh->start_addr = (heap_pc)ALIGN_FORWARD(
vmh->alloc_start, DYNAMO_OPTION(vmm_block_size));
if (add_for_align == 0) {
ASSERT(ALIGNED(vmh->alloc_start, DYNAMO_OPTION(vmm_block_size)));
ASSERT(vmh->start_addr == vmh->alloc_start);
}
request_region_be_heap_reachable(app_base, app_end - app_base);
LOG(GLOBAL, LOG_HEAP, 1, "vmm_heap_unit_init: placed %s near app\n",
vmh->name);
}
}
}
}
#endif /* X64 */
/* Next we try the -vm_base value plus a random offset. */
if (vmh->start_addr == NULL) {
/* Out of 32 bits, 12 are page offset; windows wastes 4 more
* since its allocation granularity is 64KB, and if we want to stay
* safely in say 0x20000000-0x2fffffff we're left with only 12
* bits of randomness - which may be too little. On the other
* hand changing any of the lower 16 bits will make our bugs
* non-deterministic. */
/* Make sure we don't waste the lower bits from our random number */
preferred = (DYNAMO_OPTION(vm_base) +
get_random_offset(DYNAMO_OPTION(vm_max_offset) /
DYNAMO_OPTION(vmm_block_size)) *
DYNAMO_OPTION(vmm_block_size));
preferred = ALIGN_FORWARD(preferred, OS_ALLOC_GRANULARITY);
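/* Illustrative numbers: -vm_base 0x71000000 with -vm_max_offset 0x10000000 and
 * a 64KB block size gives 0x1000 (4096) possible block-aligned preferred bases
 * in [0x71000000, 0x81000000).
 */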
/* overflow check: w/ vm_base shouldn't happen so debug-only check */
ASSERT(!POINTER_OVERFLOW_ON_ADD(preferred, size));
/* let's assume a single chunk is sufficient to reserve */
#ifdef X64
if ((byte *)preferred < heap_allowable_region_start ||
(byte *)preferred + size > heap_allowable_region_end) {
*error_code = HEAP_ERROR_NOT_AT_PREFERRED;
LOG(GLOBAL, LOG_HEAP, 1,
"vmm_heap_unit_init preferred=" PFX " too far from " PFX "-" PFX "\n",
preferred, heap_allowable_region_start, heap_allowable_region_end);
} else {
#endif
vmh->alloc_start =
os_heap_reserve((void *)preferred, size, error_code, true /*+x*/);
vmh->start_addr = vmh->alloc_start;
LOG(GLOBAL, LOG_HEAP, 1,
"vmm_heap_unit_init preferred=" PFX " got start_addr=" PFX "\n",
preferred, vmh->start_addr);
#ifdef X64
}
#endif
}
while (vmh->start_addr == NULL && DYNAMO_OPTION(vm_allow_not_at_base)) {
/* Since we prioritize low-4GB or near-app over -vm_base, we do not
* syslog or assert here
*/
/* need extra size to ensure alignment */
vmh->alloc_size = size + DYNAMO_OPTION(vmm_block_size);
#ifdef X64
/* PR 215395, make sure allocation satisfies heap reachability constraints */
vmh->alloc_start = os_heap_reserve_in_region(
(void *)ALIGN_FORWARD(heap_allowable_region_start, PAGE_SIZE),
(void *)ALIGN_BACKWARD(heap_allowable_region_end, PAGE_SIZE),
size + DYNAMO_OPTION(vmm_block_size), error_code, true /*+x*/);
#else
vmh->alloc_start = (heap_pc)os_heap_reserve(
NULL, size + DYNAMO_OPTION(vmm_block_size), error_code, true /*+x*/);
#endif
vmh->start_addr =
(heap_pc)ALIGN_FORWARD(vmh->alloc_start, DYNAMO_OPTION(vmm_block_size));
LOG(GLOBAL, LOG_HEAP, 1,
"vmm_heap_unit_init unable to allocate at preferred=" PFX
" letting OS place sz=%dM addr=" PFX "\n",
preferred, size / (1024 * 1024), vmh->start_addr);
if (vmh->alloc_start == NULL && DYNAMO_OPTION(vm_allow_smaller)) {
/* Just a little smaller might fit */
size_t sub = (size_t)ALIGN_FORWARD(size / 16, 1024 * 1024);
SYSLOG_INTERNAL_WARNING_ONCE("Full size vmm heap allocation failed");
if (size > sub)
size -= sub;
else
break;
} else
break;
}
#ifdef X64
if (DYNAMO_OPTION(satisfy_w_xor_x)) {
/* Rather than replacing the 3 os_heap_reserve* calls above with os_map_file
* whose MAP_FILE_REACHABLE relies on VMM (us!) being initialized, which is
* tricky, we simply do the standard reserve above and then map our file
* on top. TODO i#3566: We need a different strategy on Windows.
*/
/* Ensure os_map_file ignores vmcode: */
ASSERT(!is_vmm_reserved_address(vmh->start_addr, size, NULL, NULL));
size_t map_size = vmh->alloc_size;
byte *map_base =
os_map_file(heapmgt->dual_map_file, &map_size, 0, vmh->alloc_start,
MEMPROT_NONE, MAP_FILE_VMM_COMMIT | MAP_FILE_FIXED);
if (map_base != vmh->alloc_start || map_size != vmh->alloc_size) {
report_w_xor_x_fatal_error_and_exit();
ASSERT_NOT_REACHED();
}
}
/* ensure future out-of-block heap allocations are reachable from this allocation */
if (vmh->start_addr != NULL) {
ASSERT(vmh->start_addr >= heap_allowable_region_start &&
!POINTER_OVERFLOW_ON_ADD(vmh->start_addr, size) &&
vmh->start_addr + size <= heap_allowable_region_end);
request_region_be_heap_reachable(vmh->start_addr, size);
}
#endif
ASSERT(ALIGNED(vmh->start_addr, DYNAMO_OPTION(vmm_block_size)));
}
/* Does not return. */
static void
vmm_heap_unit_init_failed(vm_heap_t *vmh, heap_error_code_t error_code, const char *name)
{
LOG(GLOBAL, LOG_HEAP, 1, "vmm_heap_unit_init %s: failed to allocate memory!\n", name);
vmm_heap_initialize_unusable(vmh);
/* We couldn't even reserve initial virtual memory - we're out of luck. */
report_low_on_memory(VMM_HEAP, OOM_INIT, error_code);
ASSERT_NOT_REACHED();
}
static void
vmm_heap_unit_init(vm_heap_t *vmh, size_t size, bool is_vmcode, const char *name)
{
heap_error_code_t error_code = 0;
ASSIGN_INIT_LOCK_FREE(vmh->lock, vmh_lock);
/* We need to get the lock into the process list before we copy out of
* temp_heapmgt, else it will point to freed memory when we go back to temp_heapmgt
* for lock cleanup code.
*/
d_r_mutex_lock(&vmh->lock);
d_r_mutex_unlock(&vmh->lock);
size = ALIGN_FORWARD(size, DYNAMO_OPTION(vmm_block_size));
vmh->alloc_size = size;
vmh->start_addr = NULL;
vmh->name = name;
if (size == 0) {
vmm_heap_initialize_unusable(vmh);
return;
}
if (is_vmcode) {
/* This is our must-be-reachable alloc whose placement matters and is
* controlled by runtime options.
*/
if (DYNAMO_OPTION(satisfy_w_xor_x)) {
heapmgt->dual_map_file = os_create_memory_file(MEMORY_FILE_NAME, size);
if (heapmgt->dual_map_file == INVALID_FILE) {
report_w_xor_x_fatal_error_and_exit();
ASSERT_NOT_REACHED();
}
}
vmm_place_vmcode(vmh, size, &error_code);
if (DYNAMO_OPTION(satisfy_w_xor_x)) {
size_t map_size = vmh->alloc_size;
heapmgt->vmcode_writable_alloc =
os_map_file(heapmgt->dual_map_file, &map_size, 0, NULL, MEMPROT_NONE, 0);
ASSERT(map_size == vmh->alloc_size);
if (heapmgt->vmcode_writable_alloc == 0) {
LOG(GLOBAL, LOG_HEAP, 1,
"vmm_heap_unit_init %s: failed to allocate writable vmcode!\n");
vmm_heap_initialize_unusable(vmh);
report_low_on_memory(VMM_CACHE | VMM_REACHABLE, OOM_INIT, error_code);
ASSERT_NOT_REACHED();
}
heapmgt->vmcode_writable_base = (heap_pc)ALIGN_FORWARD(
heapmgt->vmcode_writable_alloc, DYNAMO_OPTION(vmm_block_size));
LOG(GLOBAL, LOG_HEAP, 1,
"vmm_heap_unit_init vmcode+w reservation: [" PFX "," PFX ")\n",
heapmgt->vmcode_writable_base, heapmgt->vmcode_writable_base + size);
}
} else {
/* These days every OS provides ASLR, so we do not bother to do our own
* for this second reservation and rely on the OS.
*/
vmh->alloc_size = size + DYNAMO_OPTION(vmm_block_size);
vmh->alloc_start = (heap_pc)os_heap_reserve(
NULL, size + DYNAMO_OPTION(vmm_block_size), &error_code, false /*-x*/);
vmh->start_addr =
(heap_pc)ALIGN_FORWARD(vmh->alloc_start, DYNAMO_OPTION(vmm_block_size));
}
if (vmh->start_addr == 0) {
vmm_heap_unit_init_failed(vmh, error_code, name);
ASSERT_NOT_REACHED();
}
vmh->end_addr = vmh->start_addr + size;
ASSERT_TRUNCATE(vmh->num_blocks, uint, size / DYNAMO_OPTION(vmm_block_size));
vmh->num_blocks = (uint)(size / DYNAMO_OPTION(vmm_block_size));
size_t blocks_sz_bytes = BITMAP_INDEX(vmh->num_blocks) * sizeof(bitmap_element_t);
blocks_sz_bytes = ALIGN_FORWARD(blocks_sz_bytes, DYNAMO_OPTION(vmm_block_size));
/* We place the bitmap at the start of the (writable) vmm region. */
vmh->blocks = (bitmap_element_t *)vmh->start_addr;
if (is_vmcode)
vmh->blocks = (bitmap_element_t *)vmcode_get_writable_addr((byte *)vmh->blocks);
vmh->num_free_blocks = vmh->num_blocks;
LOG(GLOBAL, LOG_HEAP, 1,
"vmm_heap_unit_init %s reservation: [" PFX "," PFX ") total=%d free=%d\n", name,
vmh->start_addr, vmh->end_addr, vmh->num_blocks, vmh->num_free_blocks);
/* Make sure the vmm area is properly aligned on block boundaries.
* The size was aligned above.
*/
ASSERT(ALIGNED(vmh->blocks, DYNAMO_OPTION(vmm_block_size)));
which_vmm_t which = VMM_HEAP | (is_vmcode ? VMM_REACHABLE : 0);
/* We have to commit the bitmap's pages before reserving its blocks below, which our code supports. */
vmm_heap_commit((vm_addr_t)vmh->blocks, blocks_sz_bytes, MEMPROT_READ | MEMPROT_WRITE,
&error_code, which);
if (error_code != 0) {
vmm_heap_unit_init_failed(vmh, error_code, name);
ASSERT_NOT_REACHED();
}
bitmap_initialize_free(vmh->blocks, vmh->num_blocks);
vmm_heap_reserve_blocks(vmh, blocks_sz_bytes, vmh->start_addr, which);
DOLOG(1, LOG_HEAP, { vmm_dump_map(vmh); });
ASSERT(bitmap_check_consistency(vmh->blocks, vmh->num_blocks, vmh->num_free_blocks));
}
static void
vmm_heap_unit_exit(vm_heap_t *vmh)
{
LOG(GLOBAL, LOG_HEAP, 1, "vmm_heap_unit_exit %s [" PFX "," PFX ") total=%d free=%d\n",
vmh->name, vmh->start_addr, vmh->end_addr, vmh->num_blocks, vmh->num_free_blocks);
/* we assume single thread in DR at this point */
DELETE_LOCK(vmh->lock);
if (vmh->start_addr == NULL)
return;
DOLOG(1, LOG_HEAP, { vmm_dump_map(vmh); });
ASSERT(bitmap_check_consistency(vmh->blocks, vmh->num_blocks, vmh->num_free_blocks));
ASSERT(vmh->num_blocks * DYNAMO_OPTION(vmm_block_size) ==
(ptr_uint_t)(vmh->end_addr - vmh->start_addr));
/* If there are no tombstones we can just free the unit, and that is
* what we do; otherwise it stays mapped forever.
*/
bool free_heap = vmh->num_free_blocks == vmh->num_blocks;
#ifdef UNIX
/* On unix there's no fear of leftover tombstones, and as long as we're
* doing a detach we can be sure our stack is not actually in the heap.
*/
if (doing_detach) {
DODEBUG({
byte *sp;
GET_STACK_PTR(sp);
ASSERT(!(sp >= vmh->start_addr && sp < vmh->end_addr));
});
free_heap = true;
}
#endif
if (free_heap) {
heap_error_code_t error_code;
os_heap_free(vmh->alloc_start, vmh->alloc_size, &error_code);
ASSERT(error_code == HEAP_ERROR_SUCCESS);
if (DYNAMO_OPTION(satisfy_w_xor_x) && vmh == &heapmgt->vmcode) {
os_heap_free(heapmgt->vmcode_writable_alloc, vmh->alloc_size, &error_code);
ASSERT(error_code == HEAP_ERROR_SUCCESS);
os_delete_memory_file(MEMORY_FILE_NAME, heapmgt->dual_map_file);
heapmgt->dual_map_file = INVALID_FILE;
}
} else {
/* FIXME: doing nothing for now - we only care about this in
* detach scenarios where we should try to clean up from the
* virtual address space
*/
}
vmm_heap_initialize_unusable(vmh);
}
/* Returns whether within the region we reserved from the OS for doling
* out internally via our vm_heap_t; asserts that the address was also
* logically reserved within the vm_heap_t.
*/
static bool
vmm_is_reserved_unit(vm_heap_t *vmh, vm_addr_t p, size_t size)
{
size = ALIGN_FORWARD(size, DYNAMO_OPTION(vmm_block_size));
if (p < vmh->start_addr || vmh->end_addr < p /*overflow*/ ||
vmh->end_addr < (p + size))
return false;
ASSERT(CHECK_TRUNCATE_TYPE_uint(size / DYNAMO_OPTION(vmm_block_size)));
ASSERT(bitmap_are_reserved_blocks(vmh->blocks, vmh->num_blocks,
vmm_addr_to_block(vmh, p),
(uint)(size / DYNAMO_OPTION(vmm_block_size))));
return true;
}
static inline bool
is_vmh_reserved_address(vm_heap_t *vmh, byte *pc, size_t size,
DR_PARAM_OUT byte **region_start, DR_PARAM_OUT byte **region_end)
{
/* Case 10293: we don't call vmm_is_reserved_unit to avoid its
* assert, which we want to maintain for callers only dealing with
* DR-allocated addresses, while this routine is called w/ random
* addresses
*/
if (pc >= vmh->start_addr && !POINTER_OVERFLOW_ON_ADD(pc, size) &&
(pc + size) <= vmh->end_addr) {
if (region_start != NULL)
*region_start = vmh->start_addr;
if (region_end != NULL)
*region_end = vmh->end_addr;
return true;
}
return false;
}
/* Returns whether entirely within a region we reserve from the OS for doling
* out internally via our vm_heap_t. Optionally returns the bounds of the region.
* Does not consider memory we allocate once we run out of our original reservations.
*/
bool
is_vmm_reserved_address(byte *pc, size_t size, DR_PARAM_OUT byte **region_start,
DR_PARAM_OUT byte **region_end)
{
ASSERT(heapmgt != NULL);
if (heapmgt->vmheap.start_addr != NULL &&
is_vmh_reserved_address(&heapmgt->vmheap, pc, size, region_start, region_end))
return true;
if (heapmgt->vmcode.start_addr != NULL &&
is_vmh_reserved_address(&heapmgt->vmcode, pc, size, region_start, region_end))
return true;
if (heapmgt->vmcode_writable_base != NULL &&
is_vmh_reserved_address(&heapmgt->vmcode, vmcode_get_executable_addr(pc), size,
region_start, region_end)) {
if (region_start != NULL)
*region_start = vmcode_get_writable_addr(*region_start);
if (region_end != NULL)
*region_end = vmcode_get_writable_addr(*region_end);
return true;
}
return false;
}
byte *
vmcode_get_start(void)
{
if (heapmgt->vmcode.start_addr != NULL)
return heapmgt->vmcode.start_addr;
if (heapmgt->vmheap.start_addr != NULL)
return heapmgt->vmheap.start_addr;
return NULL;
}
byte *
vmcode_get_end(void)
{
if (heapmgt->vmcode.start_addr != NULL)
return heapmgt->vmcode.end_addr;
if (heapmgt->vmheap.start_addr != NULL)
return heapmgt->vmheap.end_addr;
return NULL;
}
static vm_heap_t *
vmheap_for_which(which_vmm_t which)
{
if (TEST(VMM_REACHABLE, which) || REACHABLE_HEAP())
return &heapmgt->vmcode;
else
return &heapmgt->vmheap;
}
byte *
vmcode_get_writable_addr(byte *exec_addr)
{
/* XXX i#5383: Audit these calls and ensure they cover all scenarios, are placed
* at the most efficient level, and are always properly paired.
*/
PTHREAD_JIT_WRITE();
if (!DYNAMO_OPTION(satisfy_w_xor_x))
return exec_addr;
/* If we want this to be an assert instead to catch superfluous calls, we'll need
* to change things like set_selfmod_sandbox_offsets()'s call to
* encode_with_patch_list() into a stack buffer.
*/
if (exec_addr < heapmgt->vmcode.start_addr || exec_addr >= heapmgt->vmcode.end_addr)
return exec_addr;
return (exec_addr - heapmgt->vmcode.start_addr) + heapmgt->vmcode_writable_base;
}
byte *
vmcode_get_executable_addr(byte *write_addr)
{
if (!DYNAMO_OPTION(satisfy_w_xor_x))
return write_addr;
if (write_addr < heapmgt->vmcode_writable_base ||
write_addr >= heapmgt->vmcode_writable_base +
(heapmgt->vmcode.end_addr - heapmgt->vmcode.start_addr))
return write_addr;
return (write_addr - heapmgt->vmcode_writable_base) + heapmgt->vmcode.start_addr;
}
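/* Sketch of the dual-map translation used when -satisfy_w_xor_x is on: the
 * executable and writable views map the same file at a fixed delta, so each
 * direction is just (addr - one_view_base) + other_view_base, and addresses
 * outside the vmcode region pass through unchanged.
 */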
#ifdef DEBUG_MEMORY
static inline byte *
vmm_get_writable_addr(byte *exec_addr, which_vmm_t which)
{
vm_heap_t *vmh = vmheap_for_which(which);
if (vmh == &heapmgt->vmcode)
return vmcode_get_writable_addr(exec_addr);
return exec_addr;
}
#endif
/* The caller must first ensure this is a vmcode address. Returns p_writable. */
static inline vm_addr_t
vmm_normalize_addr(vm_heap_t *vmh, DR_PARAM_INOUT vm_addr_t *p_exec)
{
vm_addr_t p = *p_exec;
if (p < vmh->start_addr || p >= vmh->end_addr) {
/* This is a writable addr. */
p = (p - heapmgt->vmcode_writable_base) + vmh->start_addr;
*p_exec = p;
}
return (p - vmh->start_addr) + heapmgt->vmcode_writable_base;
}
#ifdef WINDOWS
static byte *
vmheap_get_start(void)
{
if (heapmgt->vmheap.start_addr != NULL)
return heapmgt->vmheap.start_addr;
if (heapmgt->vmcode.start_addr != NULL)
return heapmgt->vmcode.start_addr;
return NULL;
}
#endif
static inline bool
has_guard_pages(which_vmm_t which)
{
if (!DYNAMO_OPTION(guard_pages))
return false;
if (TEST(VMM_PER_THREAD, which) && !DYNAMO_OPTION(per_thread_guard_pages))
return false;
return true;
}
void
iterate_vmm_regions(void (*cb)(byte *region_start, byte *region_end, void *user_data),
void *user_data)
{
if (heapmgt->vmcode.start_addr != NULL)
(*cb)(heapmgt->vmcode.start_addr, heapmgt->vmcode.end_addr, user_data);
if (heapmgt->vmheap.start_addr != NULL)
(*cb)(heapmgt->vmheap.start_addr, heapmgt->vmheap.end_addr, user_data);
if (heapmgt->vmcode_writable_base != NULL) {
(*cb)(heapmgt->vmcode_writable_base,
heapmgt->vmcode_writable_base +
(heapmgt->vmcode.end_addr - heapmgt->vmcode.start_addr),
user_data);
}
}
byte *
vmcode_unreachable_pc(void)
{
#ifdef X86_64
/* This is used to indicate something that is unreachable from *everything*
* for DR_CLEANCALL_INDIRECT, so ideally we want more than just an
* address that vmcode can't reach.
* We use a non-canonical address for x86_64.
*/
return (byte *)0x8000000100000000ULL;
#else
/* This is not really used for aarch* so we just go with vmcode reachability. */
ptr_uint_t start = (ptr_uint_t)vmcode_get_start();
ptr_uint_t end = (ptr_uint_t)vmcode_get_end();
if (start > INT_MAX)
return NULL;
else {
/* We do not use -1 to avoid wraparound from thinking it's reachable. */
return (byte *)end + INT_MAX + PAGE_SIZE;
}
#endif
}
bool
rel32_reachable_from_vmcode(byte *tgt)
{
#ifdef X64
/* To handle beyond-vmm-reservation allocs, we must compare to the allowable
* heap range and not just the vmcode range (i#1479).
*/
ptr_int_t new_offs = (tgt > heap_allowable_region_start)
? (tgt - heap_allowable_region_start)
: (heap_allowable_region_end - tgt);
ASSERT(vmcode_get_start() >= heap_allowable_region_start ||
!DYNAMO_OPTION(vm_reserve));
ASSERT(vmcode_get_end() <= heap_allowable_region_end + 1 /*closed*/ ||
!DYNAMO_OPTION(vm_reserve));
return REL32_REACHABLE_OFFS(new_offs);
#else
return true;
#endif
}
bool
rel32_reachable_from_current_vmcode(byte *tgt)
{
#ifdef X64
ptr_int_t new_offs = (tgt > must_reach_region_start) ? (tgt - must_reach_region_start)
: (must_reach_region_end - tgt);
return REL32_REACHABLE_OFFS(new_offs);
#else
return true;
#endif
}
static inline void
vmm_update_block_stats(which_vmm_t which, uint num_blocks, bool add)
{
/* We do not split the stats for cache (always reachable) or stack (never reachable).
* We confirm our assumptions here.
*/
ASSERT(!TESTALL(VMM_REACHABLE | VMM_STACK, which) &&
(TEST(VMM_REACHABLE, which) || !TEST(VMM_CACHE, which)));
/* XXX: find some way to make a stats array */
if (add) {
if (TEST(VMM_HEAP, which)) {
if (TEST(VMM_REACHABLE, which))
RSTATS_ADD_PEAK(vmm_blocks_reach_heap, num_blocks);
else
RSTATS_ADD_PEAK(vmm_blocks_unreach_heap, num_blocks);
} else if (TEST(VMM_CACHE, which))
RSTATS_ADD_PEAK(vmm_blocks_reach_cache, num_blocks);
else if (TEST(VMM_STACK, which))
RSTATS_ADD_PEAK(vmm_blocks_unreach_stack, num_blocks);
else if (TEST(VMM_SPECIAL_HEAP, which)) {
if (TEST(VMM_REACHABLE, which))
RSTATS_ADD_PEAK(vmm_blocks_reach_special_heap, num_blocks);
else
RSTATS_ADD_PEAK(vmm_blocks_unreach_special_heap, num_blocks);
} else if (TEST(VMM_SPECIAL_MMAP, which)) {
if (TEST(VMM_REACHABLE, which))
RSTATS_ADD_PEAK(vmm_blocks_reach_special_mmap, num_blocks);
else
RSTATS_ADD_PEAK(vmm_blocks_unreach_special_mmap, num_blocks);
}
} else {
if (TEST(VMM_HEAP, which)) {
if (TEST(VMM_REACHABLE, which))
RSTATS_SUB(vmm_blocks_reach_heap, num_blocks);
else
RSTATS_SUB(vmm_blocks_unreach_heap, num_blocks);
} else if (TEST(VMM_CACHE, which))
RSTATS_SUB(vmm_blocks_reach_cache, num_blocks);
else if (TEST(VMM_STACK, which))
RSTATS_SUB(vmm_blocks_unreach_stack, num_blocks);
else if (TEST(VMM_SPECIAL_HEAP, which)) {
if (TEST(VMM_REACHABLE, which))
RSTATS_SUB(vmm_blocks_reach_special_heap, num_blocks);
else
RSTATS_SUB(vmm_blocks_unreach_special_heap, num_blocks);
} else if (TEST(VMM_SPECIAL_MMAP, which)) {
if (TEST(VMM_REACHABLE, which))
RSTATS_SUB(vmm_blocks_reach_special_mmap, num_blocks);
else
RSTATS_SUB(vmm_blocks_unreach_special_mmap, num_blocks);
}
}
}
/* Reservations here are done with DYNAMO_OPTION(vmm_block_size) alignment
* (e.g. 64KB) but the caller is not forced to request at that
* alignment. We explicitly synchronize reservations and decommits
* within the vm_heap_t.
* Returns NULL if the VMMHeap is full or too fragmented to satisfy
* the request.
*/
static vm_addr_t
vmm_heap_reserve_blocks(vm_heap_t *vmh, size_t size_in, byte *base, which_vmm_t which)
{
vm_addr_t p;
uint request;
uint first_block;
size_t size;
uint must_start;
size = ALIGN_FORWARD(size_in, DYNAMO_OPTION(vmm_block_size));
ASSERT_TRUNCATE(request, uint, size / DYNAMO_OPTION(vmm_block_size));
request = (uint)(size / DYNAMO_OPTION(vmm_block_size));
if (base != NULL)
must_start = vmm_addr_to_block(vmh, base);
else
must_start = UINT_MAX;
LOG(GLOBAL, LOG_HEAP, 2,
"vmm_heap_reserve_blocks %s: size=%d => %d in blocks=%d free_blocks=%d\n",
vmh->name, size_in, size, request, vmh->num_free_blocks);
d_r_mutex_lock(&vmh->lock);
if (vmh->num_free_blocks < request) {
d_r_mutex_unlock(&vmh->lock);
return NULL;
}
first_block =
bitmap_allocate_blocks(vmh->blocks, vmh->num_blocks, request, must_start);
if (first_block != BITMAP_NOT_FOUND) {
vmh->num_free_blocks -= request;
}
d_r_mutex_unlock(&vmh->lock);
if (first_block != BITMAP_NOT_FOUND) {
p = vmm_block_to_addr(vmh, first_block);
RSTATS_ADD_PEAK(vmm_vsize_used, size);
STATS_ADD_PEAK(vmm_vsize_blocks_used, request);
STATS_ADD_PEAK(vmm_vsize_wasted, size - size_in);
vmm_update_block_stats(which, request, true /*add*/);
DOSTATS({
if (request > 1) {
STATS_INC(vmm_multi_block_allocs);
STATS_ADD(vmm_multi_blocks, request);
}
});
} else {
p = NULL;
}
LOG(GLOBAL, LOG_HEAP, 2,
"vmm_heap_reserve_blocks %s: size=%d blocks=%d p=" PFX " index=%u\n", vmh->name,
size, request, p, first_block);
DOLOG(5, LOG_HEAP, { vmm_dump_map(vmh); });
return p;
}
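/* Example: with a 64KB -vmm_block_size, a 100KB request is rounded up to two
 * blocks (128KB); the 28KB difference shows up in the vmm_vsize_wasted stat,
 * and the returned address is the start of the first block.
 */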
/* We explicitly synchronize reservations and decommits within the vm_heap_t.
* Update bookkeeping information about the freed region.
*/
static void
vmm_heap_free_blocks(vm_heap_t *vmh, vm_addr_t p, size_t size_in, which_vmm_t which)
{
uint first_block = vmm_addr_to_block(vmh, p);
uint request;
size_t size;
size = ALIGN_FORWARD(size_in, DYNAMO_OPTION(vmm_block_size));
ASSERT_TRUNCATE(request, uint, size / DYNAMO_OPTION(vmm_block_size));
request = (uint)(size / DYNAMO_OPTION(vmm_block_size));
LOG(GLOBAL, LOG_HEAP, 2, "vmm_heap_free_blocks %s: size=%d blocks=%d p=" PFX "\n",
vmh->name, size, request, p);
d_r_mutex_lock(&vmh->lock);
bitmap_free_blocks(vmh->blocks, vmh->num_blocks, first_block, request);
vmh->num_free_blocks += request;
d_r_mutex_unlock(&vmh->lock);
ASSERT(vmh->num_free_blocks <= vmh->num_blocks);
RSTATS_SUB(vmm_vsize_used, size);
STATS_SUB(vmm_vsize_blocks_used, request);
vmm_update_block_stats(which, request, false /*sub*/);
STATS_SUB(vmm_vsize_wasted, size - size_in);
}
/* This is the proper interface for the rest of heap.c to the os_heap_* functions */
/* place all the local-scope static vars (from DO_THRESHOLD) into .fspdata to avoid
* protection changes */
START_DATA_SECTION(FREQ_PROTECTED_SECTION, "w");
static bool
at_reset_at_vmm_limit(vm_heap_t *vmh)
{
return (DYNAMO_OPTION(reset_at_vmm_percent_free_limit) != 0 &&
100 * vmh->num_free_blocks <
DYNAMO_OPTION(reset_at_vmm_percent_free_limit) * vmh->num_blocks) ||
(DYNAMO_OPTION(reset_at_vmm_free_limit) != 0 &&
vmh->num_free_blocks * DYNAMO_OPTION(vmm_block_size) <
DYNAMO_OPTION(reset_at_vmm_free_limit));
}
static void
reached_beyond_vmm(which_vmm_t which)
{
DODEBUG(ever_beyond_vmm = true;);
/* Stats can be very useful to diagnose why we hit OOM. */
if (INTERNAL_OPTION(rstats_to_stderr))
dump_global_rstats_to_stderr();
char message[256];
if (DYNAMO_OPTION(satisfy_w_xor_x) &&
(TEST(VMM_REACHABLE, which) || REACHABLE_HEAP())) {
/* We do not bother to try to mirror separate from-OS allocs: the user
* should set -vm_size 2G instead and take the rip-rel mangling hit
* (see i#3570).
*/
snprintf(
message, BUFFER_SIZE_ELEMENTS(message),
"Alloc type: 0x%x. -satisfy_w_xor_x requires VMM memory: try '-vm_size 2G'",
which);
NULL_TERMINATE_BUFFER(message);
REPORT_FATAL_ERROR_AND_EXIT(OUT_OF_VMM_CANNOT_USE_OS, 3, get_application_name(),
get_application_pid(), message);
ASSERT_NOT_REACHED();
} else {
snprintf(message, BUFFER_SIZE_ELEMENTS(message), "Alloc type: 0x%x.", which);
NULL_TERMINATE_BUFFER(message);
SYSLOG(SYSLOG_WARNING, OUT_OF_VMM_CANNOT_USE_OS, 3, get_application_name(),
get_application_pid(), message);
}
}
void
vmm_heap_handle_pending_low_on_memory_event_trigger()
{
bool trigger = false;
acquire_recursive_lock(&low_on_memory_pending_lock);
if (low_on_memory_pending) {
bool value = false;
ATOMIC_1BYTE_WRITE(&low_on_memory_pending, value, false);
trigger = true;
}
release_recursive_lock(&low_on_memory_pending_lock);
if (trigger)
instrument_low_on_memory();
}
static void
schedule_low_on_memory_event_trigger()
{
bool value = true;
ATOMIC_1BYTE_WRITE(&low_on_memory_pending, value, false);
}
/* Reserve virtual address space without committing swap space for it */
static vm_addr_t
vmm_heap_reserve(size_t size, heap_error_code_t *error_code, bool executable,
which_vmm_t which)
{
vm_addr_t p;
vm_heap_t *vmh = vmheap_for_which(which);
/* should only be used on sizable aligned pieces */
ASSERT(size > 0 && ALIGNED(size, PAGE_SIZE));
ASSERT(!OWN_MUTEX(&reset_pending_lock));
if (DYNAMO_OPTION(vm_reserve)) {
/* FIXME: should we make this an external option? */
if (INTERNAL_OPTION(vm_use_last) ||
(DYNAMO_OPTION(switch_to_os_at_vmm_reset_limit) &&
at_reset_at_vmm_limit(vmh))) {
DO_ONCE({
if (DYNAMO_OPTION(reset_at_switch_to_os_at_vmm_limit)) {
schedule_reset(RESET_ALL);
}
schedule_low_on_memory_event_trigger();
DOCHECK(1, {
if (!INTERNAL_OPTION(vm_use_last)) {
ASSERT_CURIOSITY(false && "running low on vm reserve");
}
});
/* FIXME - for our testing would be nice to have some release build
* notification of this ... */
});
reached_beyond_vmm(which);
#ifdef X64
if (TEST(VMM_REACHABLE, which) || REACHABLE_HEAP()) {
/* PR 215395, make sure allocation satisfies heap reachability
* constraints */
p = os_heap_reserve_in_region(
(void *)ALIGN_FORWARD(heap_allowable_region_start, PAGE_SIZE),
(void *)ALIGN_BACKWARD(heap_allowable_region_end, PAGE_SIZE), size,
error_code, executable);
/* ensure future heap allocations are reachable from this allocation */
if (p != NULL)
request_region_be_heap_reachable(p, size);
} else
p = os_heap_reserve(NULL, size, error_code, executable);
#else
p = os_heap_reserve(NULL, size, error_code, executable);
#endif
if (p != NULL)
return p;
LOG(GLOBAL, LOG_HEAP, 1, "vmm_heap_reserve %s: failed " PFX "\n", vmh->name,
*error_code);
}
if (at_reset_at_vmm_limit(vmh)) {
/* We're running low on our reservation, trigger a reset */
schedule_low_on_memory_event_trigger();
if (schedule_reset(RESET_ALL)) {
STATS_INC(reset_low_vmm_count);
DO_THRESHOLD_SAFE(
DYNAMO_OPTION(report_reset_vmm_threshold), FREQ_PROTECTED_SECTION,
{ /* < max - nothing */ },
{ /* >= max */
/* FIXME - do we want to report more than once to give some idea of
* how much thrashing there is? */
DO_ONCE({
SYSLOG_CUSTOM_NOTIFY(SYSLOG_WARNING, MSG_LOW_ON_VMM_MEMORY, 2,
"Potentially thrashing on low virtual "
"memory resetting.",
get_application_name(),
get_application_pid());
/* want QA to notice */
ASSERT_CURIOSITY(false && "vmm heap limit reset thrashing");
});
});
}
}
p = vmm_heap_reserve_blocks(vmh, size, NULL, which);
LOG(GLOBAL, LOG_HEAP, 2, "vmm_heap_reserve %s: size=%d p=" PFX "\n", vmh->name,
size, p);
if (p != NULL) {
if (DYNAMO_OPTION(satisfy_w_xor_x) && vmh == &heapmgt->vmcode &&
!executable) {
/* Pass back the writable address, not the executable.
* Then things like reachable heap do not need to convert to
* writable all over the place.
*/
p = (p - vmh->start_addr) + heapmgt->vmcode_writable_base;
}
return p;
}
DO_ONCE({
DODEBUG({ out_of_vmheap_once = true; });
if (!INTERNAL_OPTION(skip_out_of_vm_reserve_curiosity)) {
            /* this may be unsafe for early services w.r.t. case 666 */
SYSLOG_INTERNAL_WARNING("Out of %s reservation - reserving %dKB. "
"Falling back onto OS allocation",
(TEST(VMM_REACHABLE, which) || REACHABLE_HEAP())
? "vmcode"
: "vmheap",
size / 1024);
ASSERT_CURIOSITY(false && "Out of vmheap reservation");
}
/* This actually-out trigger is only trying to help issues like a
* thread-private configuration being a memory hog (and thus we use up
* our reserve). Reset needs memory, and this is asynchronous, so no
* guarantees here anyway (the app may have already reserved all memory
* beyond our reservation, see sqlsrvr.exe and cisvc.exe for ex.) which is
* why we have -reset_at_vmm_threshold to make reset success more likely. */
if (DYNAMO_OPTION(reset_at_vmm_full)) {
schedule_reset(RESET_ALL);
}
});
}
/* if we fail to allocate from our reservation we fall back to the OS */
reached_beyond_vmm(which);
#ifdef X64
if (TEST(VMM_REACHABLE, which) || REACHABLE_HEAP()) {
        /* PR 215395, make sure allocation satisfies heap reachability constraints */
p = os_heap_reserve_in_region(
(void *)ALIGN_FORWARD(heap_allowable_region_start, PAGE_SIZE),
(void *)ALIGN_BACKWARD(heap_allowable_region_end, PAGE_SIZE), size,
error_code, executable);
/* ensure future heap allocations are reachable from this allocation */
if (p != NULL)
request_region_be_heap_reachable(p, size);
} else
p = os_heap_reserve(NULL, size, error_code, executable);
#else
p = os_heap_reserve(NULL, size, error_code, executable);
#endif
return p;
}
/* Commit previously reserved pages; returns false when out of memory.
 * This is here just to complement the vmm interface; in fact it is
 * almost an alias for os_heap_commit.  (If we had strict types then
 * here we'd convert a vm_addr_t into a heap_pc.)
 */
static inline bool
vmm_heap_commit(vm_addr_t p, size_t size, uint prot, heap_error_code_t *error_code,
which_vmm_t which)
{
bool res = true;
vm_heap_t *vmh = vmheap_for_which(which);
LOG(GLOBAL, LOG_HEAP, 3, "vmm_heap_commit %s: size=%d p=" PFX " prot=%x\n", vmh->name,
size, p, prot);
if (DYNAMO_OPTION(satisfy_w_xor_x) && vmh == &heapmgt->vmcode) {
vm_addr_t p_writable = vmm_normalize_addr(vmh, &p);
/* We blindly shadow even if prot is -w to simplify de-alloc. -w is rare. */
uint shadow_prot = prot & ~(MEMPROT_EXEC);
res = os_heap_commit(p_writable, size, shadow_prot, error_code);
prot &= ~(MEMPROT_WRITE);
if (res) {
/* We use mmap instead of mprotect since W^X policies often only allow
* execution from regions allocated executable, not changed to executable.
* There is a downside: IMA policies can cause a significant (~5s) delay
* while a hash is computed of our vmcode region on the first +x mmap.
* Today os_create_memory_file() does a temporary +x mmap for us, avoiding
* any cost here.
*/
size_t map_size = size;
size_t map_offs = p - vmh->start_addr;
vm_addr_t map_addr =
os_map_file(heapmgt->dual_map_file, &map_size, map_offs, p, prot,
MAP_FILE_VMM_COMMIT | MAP_FILE_FIXED);
ASSERT(map_size == size);
res = (map_addr != NULL);
ASSERT(map_addr == NULL || map_addr == p);
}
} else
res = os_heap_commit(p, size, prot, error_code);
size_t commit_used, commit_limit;
ASSERT(!OWN_MUTEX(&reset_pending_lock));
if ((DYNAMO_OPTION(reset_at_commit_percent_free_limit) != 0 ||
DYNAMO_OPTION(reset_at_commit_free_limit) != 0) &&
os_heap_get_commit_limit(&commit_used, &commit_limit)) {
size_t commit_left = commit_limit - commit_used;
ASSERT(commit_used <= commit_limit);
        /* FIXME - worry about overflow in the multiplies below?  With 4KB pages this
         * isn't an issue until 160GB of committable memory. */
if ((DYNAMO_OPTION(reset_at_commit_free_limit) != 0 &&
commit_left < DYNAMO_OPTION(reset_at_commit_free_limit) / PAGE_SIZE) ||
(DYNAMO_OPTION(reset_at_commit_percent_free_limit) != 0 &&
100 * commit_left <
DYNAMO_OPTION(reset_at_commit_percent_free_limit) * commit_limit)) {
/* Machine is getting low on memory, trigger a reset */
            /* FIXME - if we aren't the ones hogging committed memory (rogue app) then
* do we want a version of reset that doesn't de-commit our already grabbed
* memory to avoid someone else stealing it (or perhaps keep just a minimal
* level to ensure we make some progress)? */
/* FIXME - the commit limit is for the whole system; we have no good way of
* telling if we're running in a job and if so what the commit limit for the
* job is. */
/* FIXME - if a new process is started under dr while the machine is already
* past the threshold we will just spin resetting here and not make any
* progress, may be better to only reset when we have a reasonable amount of
* non-persistent memory to free (so that we can at least make some progress
* before resetting again). */
/* FIXME - the threshold is calculated at the current page file size, but
* it's possible that the pagefile is expandable (dependent on disk space of
* course) and thus we're preventing a potentially beneficial (to us)
             * upsizing of the pagefile here. See "HKLM\SYSTEM\CCS\Control\Session
             * Manager\Memory Management" for the initial/max size of the various page
             * files (querying SystemPagefileInformation only gets you the current size). */
/* xref case 345 on fixmes (and link to wiki discussion) */
if (schedule_reset(RESET_ALL)) {
STATS_INC(reset_low_commit_count);
DO_THRESHOLD_SAFE(
DYNAMO_OPTION(report_reset_commit_threshold), FREQ_PROTECTED_SECTION,
{ /* < max - nothing */ },
{ /* >= max */
                      /* FIXME - do we want to report more than once to give some idea of
* how much thrashing there is? */
DO_ONCE({
SYSLOG_CUSTOM_NOTIFY(
SYSLOG_WARNING, MSG_LOW_ON_COMMITTABLE_MEMORY, 2,
"Potentially thrashing on low committable "
"memory resetting.",
get_application_name(), get_application_pid());
/* want QA to notice */
ASSERT_CURIOSITY(false && "commit limit reset thrashing");
});
});
}
}
}
if (!res && DYNAMO_OPTION(oom_timeout) != 0 &&
!(DYNAMO_OPTION(satisfy_w_xor_x) && vmh == &heapmgt->vmcode)) {
DEBUG_DECLARE(heap_error_code_t old_error_code = *error_code;)
ASSERT(old_error_code != HEAP_ERROR_SUCCESS);
/* check whether worth retrying */
if (!os_heap_systemwide_overcommit(*error_code)) {
/* FIXME: we should check whether current process is the hog */
/* unless we have used the memory, there is still a
             * minuscule chance another thread will free up some or
* will attempt suicide, so could retry even if current
* process has a leak */
ASSERT_NOT_IMPLEMENTED(false);
/* retry */
}
SYSLOG_INTERNAL_WARNING("vmm_heap_commit oom: timeout and retry");
        /* let's hope a memory hog dies in the meantime */
os_timeout(DYNAMO_OPTION(oom_timeout));
res = os_heap_commit(p, size, prot, error_code);
DODEBUG({
if (res) {
SYSLOG_INTERNAL_WARNING("vmm_heap_commit retried, got away! old=" PFX
" new=" PFX "\n",
old_error_code, *error_code);
} else {
SYSLOG_INTERNAL_WARNING("vmm_heap_commit retrying, no luck. old=" PFX
" new=" PFX "\n",
old_error_code, *error_code);
}
});
}
return res;
}
/* back to normal section */
END_DATA_SECTION()
/* Free previously reserved and possibly committed memory.  If it is
 * within the memory managed by the virtual memory manager, we only
 * decommit back to the OS and remove the vmm reservation.
 * Keep in mind that this can be called on units that are not fully
 * committed, e.g. guard pages are added to this - as long as the
 * os_heap_decommit interface can handle this we're OK.
 */
static void
vmm_heap_free(vm_addr_t p, size_t size, heap_error_code_t *error_code, which_vmm_t which)
{
vm_heap_t *vmh = vmheap_for_which(which);
LOG(GLOBAL, LOG_HEAP, 2, "vmm_heap_free %s: size=%d p=" PFX " is_reserved=%d\n",
vmh->name, size, p, vmm_is_reserved_unit(vmh, p, size));
vm_addr_t p_writable = p;
if (DYNAMO_OPTION(satisfy_w_xor_x) && vmh == &heapmgt->vmcode)
p_writable = vmm_normalize_addr(vmh, &p);
    /* The memory doesn't have to be within our VM reserve if it
     * was allocated as an extra OS call when we ran out.
     */
if (DYNAMO_OPTION(vm_reserve)) {
if (vmm_is_reserved_unit(vmh, p, size)) {
if (DYNAMO_OPTION(satisfy_w_xor_x) && vmh == &heapmgt->vmcode)
os_heap_decommit(p_writable, size, error_code);
os_heap_decommit(p, size, error_code);
vmm_heap_free_blocks(vmh, p, size, which);
LOG(GLOBAL, LOG_HEAP, 2, "vmm_heap_free %s: freed size=%d p=" PFX "\n",
vmh->name, size, p);
return;
} else {
/* FIXME: check if this is stack_free getting in the way, then ignore it */
            /* FIXME: could do this by overriding the meaning of the vmheap fields
             * after cleanup to a different combination where start_pc = end_pc = NULL
             */
/* FIXME: see vmm_heap_unit_exit for the current stack_free problem */
if (vmm_heap_exited) {
*error_code = HEAP_ERROR_SUCCESS;
return;
}
}
}
if (DYNAMO_OPTION(satisfy_w_xor_x) && vmh == &heapmgt->vmcode)
os_heap_free(p_writable, size, error_code);
os_heap_free(p, size, error_code);
}
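/* Decommits committed pages without giving up the reservation.  For
 * -satisfy_w_xor_x vmcode we decommit the writable view in addition to the
 * executable one.
 */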
static void
vmm_heap_decommit(vm_addr_t p, size_t size, heap_error_code_t *error_code,
which_vmm_t which)
{
LOG(GLOBAL, LOG_HEAP, 2, "vmm_heap_decommit: size=%d p=" PFX " is_reserved=%d\n",
size, p, is_vmm_reserved_address(p, size, NULL, NULL));
if (DYNAMO_OPTION(satisfy_w_xor_x)) {
vm_heap_t *vmh = vmheap_for_which(which);
if (vmh == &heapmgt->vmcode) {
vm_addr_t p_writable = vmm_normalize_addr(vmh, &p);
os_heap_decommit(p_writable, size, error_code);
}
}
os_heap_decommit(p, size, error_code);
/* nothing to be done to vmm blocks */
}
/* Caller is required to handle thread synchronization and to update dynamo vm areas.
* size must be PAGE_SIZE-aligned.
 * Returns NULL if it fails to allocate memory!
*/
static void *
vmm_heap_alloc(size_t size, uint prot, heap_error_code_t *error_code, which_vmm_t which)
{
vm_addr_t p = vmm_heap_reserve(size, error_code, TEST(MEMPROT_EXEC, prot), which);
if (!p)
return NULL; /* out of reserved memory */
if (!vmm_heap_commit(p, size, prot, error_code, which))
return NULL; /* out of committed memory */
return p;
}
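/* Illustrative call pattern for vmm_heap_alloc() (a sketch only; "which" and
 * "request_size" here are placeholders for an appropriate which_vmm_t value and
 * the caller's requested size -- see get_real_memory() below for the canonical
 * caller, which also adds the out-of-memory recovery path):
 *
 *   heap_error_code_t error_code;
 *   size_t size = ALIGN_FORWARD(request_size, PAGE_SIZE);
 *   void *p = vmm_heap_alloc(size, MEMPROT_READ | MEMPROT_WRITE, &error_code, which);
 *   if (p == NULL)
 *       ...handle out-of-memory...
 */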
/* virtual memory manager initialization */
void
vmm_heap_init()
{
IF_WINDOWS(ASSERT(ALIGNED(OS_ALLOC_GRANULARITY, DYNAMO_OPTION(vmm_block_size))));
#ifdef X64
/* add reachable regions before we allocate the heap, xref PR 215395 */
/* i#774, i#901: we no longer need the DR library nor ntdll.dll to be
* reachable by the vmheap reservation. But, for -heap_in_lower_4GB,
* we must call request_region_be_heap_reachable() up front.
* This is a hard requirement so we set it prior to locating the vmm region.
*/
if (DYNAMO_OPTION(heap_in_lower_4GB))
request_region_be_heap_reachable(0, 0x80000000);
#endif
if (DYNAMO_OPTION(vm_reserve)) {
vmm_heap_unit_init(&heapmgt->vmcode, DYNAMO_OPTION(vm_size), true, "vmcode");
if (!REACHABLE_HEAP()) {
vmm_heap_unit_init(
&heapmgt->vmheap,
/* Use vmheap_size_wow64 if target is WoW64 windows process. */
IF_WINDOWS_ELSE(IF_X64_ELSE(is_wow64_process(NT_CURRENT_PROCESS)
? DYNAMO_OPTION(vmheap_size_wow64)
: DYNAMO_OPTION(vmheap_size),
DYNAMO_OPTION(vmheap_size)),
DYNAMO_OPTION(vmheap_size)),
false, "vmheap");
}
}
}
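/* Tears down a single vm_heap_t at exit time.  contains_stacks indicates that
 * the region holds thread stacks, some of which are deliberately left behind
 * (see the comment at the end of this routine on the current stack).
 */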
static void
vmh_exit(vm_heap_t *vmh, bool contains_stacks)
{
/* We have three regions that are not explicitly deallocated: current stack, init
* stack, global_do_syscall.
*/
DOCHECK(1, {
uint perstack =
(uint)(ALIGN_FORWARD_UINT(
DYNAMO_OPTION(stack_size) +
(has_guard_pages(VMM_STACK | VMM_PER_THREAD)
? (2 * PAGE_SIZE)
: (DYNAMO_OPTION(stack_guard_pages) ? PAGE_SIZE : 0)),
DYNAMO_OPTION(vmm_block_size)) /
DYNAMO_OPTION(vmm_block_size));
uint unfreed_blocks;
if (!contains_stacks || standalone_library)
unfreed_blocks = 0;
else {
unfreed_blocks = perstack * 1 /* d_r_initstack */ +
/* current stack */
perstack * ((doing_detach IF_APP_EXPORTS(|| dr_api_exit)) ? 0 : 1);
}
/* Our bitmap does not get freed. */
size_t blocks_sz_bytes =
ALIGN_FORWARD_UINT(BITMAP_INDEX(vmh->num_blocks) * sizeof(bitmap_element_t),
DYNAMO_OPTION(vmm_block_size));
unfreed_blocks += (uint)(blocks_sz_bytes / DYNAMO_OPTION(vmm_block_size));
        /* XXX: On detach, arch_thread_exit should explicitly mark as
         * left behind all TPCs needed so that we can assert even for
         * detach.
         */
ASSERT(IF_WINDOWS(doing_detach ||) /* not deterministic when detaching */
vmh->num_free_blocks == vmh->num_blocks - unfreed_blocks ||
/* >=, not ==, b/c if we hit the vmm limit the cur dstack
* could be outside of vmm (i#1164).
*/
((ever_beyond_vmm
/* This also happens for dstacks up high for DrMi#1723. */
IF_WINDOWS(|| get_os_version() >= WINDOWS_VERSION_8_1)) &&
vmh->num_free_blocks >= vmh->num_blocks - unfreed_blocks));
});
/* On process exit we are currently executing off a
* stack in this region so we cannot free the whole allocation.
* XXX: Any tombstone allocations will have to use a
* different interface than the generic heap_mmap() which is
* sometimes used to leave things behind. FIXME: Currently
* we'll leave behind the whole vm unit if any tombstones are
* left - which in fact is always the case, no matter whether
* thread private code needs to be left or not.
* global_do_syscall 32 byte allocation should be part of our
* dll and won't have to be left.
* The current stack is the main problem because it is later
* cleaned up in cleanup_and_terminate by calling stack_free which
* in turn gets all the way to vmm_heap_free. Therefore we add an
* explicit test for vmm_heap_exited, so that we can otherwise free
* bookkeeping information and delete the lock now.
* Potential solution to most of these problems is to have
* cleanup_and_terminate call vmm_heap_exit when cleaning up
* the process, or to just leave the vm mapping behind and
* simply pass a different argument to stack_free.
*/
vmm_heap_unit_exit(vmh);
}
void
vmm_heap_exit()
{
/* virtual memory manager exit */
if (DYNAMO_OPTION(vm_reserve)) {
if (heapmgt->vmcode.start_addr != NULL)
vmh_exit(&heapmgt->vmcode, heapmgt->vmheap.start_addr == NULL);
if (heapmgt->vmheap.start_addr != NULL)
vmh_exit(&heapmgt->vmheap, true);
vmm_heap_exited = true;
}
}
#ifdef UNIX
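/* Pre-fork (parent side): makes a temporary private copy of the dual-mapped
 * vmcode contents so the child can rebuild its own mappings without racing on
 * the parent's live mappings (see the comment in the body below).
 */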
void
vmm_heap_fork_pre(dcontext_t *dcontext)
{
if (!DYNAMO_OPTION(satisfy_w_xor_x))
return;
/* The child wants a private copy of our dual mapping setup, rather than
* sharing the parent's. Unfortunately that requires copying the entire
* vmcode contents into new mappings. To avoid a race while the child makes
* this copy from our live mappings, we create a temp copy now. The
* disadvantage is that we need a bunch of free memory (and address space:
* but this is 64-bit-only). The alternative is to have the parent wait for
* the child but that seems too disruptive to scheduling.
*/
thread_heap_t *th = (thread_heap_t *)dcontext->heap_field;
heap_error_code_t error_code;
/* We store in a dcontext field to avoid races with other threads doing forks. */
th->fork_copy_size = heapmgt->vmcode.alloc_size;
th->fork_copy_start =
os_heap_reserve(NULL, th->fork_copy_size, &error_code, true /*+x*/);
if (th->fork_copy_start == NULL) {
report_w_xor_x_fatal_error_and_exit();
ASSERT_NOT_REACHED();
}
/* Copy each mapping. We also need to record the +*x protections (because some
* are +rw (ELF data segments), some are +rx, and some are +r (reachable
* (non-exec) heap)). We can't use the actual page prot of the copy to store
* what the vmcode prot should be, because some W^X implementations remove +x
* from a +wx region, and we require +w to make our copy. Thus we store the
* mapping prots in a vmvector.
*/
VMVECTOR_ALLOC_VECTOR(th->fork_copy_areas, dcontext,
VECTOR_NEVER_MERGE | VECTOR_NO_LOCK, innermost_lock);
memquery_iter_t iter;
if (!memquery_iterator_start(&iter, heapmgt->vmcode.alloc_start,
true /*using heap*/)) {
report_w_xor_x_fatal_error_and_exit();
ASSERT_NOT_REACHED();
}
while (memquery_iterator_next(&iter) && iter.vm_start < heapmgt->vmcode.end_addr) {
if (iter.vm_start < heapmgt->vmcode.alloc_start || iter.prot == MEMPROT_NONE)
continue;
byte *new_start =
iter.vm_start - heapmgt->vmcode.alloc_start + th->fork_copy_start;
vmvector_add(th->fork_copy_areas, new_start,
new_start + (iter.vm_end - iter.vm_start),
(void *)(ptr_uint_t)iter.prot);
if (!os_heap_commit(new_start, iter.vm_end - iter.vm_start,
MEMPROT_READ | MEMPROT_WRITE, &error_code)) {
report_w_xor_x_fatal_error_and_exit();
ASSERT_NOT_REACHED();
}
memcpy(new_start, iter.vm_start, iter.vm_end - iter.vm_start);
LOG(GLOBAL, LOG_HEAP, 2, "%s: copied %p-%p %x to %p-%p\n", __FUNCTION__,
iter.vm_start, iter.vm_end, iter.prot, new_start,
new_start + (iter.vm_end - iter.vm_start));
}
memquery_iterator_stop(&iter);
}
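/* Post-fork in the parent: frees the temporary copy made by vmm_heap_fork_pre(). */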
void
vmm_heap_fork_post(dcontext_t *dcontext, bool parent)
{
if (!DYNAMO_OPTION(satisfy_w_xor_x) || !parent)
return;
thread_heap_t *th = (thread_heap_t *)dcontext->heap_field;
heap_error_code_t error_code;
os_heap_free(th->fork_copy_start, th->fork_copy_size, &error_code);
if (error_code != HEAP_ERROR_SUCCESS) {
report_w_xor_x_fatal_error_and_exit();
ASSERT_NOT_REACHED();
}
th->fork_copy_start = NULL;
th->fork_copy_size = 0;
vmvector_reset_vector(dcontext, th->fork_copy_areas);
vmvector_delete_vector(dcontext, th->fork_copy_areas);
th->fork_copy_areas = NULL;
}
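/* Post-fork in the child: rebuilds a private dual-mapping setup (new memory
 * file plus fresh +w and +x views) from the temp copy the parent made in
 * vmm_heap_fork_pre().
 */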
void
vmm_heap_fork_init(dcontext_t *dcontext)
{
if (!DYNAMO_OPTION(satisfy_w_xor_x))
return;
/* We want a private copy of our dual mapping setup, rather than sharing the
* parent's. Unfortunately that requires copying the entire vmcode contents
* into new mappings. The parent has made a temp copy for us to avoid races
* if we tried to copy its live memory.
*/
/* First, make a new file. */
int old_fd = heapmgt->dual_map_file;
heapmgt->dual_map_file =
os_create_memory_file(MEMORY_FILE_NAME, heapmgt->vmcode.alloc_size);
if (heapmgt->dual_map_file == INVALID_FILE)
goto vmm_heap_fork_init_failed;
LOG(GLOBAL, LOG_HEAP, 2, "%s: new dual_map_file is %d\n", __FUNCTION__,
heapmgt->dual_map_file);
/* Second, make a new +w region and copy the old protections and contents. */
size_t map_size = heapmgt->vmcode.alloc_size;
byte *map_base =
os_map_file(heapmgt->dual_map_file, &map_size, 0, heapmgt->vmcode_writable_alloc,
MEMPROT_NONE, MAP_FILE_VMM_COMMIT | MAP_FILE_FIXED);
if (map_base != heapmgt->vmcode_writable_alloc ||
map_size != heapmgt->vmcode.alloc_size)
goto vmm_heap_fork_init_failed;
heap_error_code_t error_code;
thread_heap_t *th = (thread_heap_t *)dcontext->heap_field;
vmvector_iterator_t vmvi;
vmvector_iterator_start(th->fork_copy_areas, &vmvi);
while (vmvector_iterator_hasnext(&vmvi)) {
byte *start, *end;
uint prot = (uint)(ptr_uint_t)vmvector_iterator_next(&vmvi, &start, &end);
byte *new_start = start - th->fork_copy_start + heapmgt->vmcode_writable_alloc;
uint new_prot = (prot & ~(MEMPROT_EXEC)) | MEMPROT_WRITE;
if (!os_heap_commit(new_start, end - start, new_prot, &error_code))
goto vmm_heap_fork_init_failed;
memcpy(new_start, start, end - start);
LOG(GLOBAL, LOG_HEAP, 2, "%s: re-mapped %p-%p %x; copied from %p-%p %x\n",
__FUNCTION__, new_start, new_start + (end - start), new_prot, start, end,
prot);
}
vmvector_iterator_stop(&vmvi);
/* Third, make a new +x region and set up the right protections and mappings. */
map_size = heapmgt->vmcode.alloc_size;
map_base =
os_map_file(heapmgt->dual_map_file, &map_size, 0, heapmgt->vmcode.alloc_start,
MEMPROT_NONE, MAP_FILE_VMM_COMMIT | MAP_FILE_FIXED);
if (map_base != heapmgt->vmcode.alloc_start || map_size != heapmgt->vmcode.alloc_size)
goto vmm_heap_fork_init_failed;
vmvector_iterator_start(th->fork_copy_areas, &vmvi);
while (vmvector_iterator_hasnext(&vmvi)) {
byte *start, *end;
uint prot = (uint)(ptr_uint_t)vmvector_iterator_next(&vmvi, &start, &end);
byte *new_start = start - th->fork_copy_start + heapmgt->vmcode.alloc_start;
map_size = end - start;
map_base =
os_map_file(heapmgt->dual_map_file, &map_size, start - th->fork_copy_start,
new_start, prot, MAP_FILE_VMM_COMMIT | MAP_FILE_FIXED);
if (map_base != new_start || map_size != end - start)
goto vmm_heap_fork_init_failed;
LOG(GLOBAL, LOG_HEAP, 2, "%s: re-mapped %p-%p %x\n", __FUNCTION__, new_start,
new_start + map_size, prot);
}
vmvector_iterator_stop(&vmvi);
os_heap_free(th->fork_copy_start, th->fork_copy_size, &error_code);
if (error_code != HEAP_ERROR_SUCCESS)
goto vmm_heap_fork_init_failed;
th->fork_copy_start = NULL;
th->fork_copy_size = 0;
vmvector_reset_vector(dcontext, th->fork_copy_areas);
vmvector_delete_vector(dcontext, th->fork_copy_areas);
th->fork_copy_areas = NULL;
/* XXX: We don't want to unlink any tmpfs file so we don't use
* os_delete_memory_file(). This may not work on Windows if that function needs to do
* more.
*/
os_close_protected(old_fd);
return;
vmm_heap_fork_init_failed:
report_w_xor_x_fatal_error_and_exit();
ASSERT_NOT_REACHED();
}
#endif
/* checks for compatibility among heap options; returns true if it
 * modified the value of any option to make them compatible
*/
bool
heap_check_option_compatibility()
{
bool ret = false;
ret = check_param_bounds(&dynamo_options.vm_size, MIN_VMM_HEAP_UNIT_SIZE,
MAX_VMCODE_SIZE, "vm_size") ||
ret;
ret = check_param_bounds(&dynamo_options.vmheap_size, MIN_VMM_HEAP_UNIT_SIZE,
MAX_VMHEAP_SIZE, "vmheap_size") ||
ret;
#ifdef INTERNAL
/* if max_heap_unit_size is too small you may get a funny message
* "initial_heap_unit_size must be >= 8229 and <= 4096" but in
* release build we will take the min and then complain about
* max_heap_unit_size and set it to the min also, so it all works
* out w/o needing an extra check() call.
*/
/* case 7626: don't short-circuit checks, as later ones may be needed */
ret = check_param_bounds(&dynamo_options.initial_heap_unit_size,
                             /* if we have units smaller than a page we end up
* allocating 64KB chunks for "oversized" units
* for just about every alloc! so round up to
* at least a page.
*/
ALIGN_FORWARD(UNITOVERHEAD + 1, (uint)PAGE_SIZE),
HEAP_UNIT_MAX_SIZE, "initial_heap_unit_size") ||
ret;
ret = check_param_bounds(&dynamo_options.initial_global_heap_unit_size,
ALIGN_FORWARD(UNITOVERHEAD + 1, (uint)PAGE_SIZE),
HEAP_UNIT_MAX_SIZE, "initial_global_heap_unit_size") ||
ret;
ret = check_param_bounds(&dynamo_options.max_heap_unit_size,
MAX(HEAP_UNIT_MIN_SIZE, GLOBAL_UNIT_MIN_SIZE), INT_MAX,
"max_heap_unit_size") ||
ret;
#endif
return ret;
}
/* thread-shared initialization that should be repeated after a reset */
void
heap_reset_init()
{
threadunits_init(GLOBAL_DCONTEXT, &heapmgt->global_nonpersistent_units,
GLOBAL_UNIT_MIN_SIZE, false);
}
/* initialization */
void
d_r_heap_init()
{
int i;
DEBUG_DECLARE(uint prev_sz = 0;)
LOG(GLOBAL, LOG_TOP | LOG_HEAP, 2, "Heap bucket sizes are:\n");
/* make sure we'll preserve alignment */
ASSERT(ALIGNED(HEADER_SIZE, HEAP_ALIGNMENT));
/* make sure free list pointers will fit */
ASSERT(BLOCK_SIZES[0] >= sizeof(heap_pc *));
/* since sizes depend on size of structs, make sure they're in order */
for (i = 0; i < BLOCK_TYPES; i++) {
ASSERT(BLOCK_SIZES[i] > prev_sz);
/* we assume all of our heap allocs are aligned */
ASSERT(i == BLOCK_TYPES - 1 || ALIGNED(BLOCK_SIZES[i], HEAP_ALIGNMENT));
DODEBUG(prev_sz = BLOCK_SIZES[i];);
LOG(GLOBAL, LOG_TOP | LOG_HEAP, 2, "\t%d bytes\n", BLOCK_SIZES[i]);
}
/* we assume writes to some static vars are atomic,
* i.e., the vars don't cross cache lines. they shouldn't since
* they should all be 4-byte-aligned in the data segment.
* FIXME: ensure that release build aligns ok?
* I would be quite surprised if static vars were not 4-byte-aligned!
*/
ASSERT(ALIGN_BACKWARD(&heap_exiting, CACHE_LINE_SIZE()) ==
ALIGN_BACKWARD(&heap_exiting + 1, CACHE_LINE_SIZE()));
ASSERT(ALIGN_BACKWARD(&heap_unit_lock.owner, CACHE_LINE_SIZE()) ==
ALIGN_BACKWARD(&heap_unit_lock.owner + 1, CACHE_LINE_SIZE()));
/* For simplicity we go through our normal heap mechanism to allocate
* our post-init heapmgt struct
*/
ASSERT(heapmgt == &temp_heapmgt);
heapmgt->global_heap_writable = true; /* this is relied on in global_heap_alloc */
threadunits_init(GLOBAL_DCONTEXT, &heapmgt->global_units, GLOBAL_UNIT_MIN_SIZE,
false);
heapmgt =
HEAP_TYPE_ALLOC(GLOBAL_DCONTEXT, heap_management_t, ACCT_MEM_MGT, PROTECTED);
ASSERT(sizeof(temp_heapmgt) == sizeof(*heapmgt));
memcpy(heapmgt, &temp_heapmgt, sizeof(temp_heapmgt));
threadunits_init(GLOBAL_DCONTEXT, &heapmgt->global_unprotected_units,
GLOBAL_UNIT_MIN_SIZE, false);
if (!REACHABLE_HEAP()) { /* If off, all heap is reachable. */
threadunits_init(GLOBAL_DCONTEXT, &heapmgt->global_reachable_units,
GLOBAL_UNIT_MIN_SIZE, true);
}
heap_reset_init();
#ifdef WINDOWS
/* PR 250294: As part of 64-bit hook work, hook reachability was addressed
* using landing pads (see win32/callback.c for more explanation). Landing
* pad areas are a type of special heap, so they should be initialized
* during heap init.
* Each landing pad area has its own allocation pointer, so they shouldn't
* be merged automatically.
*/
VMVECTOR_ALLOC_VECTOR(landing_pad_areas, GLOBAL_DCONTEXT,
VECTOR_SHARED | VECTOR_NEVER_MERGE, landing_pad_areas_lock);
#endif
}
/* need to not remove from vmareas on process exit -- vmareas has already exited! */
static void
really_free_unit(heap_unit_t *u)
{
RSTATS_SUB(heap_capacity, UNIT_COMMIT_SIZE(u));
STATS_ADD(heap_reserved_only,
(stats_int_t)(UNIT_COMMIT_SIZE(u) - UNIT_RESERVED_SIZE(u)));
/* remember that u itself is inside unit, not separately allocated */
release_guarded_real_memory((vm_addr_t)u, UNIT_RESERVED_SIZE(u),
false /*do not update DR areas now*/, true, u->which);
}
/* Free all thread-shared state not critical to forward progress;
* heap_reset_init() will be called before continuing.
*/
void
heap_reset_free()
{
heap_unit_t *u, *next_u;
/* FIXME: share some code w/ heap_exit -- currently only called by reset */
ASSERT(DYNAMO_OPTION(enable_reset));
/* we must grab this lock before heap_unit_lock to avoid rank
* order violations when freeing
*/
dynamo_vm_areas_lock();
/* for combining stats into global_units we need this lock
* FIXME: remove if we go to separate stats sum location
*/
DODEBUG({ acquire_recursive_lock(&global_alloc_lock); });
acquire_recursive_lock(&heap_unit_lock);
LOG(GLOBAL, LOG_HEAP, 1, "Pre-reset, global heap unit stats:\n");
/* FIXME: free directly rather than putting on dead list first */
threadunits_exit(&heapmgt->global_nonpersistent_units, GLOBAL_DCONTEXT);
/* free all dead units */
u = heapmgt->heap.dead;
while (u != NULL) {
next_u = u->next_global;
LOG(GLOBAL, LOG_HEAP, 1, "\tfreeing dead unit " PFX "-" PFX " [-" PFX "]\n", u,
UNIT_COMMIT_END(u), UNIT_RESERVED_END(u));
RSTATS_DEC(heap_num_free);
really_free_unit(u);
u = next_u;
}
heapmgt->heap.dead = NULL;
heapmgt->heap.num_dead = 0;
release_recursive_lock(&heap_unit_lock);
DODEBUG({ release_recursive_lock(&global_alloc_lock); });
dynamo_vm_areas_unlock();
}
/* atexit cleanup */
void
d_r_heap_exit()
{
heap_unit_t *u, *next_u;
heap_management_t *temp;
heap_exiting = true;
/* FIXME: we shouldn't need either lock if executed last */
dynamo_vm_areas_lock();
acquire_recursive_lock(&heap_unit_lock);
#ifdef WINDOWS
release_landing_pad_mem(); /* PR 250294 */
#endif
LOG(GLOBAL, LOG_HEAP, 1, "Global unprotected heap unit stats:\n");
threadunits_exit(&heapmgt->global_unprotected_units, GLOBAL_DCONTEXT);
LOG(GLOBAL, LOG_HEAP, 1, "Global nonpersistent heap unit stats:\n");
threadunits_exit(&heapmgt->global_nonpersistent_units, GLOBAL_DCONTEXT);
if (!REACHABLE_HEAP()) { /* If off, all heap is reachable. */
LOG(GLOBAL, LOG_HEAP, 1, "Global reachable heap unit stats:\n");
threadunits_exit(&heapmgt->global_reachable_units, GLOBAL_DCONTEXT);
}
/* Now we need to go back to the static struct to clean up */
ASSERT(heapmgt != &temp_heapmgt);
    /* We need to maintain the process lock list, which was using the
     * temp_heapmgt lock structure.
*/
mutex_t temp_vmcode = temp_heapmgt.vmcode.lock;
mutex_t temp_vmheap = temp_heapmgt.vmheap.lock;
memcpy(&temp_heapmgt, heapmgt, sizeof(temp_heapmgt));
temp_heapmgt.vmcode.lock = temp_vmcode;
temp_heapmgt.vmheap.lock = temp_vmheap;
temp = heapmgt;
heapmgt = &temp_heapmgt;
HEAP_TYPE_FREE(GLOBAL_DCONTEXT, temp, heap_management_t, ACCT_MEM_MGT, PROTECTED);
LOG(GLOBAL, LOG_HEAP, 1, "Global heap unit stats:\n");
threadunits_exit(&heapmgt->global_units, GLOBAL_DCONTEXT);
/* free heap for all unfreed units */
LOG(GLOBAL, LOG_HEAP, 1, "Unfreed units:\n");
u = heapmgt->heap.units;
while (u != NULL) {
next_u = u->next_global;
LOG(GLOBAL, LOG_HEAP, 1, "\tfreeing live unit " PFX "-" PFX " [-" PFX "]\n", u,
UNIT_COMMIT_END(u), UNIT_RESERVED_END(u));
RSTATS_DEC(heap_num_live);
really_free_unit(u);
u = next_u;
}
heapmgt->heap.units = NULL;
u = heapmgt->heap.dead;
while (u != NULL) {
next_u = u->next_global;
LOG(GLOBAL, LOG_HEAP, 1, "\tfreeing dead unit " PFX "-" PFX " [-" PFX "]\n", u,
UNIT_COMMIT_END(u), UNIT_RESERVED_END(u));
RSTATS_DEC(heap_num_free);
really_free_unit(u);
u = next_u;
}
heapmgt->heap.dead = NULL;
heapmgt->global_heap_writable = false; /* This is relied on in global_heap_alloc. */
release_recursive_lock(&heap_unit_lock);
dynamo_vm_areas_unlock();
DELETE_RECURSIVE_LOCK(heap_unit_lock);
DELETE_RECURSIVE_LOCK(global_alloc_lock);
DELETE_RECURSIVE_LOCK(low_on_memory_pending_lock);
#ifdef X64
DELETE_LOCK(request_region_be_heap_reachable_lock);
#endif
if (doing_detach) {
heapmgt = &temp_heapmgt;
IF_X64(reset_heap_reachable_bounds());
}
}
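/* Clears the exiting flag once exit-time heap cleanup is complete. */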
void
heap_post_exit()
{
heap_exiting = false;
}
/* FIXME:
 * detect whether it is the app we're fighting for memory; if so, don't
 * free memory, else the app will just keep grabbing more.
 * Need a test for hitting the 2GB (or 3GB!) user-mode limit.
*/
static void
heap_low_on_memory()
{
/* free some memory! */
heap_unit_t *u, *next_u;
DEBUG_DECLARE(size_t freed = 0;)
LOG(GLOBAL, LOG_CACHE | LOG_STATS, 1,
"heap_low_on_memory: about to free dead list units\n");
/* WARNING: this routine is called at arbitrary allocation failure points,
     * so we have to be careful what locks we grab.
* However, no allocation site can hold a lock weaker in rank than
* heap_unit_lock, b/c it could deadlock on the allocation itself!
* So we're safe.
*/
    /* must grab this lock prior to heap_unit_lock if affecting DR vm areas;
     * this is recursive so it is ok if we ran out of memory while holding the
     * DR vm area lock
     */
ASSERT(safe_to_allocate_or_free_heap_units());
dynamo_vm_areas_lock();
acquire_recursive_lock(&heap_unit_lock);
u = heapmgt->heap.dead;
while (u != NULL) {
next_u = u->next_global;
DODEBUG(freed += UNIT_COMMIT_SIZE(u););
/* FIXME: if out of committed pages only, could keep our reservations */
LOG(GLOBAL, LOG_HEAP, 1, "\tfreeing dead unit " PFX "-" PFX " [-" PFX "]\n", u,
UNIT_COMMIT_END(u), UNIT_RESERVED_END(u));
RSTATS_DEC(heap_num_free);
really_free_unit(u);
u = next_u;
heapmgt->heap.num_dead--;
}
heapmgt->heap.dead = NULL;
release_recursive_lock(&heap_unit_lock);
dynamo_vm_areas_unlock();
LOG(GLOBAL, LOG_CACHE | LOG_STATS, 1, "heap_low_on_memory: freed %d KB\n",
freed / 1024);
/* FIXME: we don't keep a list of guard pages, which we may decide to throw
* out or compact at this time.
*/
/* FIXME: should also fix up the allocator to look in other free lists
* of sizes larger than asked for, we may have plenty of memory available
* in other lists! see comments in common_heap_alloc
*/
}
static const char *
get_oom_source_name(oom_source_t source)
{
    /* currently only single-character code names
     * (still as strings though)
*/
const char *code_name = "?";
switch (source) {
case OOM_INIT: code_name = "I"; break;
case OOM_RESERVE: code_name = "R"; break;
case OOM_COMMIT: code_name = "C"; break;
case OOM_EXTEND: code_name = "E"; break;
default: ASSERT_NOT_REACHED();
}
return code_name;
}
static bool
silent_oom_for_process(oom_source_t source)
{
if (TESTANY(OOM_COMMIT | OOM_EXTEND, source) &&
!IS_STRING_OPTION_EMPTY(silent_commit_oom_list)) {
bool onlist;
const char *process_name = get_short_name(get_application_name());
string_option_read_lock();
onlist = check_filter_with_wildcards(DYNAMO_OPTION(silent_commit_oom_list),
process_name);
string_option_read_unlock();
if (onlist) {
SYSLOG_INTERNAL_WARNING("not reporting last words of executable %s",
process_name);
return true;
}
}
return false;
}
/* oom_source_t identifies the action we were taking, os_error_code is
* the returned value from the last system call - opaque at this OS
* independent layer.
*/
static void
report_low_on_memory(which_vmm_t which, oom_source_t source,
heap_error_code_t os_error_code)
{
if (TESTANY(DYNAMO_OPTION(silent_oom_mask), source) ||
silent_oom_for_process(source)) {
SYSLOG_INTERNAL_WARNING("Mostly silent OOM: %s " PFX ".\n",
get_oom_source_name(source), os_error_code);
/* still produce an ldmp for internal use */
if (TEST(DUMPCORE_OUT_OF_MEM_SILENT, DYNAMO_OPTION(dumpcore_mask)))
os_dump_core("Out of memory, silently aborting program.");
} else {
const char *oom_source_code = get_oom_source_name(source);
char type_hex[19];
snprintf(type_hex, BUFFER_SIZE_ELEMENTS(type_hex), PFX, which);
NULL_TERMINATE_BUFFER(type_hex);
char status_hex[19];
snprintf(status_hex, BUFFER_SIZE_ELEMENTS(status_hex), PFX, os_error_code);
NULL_TERMINATE_BUFFER(status_hex);
/* SYSLOG first */
SYSLOG(SYSLOG_CRITICAL, OUT_OF_MEMORY, 4, get_application_name(),
get_application_pid(), oom_source_code, type_hex, status_hex);
/* Stats can be very useful to diagnose why we hit OOM. */
if (INTERNAL_OPTION(rstats_to_stderr))
dump_global_rstats_to_stderr();
/* XXX: case 7296 - ldmp even if we have decided not to produce an event above */
if (TEST(DUMPCORE_OUT_OF_MEM, DYNAMO_OPTION(dumpcore_mask)))
os_dump_core("Out of memory, aborting program.");
/* pass only status code to XML where we should have a stack dump and callstack */
report_diagnostics("Out of memory", status_hex, NO_VIOLATION_BAD_INTERNAL_STATE);
}
os_terminate(NULL, TERMINATE_PROCESS);
ASSERT_NOT_REACHED();
}
/* update statistics for committed memory, and add to vm_areas */
static inline void
account_for_memory(void *p, size_t size, uint prot, bool add_vm,
bool image _IF_DEBUG(const char *comment))
{
RSTATS_ADD_PEAK(memory_capacity, size);
    /* case 3045: areas inside the vmheap reservation are not added to the list;
     * for clients that use DR-allocated memory, we have get_memory_info()
     * query the OS to see inside
*/
if (is_vmm_reserved_address(p, size, NULL, NULL)) {
return;
}
if (add_vm) {
add_dynamo_vm_area(p, ((app_pc)p) + size, prot, image _IF_DEBUG(comment));
} else {
        /* due to circular dependencies between vmareas and the global heap we do not
         * call add_dynamo_vm_area here; instead we indicate that something has changed
         */
mark_dynamo_vm_areas_stale();
/* NOTE: 'prot' info is lost about this region, but is needed in
* heap_vmareas_synch_units to update all_memory_areas. Currently
* heap_create_unit is the only place that passes 'false' with prot rw-.
*/
ASSERT(TESTALL(MEMPROT_READ | MEMPROT_WRITE, prot));
}
}
/* remove_vm MUST be false iff this is heap memory, which is updated separately */
static void
update_dynamo_areas_on_release(app_pc start, app_pc end, bool remove_vm)
{
if (!vm_areas_exited && !heap_exiting) { /* avoid problems when exiting */
        /* case 3045: areas inside the vmheap reservation are not added to the list;
         * for clients that use DR-allocated memory, we have get_memory_info()
         * query the OS to see inside
*/
if (is_vmm_reserved_address(start, end - start, NULL, NULL)) {
return;
}
if (remove_vm) {
remove_dynamo_vm_area(start, end);
} else {
            /* Due to cyclic dependencies between heap and vmareas we cannot remove
* incrementally. The pending set is protected by the same lock
* needed to synch the vm areas, so we will never mis-identify free
* memory as DR memory.
*/
mark_dynamo_vm_areas_stale();
dynamo_areas_pending_remove = true;
}
}
}
bool
lockwise_safe_to_allocate_memory()
{
/* check whether it's safe to hold a lock that normally can be held
* for memory allocation -- i.e., check whether we hold the
* global_alloc_lock
*/
return !self_owns_recursive_lock(&global_alloc_lock);
}
/* Reserves space inside the VMM region which can be used by the caller for
 * mapping a file.  First attempts to reserve at "preferred" but if that fails
 * it attempts at any available location.
*/
byte *
heap_reserve_for_external_mapping(byte *preferred, size_t size, which_vmm_t which)
{
#ifdef WINDOWS
/* TODO i#3570: Add Windows support, which is complex as we cannot map a file
* on top of an existing reservation; nor can we un-reserve a piece of a
* reservation. See the issue for solution ideas.
*/
ASSERT_NOT_IMPLEMENTED(false && "i#3570");
return NULL;
#endif
vm_addr_t p = NULL;
vm_heap_t *vmh = vmheap_for_which(which);
ASSERT(size > 0);
size = ALIGN_FORWARD(size, PAGE_SIZE);
if (!DYNAMO_OPTION(vm_reserve))
return NULL;
if (preferred >= vmh->start_addr && preferred + size <= vmh->end_addr)
p = vmm_heap_reserve_blocks(vmh, size, preferred, which);
if (p == NULL)
p = vmm_heap_reserve_blocks(vmh, size, NULL, which);
LOG(GLOBAL, LOG_HEAP, 2, "%s %s: size=%d p=" PFX "\n", __FUNCTION__, vmh->name, size,
p);
return p;
}
/* Before calling this function, the caller must restore [p,p+size) to
* its state from before heap_reserve_for_external_mapping() was
* called: reserved but not committed.
*/
bool
heap_unreserve_for_external_mapping(byte *p, size_t size, which_vmm_t which)
{
#ifdef WINDOWS
/* TODO i#3570: Add Windows support, which is complex as we cannot map a file
* on top of an existing reservation; nor can we un-reserve a piece of a
* reservation. See the issue for solution ideas.
*/
ASSERT_NOT_IMPLEMENTED(false && "i#3570");
return false;
#endif
vm_heap_t *vmh = vmheap_for_which(which);
ASSERT(size > 0);
size = ALIGN_FORWARD(size, PAGE_SIZE);
if (!DYNAMO_OPTION(vm_reserve) || !is_vmm_reserved_address(p, size, NULL, NULL))
return false;
vmm_heap_free_blocks(vmh, p, size, which);
LOG(GLOBAL, LOG_HEAP, 2, "%s %s: size=%d p=" PFX "\n", __FUNCTION__, vmh->name, size,
p);
return true;
}
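/* Illustrative pairing of the two routines above (a sketch only; the mapping
 * step in the middle stands for whatever file-mapping primitive the caller
 * uses):
 *
 *   byte *p = heap_reserve_for_external_mapping(preferred, size, which);
 *   if (p != NULL) {
 *       ...map the file at p...
 *       ...later, unmap and restore [p, p+size) to reserved-but-uncommitted...
 *       heap_unreserve_for_external_mapping(p, size, which);
 *   }
 */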
/* we indirect all os memory requests through here so we have a central place
* to handle the out-of-memory condition.
* add_vm MUST be false iff this is heap memory, which is updated separately.
*/
static void *
get_real_memory(size_t size, uint prot, bool add_vm,
which_vmm_t which _IF_DEBUG(const char *comment))
{
void *p;
heap_error_code_t error_code;
/* must round up to page sizes, else vmm_heap_alloc assert triggers */
size = ALIGN_FORWARD(size, PAGE_SIZE);
/* memory alloc/dealloc and updating DR list must be atomic */
dynamo_vm_areas_lock(); /* if already hold lock this is a nop */
p = vmm_heap_alloc(size, prot, &error_code, which);
if (p == NULL) {
SYSLOG_INTERNAL_WARNING_ONCE("Out of memory -- cannot reserve or "
"commit %dKB. Trying to recover.",
size / 1024);
/* we should be ok here, shouldn't come in here holding global_alloc_lock
* or heap_unit_lock w/o first having grabbed DR areas lock
*/
ASSERT(safe_to_allocate_or_free_heap_units());
heap_low_on_memory();
fcache_low_on_memory();
/* try again
* FIXME: have more sophisticated strategy of freeing a little, then getting
* more drastic with each subsequent failure
* FIXME: can only free live fcache units for current thread w/ current
* impl...should we wait a while and try again if out of memory, hoping
* other threads have freed some?!?!
*/
p = vmm_heap_alloc(size, prot, &error_code, which);
if (p == NULL) {
report_low_on_memory(which, OOM_RESERVE, error_code);
}
SYSLOG_INTERNAL_WARNING_ONCE("Out of memory -- but still alive after "
"emergency free.");
}
account_for_memory(p, size, prot, add_vm, false _IF_DEBUG(comment));
dynamo_vm_areas_unlock();
return p;
}
static void
release_memory_and_update_areas(app_pc p, size_t size, bool decommit, bool remove_vm,
which_vmm_t which)
{
heap_error_code_t error_code;
/* these two operations need to be atomic wrt DR area updates */
dynamo_vm_areas_lock(); /* if already hold lock this is a nop */
/* ref case 3035, we must remove from dynamo_areas before we free in case
* we end up allocating memory in the process of removing the area
* (we don't want to end up getting the memory we just freed since that
* would lead to errors in the list when we finally did remove it)
*/
update_dynamo_areas_on_release(p, p + size, remove_vm);
if (decommit)
vmm_heap_decommit(p, size, &error_code, which);
else
vmm_heap_free(p, size, &error_code, which);
ASSERT(error_code == HEAP_ERROR_SUCCESS);
dynamo_vm_areas_unlock();
}
/* remove_vm MUST be false iff this is heap memory, which is updated separately */
static void
release_real_memory(void *p, size_t size, bool remove_vm, which_vmm_t which)
{
/* must round up to page sizes for vmm_heap_free */
size = ALIGN_FORWARD(size, PAGE_SIZE);
release_memory_and_update_areas((app_pc)p, size, false /*free*/, remove_vm, which);
/* avoid problem w/ being called by cleanup_and_terminate after dynamo_process_exit */
if (IF_DEBUG_ELSE(!dynamo_exited_log_and_stats, true))
RSTATS_SUB(memory_capacity, size);
}
static void
extend_commitment(vm_addr_t p, size_t size, uint prot, bool initial_commit,
which_vmm_t which)
{
heap_error_code_t error_code;
ASSERT(ALIGNED(p, PAGE_SIZE));
size = ALIGN_FORWARD(size, PAGE_SIZE);
if (!vmm_heap_commit(p, size, prot, &error_code, which)) {
SYSLOG_INTERNAL_WARNING_ONCE("Out of memory - cannot extend commit "
"%dKB. Trying to recover.",
size / 1024);
heap_low_on_memory();
fcache_low_on_memory();
/* see low-memory ideas in get_real_memory */
if (!vmm_heap_commit(p, size, prot, &error_code, which)) {
report_low_on_memory(which, initial_commit ? OOM_COMMIT : OOM_EXTEND,
error_code);
}
SYSLOG_INTERNAL_WARNING_ONCE("Out of memory in extend - still alive "
"after emergency free.");
}
}
/* A wrapper around get_real_memory that adds a guard page on each side of the
 * requested unit.  These should consume only uncommitted virtual address space
 * and should not use any physical memory.
* add_vm MUST be false iff this is heap memory, which is updated separately.
* Non-NULL min_addr is only supported for stack allocations (DrMi#1723).
*/
static vm_addr_t
get_guarded_real_memory(size_t reserve_size, size_t commit_size, uint prot, bool add_vm,
bool guarded, byte *min_addr,
which_vmm_t which _IF_DEBUG(const char *comment))
{
vm_addr_t p = NULL;
uint guard_size = (uint)PAGE_SIZE;
heap_error_code_t error_code;
bool try_vmm = true;
ASSERT(reserve_size >= commit_size);
if (!guarded || !has_guard_pages(which)) {
if (reserve_size == commit_size)
return get_real_memory(reserve_size, prot, add_vm, which _IF_DEBUG(comment));
guard_size = 0;
}
reserve_size = ALIGN_FORWARD(reserve_size, PAGE_SIZE);
commit_size = ALIGN_FORWARD(commit_size, PAGE_SIZE);
reserve_size += 2 * guard_size; /* add top and bottom guards */
/* memory alloc/dealloc and updating DR list must be atomic */
dynamo_vm_areas_lock(); /* if already hold lock this is a nop */
#ifdef WINDOWS
/* DrMi#1723: if we swap TEB stack fields, a client (or a DR app mem touch)
* can trigger an app guard
* page. We have to ensure that the kernel will update TEB.StackLimit in that
* case, which requires our dstack to be higher than the app stack.
* This results in more fragmentation and larger dynamo_areas so we avoid
* if we can. We could consider a 2nd vm_reserve region just for stacks.
*/
if (SWAP_TEB_STACKBASE() && (!DYNAMO_OPTION(vm_reserve) && min_addr > NULL) ||
(DYNAMO_OPTION(vm_reserve) && min_addr > vmheap_get_start())) {
try_vmm = false;
}
#endif
if (try_vmm)
p = vmm_heap_reserve(reserve_size, &error_code, TEST(MEMPROT_EXEC, prot), which);
#ifdef WINDOWS
if (!try_vmm || p < (vm_addr_t)min_addr) {
if (p != NULL)
vmm_heap_free(p, reserve_size, &error_code, which);
p = os_heap_reserve_in_region((void *)ALIGN_FORWARD(min_addr, PAGE_SIZE),
(void *)PAGE_START(POINTER_MAX), reserve_size,
&error_code, TEST(MEMPROT_EXEC, prot));
/* No reason to update heap-reachable b/c stack doesn't need to reach
* (min_addr != NULL assumed to be stack).
*/
ASSERT(!DYNAMO_OPTION(stack_shares_gencode)); /* would break reachability */
/* If it fails we can't do much: we fall back to within-vmm, if possible,
* and rely on our other best-effort TEB.StackLimit updating checks
* (check_app_stack_limit()).
*/
if (p == NULL) {
SYSLOG_INTERNAL_WARNING_ONCE("Unable to allocate dstack above app stack");
if (!try_vmm) {
p = vmm_heap_reserve(reserve_size, &error_code, TEST(MEMPROT_EXEC, prot),
which);
}
}
}
#endif
if (p == NULL) {
/* Very unlikely to happen: we have to reach at least 2GB reserved memory. */
SYSLOG_INTERNAL_WARNING_ONCE("Out of memory - cannot reserve %dKB. "
"Trying to recover.",
reserve_size / 1024);
heap_low_on_memory();
fcache_low_on_memory();
p = vmm_heap_reserve(reserve_size, &error_code, TEST(MEMPROT_EXEC, prot), which);
if (p == NULL) {
report_low_on_memory(which, OOM_RESERVE, error_code);
}
SYSLOG_INTERNAL_WARNING_ONCE("Out of memory on reserve - but still "
"alive after emergency free.");
}
/* includes guard pages if add_vm -- else, heap_vmareas_synch_units() will
* add guard pages in by assuming one page on each side of every heap unit
* if dynamo_options.guard_pages
*/
account_for_memory((void *)p, reserve_size, prot, add_vm, false _IF_DEBUG(comment));
dynamo_vm_areas_unlock();
STATS_ADD_PEAK(reserved_memory_capacity, reserve_size);
STATS_ADD_PEAK(guard_pages, 2);
p += guard_size;
extend_commitment(p, commit_size, prot, true /* initial commit */, which);
return p;
}
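/* Resulting layout from get_guarded_real_memory() when guard pages are in use
 * (the returned pointer points just past the lower guard page):
 *
 *   | guard page | committed (commit_size) | reserved only | guard page |
 *                ^ returned p
 */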
/* A wrapper around release_real_memory that also frees the guard pages on each
* side of the requested unit. remove_vm MUST be false iff this is heap memory,
* which is updated separately.
*/
static void
release_guarded_real_memory(vm_addr_t p, size_t size, bool remove_vm, bool guarded,
which_vmm_t which)
{
if (!guarded || !has_guard_pages(which)) {
release_real_memory(p, size, remove_vm, which);
return;
}
size = ALIGN_FORWARD(size, PAGE_SIZE);
size += PAGE_SIZE * 2; /* add top and bottom guards */
p -= PAGE_SIZE;
release_memory_and_update_areas((app_pc)p, size, false /*free*/, remove_vm, which);
/* avoid problem w/ being called by cleanup_and_terminate after dynamo_process_exit */
if (IF_DEBUG_ELSE(!dynamo_exited_log_and_stats, true)) {
RSTATS_SUB(memory_capacity, size);
STATS_SUB(reserved_memory_capacity, size);
STATS_ADD(guard_pages, -2);
}
}
/* use heap_mmap to allocate large chunks of executable memory;
* it's mainly used to allocate our fcache units
*/
void *
heap_mmap_ex(size_t reserve_size, size_t commit_size, uint prot, bool guarded,
which_vmm_t which)
{
void *p = get_guarded_real_memory(reserve_size, commit_size, prot, true, guarded,
NULL, which _IF_DEBUG("heap_mmap"));
#ifdef DEBUG_MEMORY
if (TEST(MEMPROT_WRITE, prot))
memset(vmm_get_writable_addr(p, which), HEAP_ALLOCATED_BYTE, commit_size);
#endif
/* We rely on this for freeing _post_stack in absence of dcontext */
ASSERT(!DYNAMO_OPTION(vm_reserve) || !DYNAMO_OPTION(stack_shares_gencode) ||
(ptr_uint_t)p - (guarded ? (GUARD_PAGE_ADJUSTMENT / 2) : 0) ==
ALIGN_BACKWARD(p, DYNAMO_OPTION(vmm_block_size)) ||
at_reset_at_vmm_limit(vmheap_for_which(which)));
LOG(GLOBAL, LOG_HEAP, 2, "heap_mmap: %d bytes [/ %d] @ " PFX "\n", commit_size,
reserve_size, p);
STATS_ADD_PEAK(mmap_capacity, commit_size);
STATS_ADD_PEAK(mmap_reserved_only, (reserve_size - commit_size));
return p;
}
/* Use heap_mmap to allocate large chunks of memory. */
void *
heap_mmap_reserve(size_t reserve_size, size_t commit_size, uint prot, which_vmm_t which)
{
return heap_mmap_ex(reserve_size, commit_size, prot, true, which);
}
/* It is up to the caller to ensure commit_size is a page size multiple,
* and that it does not extend beyond the initial reservation.
*/
void