/* **********************************************************
* Copyright (c) 2011-2014 Google, Inc. All rights reserved.
* Copyright (c) 2000-2010 VMware, Inc. All rights reserved.
* **********************************************************/
/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/* Copyright (c) 2003-2007 Determina Corp. */
/* Copyright (c) 2001-2003 Massachusetts Institute of Technology */
/* Copyright (c) 2000-2001 Hewlett-Packard Company */
/*
* fcache.c - fcache manager
*/
#include "globals.h"
#include "link.h"
#include "fragment.h"
#include "fcache.h"
#include "monitor.h"
#ifdef HOT_PATCHING_INTERFACE
# include "hotpatch.h"
#endif
#include <string.h> /* for memcpy */
#include <stddef.h> /* for offsetof */
#include <limits.h> /* for UCHAR_MAX */
#include "perscache.h"
#include "synch.h"
#include "instrument.h"
/* A code cache is made up of multiple separate mmapped units.
 * We grow a unit by resizing, shifting, and relinking, up to a maximum size,
 * at which point we create a separate unit if we need more space.
 * The cache is extremely flexible in allowing resizing (hard to support)
 * and separate units of different sizes, in any combination.
 * We will build a unit larger than cache_{bb,trace}_unit_max for a single
 * large request, up to the max cache size.
 * To save memory, we don't make units larger than 64KB by default: there is
 * not much advantage to huge units.
 */
/* to make it easy to switch to INTERNAL_OPTION */
#define FCACHE_OPTION(o) dynamo_options.o
/*
 * unit initial size is FCACHE_OPTION(cache_{bb,trace}_unit_init), default is 32*1024
 * it grows by 4X steps up to FCACHE_OPTION(cache_{bb,trace}_unit_quadruple), default is 32*1024
 * unit max size is FCACHE_OPTION(cache_{bb,trace}_unit_max), default is 64*1024
 * once at max size, we make new units, all of max size
 *
 * thus the default is to do no quadrupling, just a single doubling and then no more resizing
 * FIXME: should we stop resizing altogether and just have variable-sized
 * separate units? it's not like a 32K unit is too small to keep around...
 * OTOH, we want the flexibility of resizing; for server apps with lots of threads
 * we may move the initial unit size smaller
*/
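/* Illustrative sketch only (not part of the build; the real resize decision is
 * in the unit-growing code further below): one plausible reading of how the
 * parameters above pick the next size for a unit that is too small.  The
 * helper name is hypothetical; the fields are those of fcache_t defined below.
 *
 *   static inline size_t
 *   next_unit_size_sketch(fcache_t *cache, size_t cur_size)
 *   {
 *       if (cur_size >= cache->max_unit_size)
 *           return cur_size;      // at unit max: add a new unit instead of resizing
 *       if (cur_size * 4 <= cache->max_quadrupled_unit_size)
 *           return cur_size * 4;  // 4X steps while under the quadruple threshold
 *       return MIN(cur_size * 2, (size_t) cache->max_unit_size);
 *   }
 *
 * With the defaults (init 32KB, quadruple 32KB, max 64KB) this gives a single
 * doubling from 32KB to 64KB and then no more resizing, as noted above.
 */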
/* Invariant: a cache unit's size is always at least this constant times the size
 * of the largest fragment inside it (this can make it larger than cache_{bb,trace}_unit_max)
*/
#define MAX_SINGLE_MULTIPLE 2
/* adaptive working set reactive cache expansion default parameters:
* first expansion(s) free
 * FCACHE_OPTION(cache_{bb,trace}_unit_upgrade), default is 64KB, so 32=>64 is free
* after that, only expand if regenerated/replaced ratio matches these
* numbers (no floating-point, so we use 2 ints):
* dynamo_options.cache_{bb,trace}_regen default = 10
* dynamo_options.cache_{bb,trace}_replace default = 50
* special cases:
* if cache_{bb,trace}_regen == 0, never increases cache size after free upgrade
* if cache_{bb,trace}_replace == 0, always increases (effectively
* disabling adaptive working set, although the nice way to disable
* is to use -no_finite_{bb,trace}_cache)
*/
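/* Illustrative sketch only (not compiled; the real test is in the cache resize
 * logic further below): one plausible reading of the expansion check described
 * above, using two ints to avoid floating point.  Field names are those of
 * fcache_t defined below.
 *
 *   bool allow_expansion;
 *   if (cache->regen_param == 0)
 *       allow_expansion = false;  // never grow after the free upgrade
 *   else if (cache->replace_param == 0)
 *       allow_expansion = true;   // adaptive working set effectively disabled
 *   else {
 *       // expand when regenerated/replaced >= regen_param/replace_param
 *       allow_expansion = cache->num_regenerated * cache->replace_param >=
 *                         cache->num_replaced * cache->regen_param;
 *   }
 */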
/*
* Maximum cache sizes are stored in these two options:
* dynamo_options.cache_bb_max
* dynamo_options.cache_trace_max
* A value of 0 means "infinite"
*/
/* Minimum size to leave as an empty hole, which will be prepended to FIFO
* for private cache and so will be filled even if it means bumping adjacent guys.
* For shared cache, we currently don't fill empty slots, but once we do we will
* only fill with a fragment that fits (case 4485). Shared empties are not prepended
* but rather are placed in the same location in the FIFO as the deleted guy.
* Should be the minimum common fragment size.
*/
#define MIN_EMPTY_HOLE(cache) \
MAX(((cache)->is_trace ? 64U : \
((!(cache)->is_shared && DYNAMO_OPTION(separate_private_stubs)) ? 20U : \
(((cache)->is_shared && DYNAMO_OPTION(separate_shared_stubs)) ? 20U : 64U))), \
MIN_FCACHE_SLOT_SIZE(cache))
/* Minimum end-of-cache hole size -- anything smaller and the cache is "full"
* This is 2x the smallest fragment size
* FIXME: use larger size for trace cache?
*/
#define MIN_UNIT_END_HOLE(cache) ((uint) 2 * MIN_EMPTY_HOLE(cache))
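/* Worked example (illustrative; assumes a private, non-coarse bb cache with
 * -separate_private_stubs and the roughly 16-byte non-coarse MIN_FCACHE_SLOT_SIZE
 * noted at its definition below): MIN_EMPTY_HOLE = MAX(20, 16) = 20 bytes, so
 * MIN_UNIT_END_HOLE = 40 bytes; for a trace cache MIN_EMPTY_HOLE = MAX(64, 16)
 * = 64, giving a 128-byte end-of-unit threshold before the unit is "full".
 */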
/* This is ignored for coarse fragments */
#define START_PC_ALIGNMENT 4
/* Alignment: we assume basic blocks don't care so much about alignment, so we go
 * to 4 to avoid sub-word fetches. NOTE we also need at least START_PC_ALIGNMENT
* byte alignment for the start_pc padding for -pad_jmps_shift_{bb,trace} support
* (need at least that much even without the option since we back align the
* start_pc to get the header)
*/
#define SLOT_ALIGNMENT(cache) \
( (cache)->is_trace ? DYNAMO_OPTION(cache_trace_align) : \
((cache)->is_coarse ? DYNAMO_OPTION(cache_coarse_align) : \
DYNAMO_OPTION(cache_bb_align)) )
/* We use a header to have a backpointer to the fragment_t */
/* FIXME: currently this abstraction type is unused; we instead use
 * *(fragment_t **) when working with the backpointer. If we are to add
 * more fields we should use this, although fragment_t may be a better
 * place to keep such information. */
typedef struct _live_header_t {
/* FIXME: sizeof(live_header_t) should match HEADER_SIZE */
fragment_t *f;
} live_header_t;
/* NOTE this must be a multiple of START_PC_ALIGNMENT bytes so that the
* start_pc is properly aligned (for -pad_jmps_shift_{bb,trace} support) */
#define HEADER_SIZE(f) \
(TEST(FRAG_COARSE_GRAIN, (f)->flags) ? 0 : (sizeof(fragment_t*)))
#define HEADER_SIZE_FROM_CACHE(cache) \
((cache)->is_coarse ? 0 : (sizeof(fragment_t*)))
/**************************************************
* We use a FIFO replacement strategy
* Empty slots in the cache are always at the front of the list, and they
* take the form of the empty_slot_t struct.
* Target fragments to delete are represented in the FIFO by their fragment_t
* struct, using the next_fcache field to chain them.
* We use a header for each fragment so we can delete adjacent-in-cache
* (different from indirected FIFO list) to make contiguous space available.
 * Padding for alignment is always at the end of the fragment, so it can be
 * combined with the empty space left over when a deleted fragment's slot is only
 * partially reused. We assume everything starts out aligned, and the same alignment
 * on the end of each slot keeps it aligned.
* Thus:
* ------
* header
* <up to START_PC_ALIGNMENT-1 bytes padding, stored in the alignment of start_pc>
* start_pc: prefix
* body
* stubs
* <padding for alignment>
* ------
* header
* <up to START_PC_ALIGNMENT-1 bytes padding, stored in the alignment of start_pc>
* start_pc: ...
*/
typedef struct _empty_slot_t {
cache_pc start_pc; /* very top of location in fcache */
/* flags MUST be at same location as fragment_t->flags
* we use flags==FRAG_IS_EMPTY_SLOT to indicate an empty slot
*/
uint flags;
fragment_t *next_fcache; /* for chaining fragments in fcache unit */
fragment_t *prev_fcache; /* for chaining fragments in fcache unit */
uint fcache_size; /* size rounded up to cache line boundaries;
* not just ushort so we can merge adjacents;
* not size_t since each unit assumed <4GB. */
} empty_slot_t;
/* macros to make dealing with both fragment_t and empty_slot_t easier */
#define FRAG_EMPTY(f) (TEST(FRAG_IS_EMPTY_SLOT, (f)->flags))
#define FRAG_START(f) (TEST(FRAG_IS_EMPTY_SLOT, (f)->flags) ? \
((empty_slot_t *)(f))->start_pc : (f)->start_pc)
/* N.B.: must hold cache lock across any set of a fragment's start_pc or size
* once that fragment is in a cache, as contig-cache-walkers need a consistent view!
* FIXME: we can't assert as we can't do a unit lookup at all use sites
*/
#define FRAG_START_ASSIGN(f, val) do { \
if (TEST(FRAG_IS_EMPTY_SLOT, (f)->flags)) \
((empty_slot_t *)(f))->start_pc = (val); \
else \
(f)->start_pc = (val); \
} while (0)
/* for -pad_jmps_shift_{bb,trace} we may have shifted the start_pc forward by up
* to START_PC_ALIGNMENT-1 bytes, back align to get the right header pointer */
#define FRAG_START_PADDING(f) \
((TEST(FRAG_IS_EMPTY_SLOT, (f)->flags) || !PAD_JMPS_SHIFT_START((f)->flags)) ? \
0 : (ASSERT(CHECK_TRUNCATE_TYPE_uint((ptr_uint_t) \
((f)->start_pc - ALIGN_BACKWARD((f)->start_pc, START_PC_ALIGNMENT)))), \
(uint)((ptr_uint_t)(f)->start_pc - \
ALIGN_BACKWARD((f)->start_pc, START_PC_ALIGNMENT))))
#define FRAG_HDR_START(f) (FRAG_START(f) - HEADER_SIZE(f) - FRAG_START_PADDING(f))
#define FRAG_SIZE(f) ((TEST(FRAG_IS_EMPTY_SLOT, (f)->flags)) ? \
((empty_slot_t *)(f))->fcache_size : \
(f)->size + (uint)(f)->fcache_extra + FRAG_START_PADDING(f))
/* N.B.: must hold cache lock across any set of a fragment's start_pc or size
* once that fragment is in a cache, as contig-cache-walkers need a consistent view!
* FIXME: we can't assert as we can't do a unit lookup at all use sites
*/
#define FRAG_SIZE_ASSIGN(f, val) do { \
if (TEST(FRAG_IS_EMPTY_SLOT, (f)->flags)) { \
ASSERT_TRUNCATE(((empty_slot_t *)(f))->fcache_size, uint, val); \
((empty_slot_t *)(f))->fcache_size = (val); \
} else { \
/* cl has string limit so need temp to get ASSERT to compile */ \
uint extra_tmp = ((val) - ((f)->size + FRAG_START_PADDING(f))); \
ASSERT_TRUNCATE((f)->fcache_extra, byte, extra_tmp); \
(f)->fcache_extra = (byte) extra_tmp; \
} \
} while (0)
#define FIFO_NEXT(f) ((TEST(FRAG_IS_EMPTY_SLOT, (f)->flags)) ? \
((empty_slot_t *)(f))->next_fcache : (ASSERT(!TEST(FRAG_SHARED, (f)->flags)), \
((private_fragment_t *)(f))->next_fcache))
#define FIFO_NEXT_ASSIGN(f, val) do { \
if (TEST(FRAG_IS_EMPTY_SLOT, (f)->flags)) \
((empty_slot_t *)(f))->next_fcache = (val); \
else { \
ASSERT(!TEST(FRAG_SHARED, (f)->flags)); \
((private_fragment_t *)(f))->next_fcache = (val); \
} \
} while (0)
#define FIFO_PREV(f) ((TEST(FRAG_IS_EMPTY_SLOT, (f)->flags)) ? \
((empty_slot_t *)(f))->prev_fcache : (ASSERT(!TEST(FRAG_SHARED, (f)->flags)), \
((private_fragment_t *)(f))->prev_fcache))
#define FIFO_PREV_ASSIGN(f, val) do { \
if (TEST(FRAG_IS_EMPTY_SLOT, (f)->flags)) \
((empty_slot_t *)(f))->prev_fcache = (val); \
else { \
ASSERT(!TEST(FRAG_SHARED, (f)->flags)); \
((private_fragment_t *)(f))->prev_fcache = (val); \
} \
} while (0)
#define FRAG_TAG(f) ((TEST(FRAG_IS_EMPTY_SLOT, (f)->flags)) ? \
((empty_slot_t *)(f))->start_pc : (f)->tag)
#ifdef DEBUG
# define FRAG_ID(f) ((TEST(FRAG_IS_EMPTY_SLOT, (f)->flags)) ? -1 : (f)->id)
#endif
#define FIFO_UNIT(f) \
(fcache_lookup_unit((TEST(FRAG_IS_EMPTY_SLOT, (f)->flags)) ? \
(((empty_slot_t *)(f))->start_pc) : ((f)->start_pc)))
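/* Illustrative sketch (not compiled; it mirrors the real walkers below, e.g.
 * fcache_fragment_pclookup): the macros above support a physical walk of a
 * FIFO unit's slots, reading the backpointer header at the top of each slot
 * and advancing by the slot's total size.
 *
 *   cache_pc pc = unit->start_pc;
 *   while (pc < unit->cur_pc) {
 *       fragment_t *f = *(fragment_t **) pc;  // live fragment_t or empty_slot_t
 *       ASSERT(FRAG_HDR_START(f) == pc);
 *       // ... examine f via FRAG_EMPTY(f), FRAG_START(f), FRAG_TAG(f), etc. ...
 *       pc += FRAG_SIZE(f);                   // contiguously-next slot
 *   }
 */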
/* Shared fragments do NOT use a FIFO as they cannot easily replace existing
* fragments. Instead they use a free list (below).
*/
#define USE_FIFO(f) (!TEST(FRAG_SHARED, (f)->flags))
#define USE_FREE_LIST(f) (TEST(FRAG_SHARED, (f)->flags) && \
!TEST(FRAG_COARSE_GRAIN, (f)->flags))
#define USE_FIFO_FOR_CACHE(c) (!(c)->is_shared)
#define USE_FREE_LIST_FOR_CACHE(c) ((c)->is_shared && !(c)->is_coarse)
/**************************************************
* Free empty slot lists of different sizes for shared caches, where we
* cannot easily delete victims to make room in too-small slots.
*/
static const uint FREE_LIST_SIZES[] = {
/* Since we have variable sizes, and we do not pad to bucket
* sizes, each bucket contains free slots that are in
* [SIZE[bucket], SIZE[bucket+1]) where the top one is infinite.
     * see case 7318 about further extensions
*
* Free slots in the current unit are added only if in the middle
* of the unit (the last ones are turned back into unclaimed space).
*
* Tuned for bbs: smallest are 40 (with stubs), plurality are 56, distribution trails
* off slowly beyond that. Note that b/c of pad_jmps we request more than
* the final sizes.
* FIXME: these #s are for inlined stubs, should re-tune w/ separate stubs
* (case 7163)
*/
0, 44, 52, 56, 64, 72, 80, 112, 172
};
#define FREE_LIST_SIZES_NUM (BUFFER_SIZE_ELEMENTS(FREE_LIST_SIZES))
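/* Illustrative sketch (not compiled; the real bucket selection is in the
 * free-list routines further below): a freed slot of a given size goes into
 * the bucket whose range [SIZE[b], SIZE[b+1]) contains it.
 *
 *   uint bucket = 0;
 *   while (bucket + 1 < FREE_LIST_SIZES_NUM && size >= FREE_LIST_SIZES[bucket + 1])
 *       bucket++;
 *   // e.g. a 60-byte slot lands in the 56-byte bucket, and anything >= 172
 *   // bytes lands in the final, unbounded bucket.
 */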
/* To support physical cache contiguity walking we store both a
* next-free and prev-free pointer and a size at the top of the empty slot, and
* don't waste memory with an empty_slot_t data structure.
 * We also use the flags field to allow distinguishing free list slots
* from live fragment_t's (see notes by flags field below).
*
* Our free list coalescing assumes that a fragment_t that follows a
* free list entry has the FRAG_FOLLOWS_FREE_ENTRY flag set.
*
 * FIXME: could avoid heap w/ normal empty_slot_t scheme: if we don't have
* separate stubs, empty_slot_t @ 20 bytes (no start_pc) should fit in
* any cache slot. Could save a few MB of heap on large apps.
* (This is case 4937.)
*
 * FIXME: If free lists work well we could try using them for private
 * caches as well, instead of the empty_slot_t-struct-on-FIFO scheme.
*
* FIXME: unit pointer may be useful to avoid fcache_lookup_unit
*/
typedef struct _free_list_header_t {
struct _free_list_header_t *next;
/* We arrange these two so that the FRAG_FCACHE_FREE_LIST flag will be set
* at the proper bit as though these were a "uint flags" at the same offset
* in the struct as fragment_t.flags. Since no one else examines a free list
* as though it might be a fragment_t, we don't care about the other flags.
* We have an ASSERT in fcache_init() to ensure the byte ordering is right.
*
* Since we compare a fragment_t* to this inlined struct, we're really
* comparing a fragment_t* to the first field, the next pointer. So when we
* de-reference the flags we're looking at the flags of the next entry in
* the free list. Thus to identify a free list entry we must check for
* either NULL or for the FRAG_FCACHE_FREE_LIST flag.
*/
ushort flags;
ushort size;
struct _free_list_header_t *prev;
} free_list_header_t;
/* We also place a size field at the end of the free list slot.
* Combined w/ the FRAG_FOLLOWS_FREE_ENTRY flag this allows us to coalesce
* new free list entries with existing previous entries.
*/
typedef struct _free_list_footer_t {
ushort size;
} free_list_footer_t;
#define MAX_FREE_ENTRY_SIZE USHRT_MAX
/* See notes above: since f is either fragment_t* or free_list_header_t.next,
* we're checking the next free list entry's flags by dereferencing, forcing a
* check for NULL as well (== end of list).
*/
#define FRAG_IS_FREE_LIST(f) ((f) == NULL || TEST(FRAG_FCACHE_FREE_LIST, (f)->flags))
/* De-references the fragment header stored at the start of the next
* fcache slot, given a pc and a size for the current slot.
*/
#define FRAG_NEXT_SLOT(pc, size) (*((fragment_t **)((pc) + (size))))
/* Caller must know that the next slot is a free slot! */
#define FRAG_NEXT_FREE(pc, size) ((free_list_header_t *)((pc) + (size)))
/* FIXME: for non-free-list-using caches we could shrink this.
* Current smallest bb is 5 bytes (single jmp) align-4 + header is 12,
* and we're at 16 here, so we are wasting some space, but very few
* fragments are under 16 bytes (see request_size_histogram[])
*/
#define MIN_FCACHE_SLOT_SIZE(cache) \
((cache)->is_coarse ? 0 : (sizeof(free_list_header_t)+sizeof(free_list_footer_t)))
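/* Illustrative sketch (not compiled; the real coalescing is in the free-list
 * add/remove routines further below): after freeing the slot at [pc, pc+size),
 * the macros above let us peek at the physically-next slot and merge with it
 * if it is itself a free-list entry, subject to the ushort size limit.
 *
 *   if (pc + size < unit->cur_pc) {
 *       fragment_t *next = FRAG_NEXT_SLOT(pc, size);
 *       if (FRAG_IS_FREE_LIST(next) &&
 *           (uint) size + FRAG_NEXT_FREE(pc, size)->size <= MAX_FREE_ENTRY_SIZE) {
 *           // merge: unlink the next entry from its bucket and extend this one
 *       }
 *   }
 */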
/**************************************************
* single mmapped piece of cache
*/
struct _fcache;
#define UNIT_RESERVED_SIZE(u) ((size_t)((u)->reserved_end_pc - (u)->start_pc))
typedef struct _fcache_unit_t {
cache_pc start_pc; /* start address of fcache storage */
cache_pc end_pc; /* end address of committed storage, open-ended */
cache_pc cur_pc; /* if not filled up yet, bottom of cache */
cache_pc reserved_end_pc; /* reservation end address, open-ended */
size_t size; /* committed size: equals (end_pc - start_pc) */
bool full; /* to tell whether cache is filled to end */
struct _fcache *cache; /* up-pointer to parent cache */
#if defined(SIDELINE) || defined(WINDOWS_PC_SAMPLE)
dcontext_t *dcontext;
#endif
bool writable; /* remember state of cache memory protection */
#ifdef WINDOWS_PC_SAMPLE
/* We cache these values for units_to_{flush,free} units whose cache
* field has been invalidated
*/
bool was_trace;
bool was_shared;
profile_t *profile;
#endif
bool pending_free; /* was entire unit flushed and slated for free? */
#ifdef DEBUG
bool pending_flush; /* indicates in-limbo unit pre-flush is still live */
#endif
uint flushtime; /* free this unit when this flushtime is freed --
* used only for units_to_free list, else 0 */
struct _fcache_unit_t *next_global; /* used to link all units */
struct _fcache_unit_t *prev_global; /* used to link all units */
struct _fcache_unit_t *next_local; /* used to link an fcache_t's units */
} fcache_unit_t;
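/* Relationships implied by the fields above (illustrative summary):
 *   start_pc <= cur_pc <= end_pc <= reserved_end_pc
 *   size == (size_t)(end_pc - start_pc)                  committed bytes
 *   UNIT_RESERVED_SIZE(u) == (size_t)(reserved_end_pc - start_pc)
 * cur_pc is the append point; a unit can become "full" even while
 * cur_pc < end_pc once the end hole drops below MIN_UNIT_END_HOLE.
 */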
#ifdef DEBUG
/* study request allocation sizes */
enum {
HISTOGRAM_GRANULARITY = 4,
HISTOGRAM_MAX_SIZE = 256,
};
#endif /* DEBUG */
/**************************************************
* one "code cache" of a single type of fragment, made up of potentially
* multiple FcacheUnits
*/
typedef struct _fcache {
/* FIXME: do we want space or perf here (bitfield vs full field)? */
bool is_trace:1; /* for varying alignment, etc. */
bool is_shared:1;
#ifdef DEBUG
/* A local cache's pointer has not escaped to any other thread.
* We only use this flag to get around lock ordering issues w/
* persistent caches and we don't bother to set it for all
* private caches.
*/
bool is_local:1; /* no lock needed since only known to this thread */
#endif
/* Is this a dedicated coarse-grain cache unit */
bool is_coarse:1;
fragment_t *fifo; /* the FIFO list of fragments to delete.
* also includes empty slots as EmptySlots
* (all empty slots are at front of FIFO) */
fcache_unit_t *units; /* list of all units, also FIFO -- the front
* of the list is the only potentially
* non-full unit */
size_t size; /* sum of sizes of all units */
/* can't rely on bb_building_lock b/c shared deletion doesn't hold it,
* and cleaner to have dedicated lock
*/
mutex_t lock;
#ifdef DEBUG
const char *name;
bool consistent; /* used to avoid fcache_fragment_pclookup problems */
#endif
/* backpointer for mapping cache pc to coarse info for inter-unit unlink */
coarse_info_t *coarse_info;
    /* we cache parameters here so we don't have to dispatch on bb/trace
* type every time -- this also allows flexibility if we ever want
* to have different parameters per thread or something.
* not much of a space hit at all since there are 2 caches per thread
* and then 2 global caches.
*/
uint max_size; /* maximum sum of sizes */
uint max_unit_size;
uint max_quadrupled_unit_size;
uint free_upgrade_size;
uint init_unit_size;
bool finite_cache;
uint regen_param;
uint replace_param;
/* for adaptive working set: */
uint num_regenerated;
uint num_replaced; /* for shared cache, simply number created */
/* for fifo caches, wset_check is simply an optimization to avoid
* too many checks when parameters are such that regen<<replace
*/
int wset_check;
/* for non-fifo caches, this flag indicates we should start
* recording num_regenerated and num_replaced
*/
bool record_wset;
free_list_header_t *free_list[FREE_LIST_SIZES_NUM];
#ifdef DEBUG
uint free_stats_freed[FREE_LIST_SIZES_NUM]; /* occurrences */
uint free_stats_reused[FREE_LIST_SIZES_NUM]; /* occurrences */
uint free_stats_coalesced[FREE_LIST_SIZES_NUM]; /* occurrences */
uint free_stats_split[FREE_LIST_SIZES_NUM]; /* entry split, occurrences */
uint free_stats_charge[FREE_LIST_SIZES_NUM]; /* bytes on free list */
/* sizes of real requests and frees */
uint request_size_histogram[HISTOGRAM_MAX_SIZE/HISTOGRAM_GRANULARITY];
uint free_size_histogram[HISTOGRAM_MAX_SIZE/HISTOGRAM_GRANULARITY];
#endif
} fcache_t;
/**************************************************
* per-thread structure
*/
typedef struct _fcache_thread_units_t {
fcache_t *bb; /* basic block fcache */
fcache_t *trace; /* trace fcache */
/* we delay unmapping units, but only one at a time: */
cache_pc pending_unmap_pc;
size_t pending_unmap_size;
/* are there units waiting to be flushed at a safe spot? */
bool pending_flush;
} fcache_thread_units_t;
#define ALLOC_DC(dc, cache) ((cache)->is_shared ? GLOBAL_DCONTEXT : (dc))
/* We cannot acquire shared_cache_lock while allsynch-flushing as we then hold
* the lower-ranked shared_vm_areas lock, but the allsynch makes it safe to not
* acquire it.
*/
#define PROTECT_CACHE(cache, op) do { \
if ((cache)->is_shared && !is_self_allsynch_flushing()) { \
mutex_##op(&(cache)->lock); \
} \
} while (0);
#define CACHE_PROTECTED(cache) \
(!(cache)->is_shared || (cache)->is_local || \
OWN_MUTEX(&(cache)->lock) || dynamo_all_threads_synched)
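/* Typical usage sketch (it mirrors the pattern used throughout this file, e.g.
 * in fcache_fragment_pclookup and fcache_cache_init below): shared-cache state
 * is only touched between PROTECT_CACHE lock/unlock pairs, and internal
 * helpers assert CACHE_PROTECTED.
 *
 *   PROTECT_CACHE(cache, lock);
 *   ASSERT(CACHE_PROTECTED(cache));
 *   // ... modify cache->fifo, cache->units, free lists, etc. ...
 *   PROTECT_CACHE(cache, unlock);
 */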
/**************************************************
* global, unique thread-shared structure:
*/
typedef struct _fcache_list_t {
/* These lists are protected by allunits_lock. */
fcache_unit_t *units; /* list of all allocated fcache units */
fcache_unit_t *dead; /* list of deleted units ready for re-allocation */
/* FIXME: num_dead duplicates stats->fcache_num_free, but we want num_dead
* for release build too, so it's separate...can we do better?
*/
uint num_dead;
/* Global lists of cache units to flush and to free, chained by next_local
* and kept on the live units list. Protected by unit_flush_lock, NOT by
* allunits_lock.
* We keep these list pointers on the heap for selfprot (case 8074).
*/
/* units to be flushed once at a safe spot */
fcache_unit_t *units_to_flush;
/* units to be freed once their contents are, kept sorted in
* increasing flushtime, with a tail pointer to make appends easy
*/
fcache_unit_t *units_to_free;
fcache_unit_t *units_to_free_tail;
} fcache_list_t;
/* Kept on the heap for selfprot (case 7957). */
static fcache_list_t *allunits;
/* FIXME: rename to fcache_unit_lock? */
DECLARE_CXTSWPROT_VAR(static mutex_t allunits_lock, INIT_LOCK_FREE(allunits_lock));
DECLARE_CXTSWPROT_VAR(static mutex_t unit_flush_lock, INIT_LOCK_FREE(unit_flush_lock));
static fcache_t *shared_cache_bb;
static fcache_t *shared_cache_trace;
/* To locate the fcache_unit_t corresponding to a fragment or empty slot
* we use an interval data structure rather than waste space with a
* backpointer in each fragment.
* Non-static so that synch routines in os.c can check its lock before calling
* is_pc_recreatable which calls in_fcache.
* Kept on the heap for selfprot (case 7957).
*/
vm_area_vector_t *fcache_unit_areas;
/* indicates a reset is in progress (whereas dynamo_resetting indicates that
* all threads are suspended and so no synch is needed)
*/
DECLARE_FREQPROT_VAR(bool reset_in_progress, false);
/* protects reset triggers: reset_pending, reset_in_progress, reset_at_nth_thread
* FIXME: use separate locks for separate triggers?
* reset_at_nth_thread is wholly inside dynamo.c, e.g.
*/
DECLARE_CXTSWPROT_VAR(mutex_t reset_pending_lock, INIT_LOCK_FREE(reset_pending_lock));
/* indicates a call to fcache_reset_all_caches_proactively() is pending in dispatch */
DECLARE_FREQPROT_VAR(uint reset_pending, 0);
/* these cannot be per-cache since caches are reset, so we have them act globally.
* protected by allunits_lock since only read during unit creation.
*/
enum {
CACHE_BB = 0,
CACHE_TRACE,
CACHE_NUM_TYPES,
};
DECLARE_FREQPROT_VAR(uint reset_at_nth_unit[CACHE_NUM_TYPES], {0,});
DECLARE_FREQPROT_VAR(uint reset_every_nth_unit[CACHE_NUM_TYPES], {0,});
#define STATS_FCACHE_ADD(cache, stat, val) DOSTATS({ \
if (cache->is_shared) { \
if (cache->is_trace) \
STATS_ADD(fcache_shared_trace_##stat, val);\
else \
STATS_ADD(fcache_shared_bb_##stat, val); \
} \
else if (cache->is_trace) \
STATS_ADD(fcache_trace_##stat, val); \
else \
STATS_ADD(fcache_bb_##stat, val); \
})
/* convenience macro to avoid casts to signed types everywhere */
#define STATS_FCACHE_SUB(cache, stat, val) \
STATS_FCACHE_ADD(cache, stat, -(stats_int_t)(val))
#define STATS_FCACHE_MAX(cache, stat1, stat2) DOSTATS({ \
if (cache->is_shared) { \
if (cache->is_trace) \
STATS_MAX(fcache_shared_trace_##stat1, fcache_shared_trace_##stat2); \
else \
STATS_MAX(fcache_shared_bb_##stat1, fcache_shared_bb_##stat2); \
} \
else if (cache->is_trace) \
STATS_MAX(fcache_trace_##stat1, fcache_trace_##stat2); \
else \
STATS_MAX(fcache_bb_##stat1, fcache_bb_##stat2); \
})
#ifdef DEBUG
/* forward decl */
# ifdef INTERNAL
static void
verify_fifo(dcontext_t *dcontext, fcache_t *cache);
static void
print_fifo(dcontext_t *dcontext, fcache_t *cache);
# endif
static void
fcache_cache_stats(dcontext_t *dcontext, fcache_t *cache);
#endif
static fcache_t *
fcache_cache_init(dcontext_t *dcontext, uint flags, bool initial_unit);
static void
fcache_cache_free(dcontext_t *dcontext, fcache_t *cache, bool free_units);
static void
add_to_free_list(dcontext_t *dcontext, fcache_t *cache, fcache_unit_t *unit,
fragment_t *f, cache_pc start_pc, uint size);
static void
fcache_free_unit(dcontext_t *dcontext, fcache_unit_t *unit, bool dealloc_or_reuse);
#define CHECK_PARAMS(who, name, ret) do { \
/* make it easier to set max */ \
if (FCACHE_OPTION(cache_##who##_max) > 0 && \
FCACHE_OPTION(cache_##who##_max) < FCACHE_OPTION(cache_##who##_unit_max)) { \
FCACHE_OPTION(cache_##who##_unit_max) = FCACHE_OPTION(cache_##who##_max); \
FCACHE_OPTION(cache_##who##_unit_init) = FCACHE_OPTION(cache_##who##_max); \
FCACHE_OPTION(cache_##who##_unit_quadruple) = FCACHE_OPTION(cache_##who##_max); \
FCACHE_OPTION(cache_##who##_unit_upgrade) = FCACHE_OPTION(cache_##who##_max); \
} \
/* case 7626: don't short-circuit checks, as later ones may be needed */ \
ret = check_param_bounds(&FCACHE_OPTION(cache_##who##_max), \
PAGE_SIZE, \
0, \
name" cache max size") \
|| ret; \
/* N.B.: we assume cache unit max fits in uint */ \
ret = check_param_bounds(&FCACHE_OPTION(cache_##who##_unit_max), \
FCACHE_OPTION(cache_##who##_unit_init), \
FCACHE_OPTION(cache_##who##_max), \
name" cache unit max size") \
|| ret; \
ret = check_param_bounds(&FCACHE_OPTION(cache_##who##_unit_quadruple),\
FCACHE_OPTION(cache_##who##_unit_init), \
FCACHE_OPTION(cache_##who##_max), \
name" cache unit quadruple-to size") \
|| ret; \
ret = check_param_bounds(&FCACHE_OPTION(cache_##who##_unit_upgrade), \
FCACHE_OPTION(cache_##who##_unit_init), \
FCACHE_OPTION(cache_##who##_max), \
name" cache unit free upgrade size") \
|| ret; \
ret = check_param_bounds(&FCACHE_OPTION(cache_##who##_unit_init), \
/* x64 does not support resizing fcache units */ \
IF_X64_ELSE(FCACHE_OPTION(cache_##who##_unit_max), \
PAGE_SIZE), \
FCACHE_OPTION(cache_##who##_unit_max), \
name" cache unit init size") \
|| ret; \
ret = check_param_bounds(&FCACHE_OPTION(cache_commit_increment), \
PAGE_SIZE, \
FCACHE_OPTION(cache_##who##_unit_init), \
name" cache_commit_increment") \
|| ret; \
} while (0);
#define CHECK_WSET_PARAM(param, ret) do { \
if (dynamo_options.cache_##param##_regen < 0) { \
USAGE_ERROR("-cache_"#param"_regen must be >= 0, is %d, setting to 0", \
dynamo_options.cache_##param##_regen); \
dynamo_options.cache_##param##_regen = 0; \
ret = true; \
} \
if (dynamo_options.cache_##param##_replace < 0) { \
USAGE_ERROR("-cache_"#param"_replace must be >= 0, id %d, setting to 0", \
dynamo_options.cache_##param##_replace); \
dynamo_options.cache_##param##_replace = 0; \
ret = true; \
} \
if (dynamo_options.cache_##param##_replace != 0 && \
dynamo_options.cache_##param##_regen > dynamo_options.cache_##param##_replace) { \
USAGE_ERROR("-cache_"#param"_regen (currently %d) must be <= " \
"-cache_"#param"_replace (currently %d) (if -cache_" \
#param"_replace > 0), setting regen to equal replace",\
dynamo_options.cache_##param##_regen, \
dynamo_options.cache_##param##_replace); \
dynamo_options.cache_##param##_regen = dynamo_options.cache_##param##_replace; \
ret = true; \
} \
} while (0);
/* pulled out from fcache_init; checks for compatibility among the fcache
 * options and returns true if it modified the value of any option to make them
 * compatible. This is called while the options are writable. */
bool
fcache_check_option_compatibility()
{
bool ret = false;
uint i;
CHECK_PARAMS(bb, "Basic block", ret);
CHECK_PARAMS(trace, "Trace", ret);
CHECK_WSET_PARAM(bb, ret);
CHECK_WSET_PARAM(trace, ret);
if (DYNAMO_OPTION(shared_bbs)) {
if (DYNAMO_OPTION(cache_shared_bb_max) > 0) {
/* case 8203: NYI */
USAGE_ERROR("-cache_shared_bb_max != 0 not supported");
dynamo_options.cache_shared_bb_max = 0;
ret = true;
}
CHECK_PARAMS(shared_bb, "Shared bb", ret);
CHECK_WSET_PARAM(shared_bb, ret);
/* FIXME: cannot handle resizing of cache, separate units only */
/* case 7626: don't short-circuit checks, as later ones may be needed */
ret = check_param_bounds(&FCACHE_OPTION(cache_shared_bb_unit_init),
FCACHE_OPTION(cache_shared_bb_unit_max),
FCACHE_OPTION(cache_shared_bb_unit_max),
"cache_shared_bb_unit_init should equal cache_shared_bb_unit_max")
|| ret;
}
if (DYNAMO_OPTION(shared_traces)) {
if (DYNAMO_OPTION(cache_shared_trace_max) > 0) {
/* case 8203: NYI */
USAGE_ERROR("-cache_shared_trace_max != 0 not supported");
dynamo_options.cache_shared_trace_max = 0;
ret = true;
}
CHECK_PARAMS(shared_trace, "Shared trace", ret);
CHECK_WSET_PARAM(shared_trace, ret);
/* FIXME: cannot handle resizing of cache, separate units only */
ret = check_param_bounds(&FCACHE_OPTION(cache_shared_trace_unit_init),
FCACHE_OPTION(cache_shared_trace_unit_max),
FCACHE_OPTION(cache_shared_trace_unit_max),
"cache_shared_trace_unit_init should equal cache_shared_trace_unit_max")
|| ret;
}
if (INTERNAL_OPTION(pad_jmps_shift_bb) &&
DYNAMO_OPTION(cache_bb_align) < START_PC_ALIGNMENT) {
USAGE_ERROR("if -pad_jmps_shift_bb, -cache_bb_align must be >= %d",
START_PC_ALIGNMENT);
dynamo_options.cache_bb_align = START_PC_ALIGNMENT;
ret = true;
}
/* (case 8647: cache_coarse_align can be anything as we don't pad jmps) */
if (INTERNAL_OPTION(pad_jmps_shift_trace) &&
DYNAMO_OPTION(cache_trace_align) < START_PC_ALIGNMENT) {
USAGE_ERROR("if -pad_jmps_shift_trace, -cache_trace_align must be >= %d",
START_PC_ALIGNMENT);
dynamo_options.cache_trace_align = START_PC_ALIGNMENT;
ret = true;
}
reset_at_nth_unit[CACHE_BB] = DYNAMO_OPTION(reset_at_nth_bb_unit);
reset_every_nth_unit[CACHE_BB] = DYNAMO_OPTION(reset_every_nth_bb_unit);
reset_at_nth_unit[CACHE_TRACE] = DYNAMO_OPTION(reset_at_nth_trace_unit);
reset_every_nth_unit[CACHE_TRACE] = DYNAMO_OPTION(reset_every_nth_trace_unit);
    /* yes, these can be set to different values -- but "every" won't kick in
     * until after the first "at"
     */
for (i=0; i<CACHE_NUM_TYPES; i++) {
if (reset_every_nth_unit[i] > 0 && reset_at_nth_unit[i] == 0)
reset_at_nth_unit[i] = reset_every_nth_unit[i];
}
return ret;
}
/* thread-shared initialization that should be repeated after a reset */
static void
fcache_reset_init(void)
{
/* case 7966: don't initialize at all for hotp_only & thin_client
* FIXME: could set initial sizes to 0 for all configurations, instead
*/
if (RUNNING_WITHOUT_CODE_CACHE())
return;
if (DYNAMO_OPTION(shared_bbs)) {
shared_cache_bb = fcache_cache_init(GLOBAL_DCONTEXT, FRAG_SHARED, true);
ASSERT(shared_cache_bb != NULL);
LOG(GLOBAL, LOG_CACHE, 1, "Initial shared bb cache is %d KB\n",
shared_cache_bb->init_unit_size/1024);
}
if (DYNAMO_OPTION(shared_traces)) {
shared_cache_trace = fcache_cache_init(GLOBAL_DCONTEXT,
FRAG_SHARED|FRAG_IS_TRACE, true);
ASSERT(shared_cache_trace != NULL);
LOG(GLOBAL, LOG_CACHE, 1, "Initial shared trace cache is %d KB\n",
shared_cache_trace->init_unit_size/1024);
}
}
/* initialization -- needs no locks */
void
fcache_init()
{
ASSERT(offsetof(fragment_t, flags) == offsetof(empty_slot_t, flags));
DOCHECK(1, {
/* ensure flag in ushort is at same spot as in uint */
static free_list_header_t free;
free.flags = FRAG_FAKE | FRAG_FCACHE_FREE_LIST;
ASSERT(TEST(FRAG_FCACHE_FREE_LIST, ((fragment_t*)(&free))->flags));
/* ensure treating fragment_t* as next will work */
ASSERT(offsetof(free_list_header_t, next) == offsetof(live_header_t, f));
});
/* we rely on this */
ASSERT(FREE_LIST_SIZES[0] == 0);
VMVECTOR_ALLOC_VECTOR(fcache_unit_areas, GLOBAL_DCONTEXT,
VECTOR_SHARED | VECTOR_NEVER_MERGE,
fcache_unit_areas);
allunits = HEAP_TYPE_ALLOC(GLOBAL_DCONTEXT, fcache_list_t, ACCT_OTHER, PROTECTED);
allunits->units = NULL;
allunits->dead = NULL;
allunits->num_dead = 0;
allunits->units_to_flush = NULL;
allunits->units_to_free = NULL;
allunits->units_to_free_tail = NULL;
fcache_reset_init();
}
#ifdef WINDOWS_PC_SAMPLE
static void
fcache_unit_profile_stop(fcache_unit_t *u)
{
int sum;
stop_profile(u->profile);
sum = sum_profile(u->profile);
if (sum > 0) {
bool shared, trace;
if (u->cache == NULL) {
shared = u->was_shared;
trace = u->was_trace;
} else {
shared = u->cache->is_shared;
trace = u->cache->is_trace;
}
mutex_lock(&profile_dump_lock);
if (shared) {
print_file(profile_file,
"\nDumping fcache %s unit profile (Shared)\n%d hits\n",
trace ? "trace" : "bb",
sum);
} else {
print_file(profile_file,
"\nDumping fcache %s unit profile (Thread %d)\n%d hits\n",
trace ? "trace" : "bb",
u->dcontext->owning_thread, sum);
}
dump_profile(profile_file, u->profile);
mutex_unlock(&profile_dump_lock);
}
}
#endif
static inline void
remove_unit_from_cache(fcache_unit_t *u)
{
ASSERT(u->cache != NULL);
u->cache->size -= u->size;
RSTATS_DEC(fcache_num_live);
STATS_FCACHE_SUB(u->cache, capacity, u->size);
#ifdef WINDOWS_PC_SAMPLE
/* Units moved to units_to_{flush,free} can't have their profile stopped
* until they are really freed, so we must cache their type here.
* We do need to clear their cache field to support private or other
* deletable flushable unit types (though w/ default ops today no flushable
* unit will have its cache deleted).
*/
u->was_trace = u->cache->is_trace;
u->was_shared = u->cache->is_shared;
#endif
u->cache = NULL;
}
static void
fcache_really_free_unit(fcache_unit_t *u, bool on_dead_list, bool dealloc_unit)
{
if (TEST(SELFPROT_CACHE, dynamo_options.protect_mask) && !u->writable) {
change_protection((void*)u->start_pc, u->size, WRITABLE);
}
#ifdef WINDOWS_PC_SAMPLE
if (u->profile != NULL) {
if (!on_dead_list)
fcache_unit_profile_stop(u);
free_profile(u->profile);
u->profile = NULL;
}
#endif
if (u->cache != NULL)
remove_unit_from_cache(u);
if (on_dead_list) {
ASSERT(u->cache == NULL);
allunits->num_dead--;
RSTATS_DEC(fcache_num_free);
STATS_SUB(fcache_free_capacity, u->size);
}
STATS_SUB(fcache_combined_capacity, u->size);
/* remove from interval data struct first to avoid races w/ it
* being re-used and not showing up in in_fcache
*/
vmvector_remove(fcache_unit_areas, u->start_pc, u->reserved_end_pc);
if (dealloc_unit)
heap_munmap((void*)u->start_pc, UNIT_RESERVED_SIZE(u));
/* always dealloc the metadata */
nonpersistent_heap_free(GLOBAL_DCONTEXT, u, sizeof(fcache_unit_t)
HEAPACCT(ACCT_MEM_MGT));
}
#ifdef DEBUG
/* needs to be called before fragment_exit */
void
fcache_stats_exit()
{
if (DYNAMO_OPTION(shared_bbs)) {
fcache_t *cache = shared_cache_bb;
/* cache may be NULL, for stats called after fcache_exit() */
if (cache != NULL) {
/* FIXME: report_dynamorio_problem() calls
* dump_global_stats() which currently regularly calls
* this, so any ASSERTs on this path will deadlock
* (workaround is to be vigilant and use msgbox_mask).
*/
ASSERT_DO_NOT_OWN_MUTEX(cache->is_shared, &cache->lock);
PROTECT_CACHE(cache, lock);
fcache_cache_stats(GLOBAL_DCONTEXT, cache);
PROTECT_CACHE(cache, unlock);
}
}
if (DYNAMO_OPTION(shared_traces)) {
fcache_t *cache = shared_cache_trace;
if (cache != NULL) {
ASSERT_DO_NOT_OWN_MUTEX(cache->is_shared, &cache->lock);
PROTECT_CACHE(cache, lock);
fcache_cache_stats(GLOBAL_DCONTEXT, cache);
PROTECT_CACHE(cache, unlock);
}
}
}
#endif
/* Free all thread-shared state not critical to forward progress;
* fcache_reset_init() will be called before continuing.
*/
static void
fcache_reset_free(void)
{
fcache_unit_t *u, *next_u;
/* case 7966: don't initialize at all for hotp_only & thin_client
* FIXME: could set initial sizes to 0 for all configurations, instead
*/
if (RUNNING_WITHOUT_CODE_CACHE())
return;
/* FIXME: for reset (not exit), optimize to avoid calling
* fcache_really_free_unit() to move units onto dead list only to delete
* here: should directly delete, but maintain fcache stats.
*/
/* we do not acquire each shared cache's lock for reset, assuming no
* synch issues (plus the lock will be deleted)
*/
if (DYNAMO_OPTION(shared_bbs)) {
fcache_cache_free(GLOBAL_DCONTEXT, shared_cache_bb, true);
shared_cache_bb = NULL;
}
if (DYNAMO_OPTION(shared_traces)) {
fcache_cache_free(GLOBAL_DCONTEXT, shared_cache_trace, true);
shared_cache_trace = NULL;
}
/* there may be units stranded on the to-flush list.
* we must free the units here as they are unreachable elsewhere.
* their fragments will be freed by the fragment htable walk.
*/
mutex_lock(&unit_flush_lock);
u = allunits->units_to_flush;
while (u != NULL) {
next_u = u->next_local;
LOG(GLOBAL, LOG_CACHE, 2,
"@ reset-free freeing to-be-flushed unit "PFX"-"PFX"\n",
u->start_pc, u->end_pc);
fcache_free_unit(GLOBAL_DCONTEXT, u, true);
u = next_u;
}
allunits->units_to_flush = NULL;
mutex_unlock(&unit_flush_lock);
/* should be freed via vm_area_check_shared_pending() */
ASSERT(allunits->units_to_free == NULL);
mutex_lock(&allunits_lock);
u = allunits->dead;
while (u != NULL) {
next_u = u->next_global;
fcache_really_free_unit(u, true/*on dead list*/, true/*dealloc*/);
u = next_u;
}
/* clear fields for reset_init() */
allunits->dead = NULL;
allunits->num_dead = 0;
mutex_unlock(&allunits_lock);
}
/* atexit cleanup -- needs no locks */
void
fcache_exit()
{
fcache_unit_t *u, *next_u;
DOSTATS({
LOG(GLOBAL, LOG_TOP|LOG_THREADS, 1,
"fcache_exit: before fcache cleanup\n");
DOLOG(1, LOG_CACHE, fcache_stats_exit(););
});
fcache_reset_free();
/* free heap for all live units (reset did dead ones) */
mutex_lock(&allunits_lock);
u = allunits->units;
while (u != NULL) {
next_u = u->next_global;
fcache_really_free_unit(u, false/*live*/, true/*dealloc*/);
u = next_u;
}
mutex_unlock(&allunits_lock);
ASSERT(vmvector_empty(fcache_unit_areas));
vmvector_delete_vector(GLOBAL_DCONTEXT, fcache_unit_areas);
HEAP_TYPE_FREE(GLOBAL_DCONTEXT, allunits, fcache_list_t, ACCT_OTHER, PROTECTED);
DELETE_LOCK(allunits_lock);
DELETE_LOCK(reset_pending_lock);
DELETE_LOCK(unit_flush_lock);
}
#if defined(WINDOWS_PC_SAMPLE) && !defined(DEBUG)
/* for fast exit path only, normal path taken care of in free unit */
void
fcache_profile_exit()
{
fcache_unit_t *u;
mutex_lock(&allunits_lock);
for (u = allunits->units; u != NULL; u = u->next_global) {
if (u->profile) {
fcache_unit_profile_stop(u);
free_profile(u->profile);
u->profile = NULL;
}
}
mutex_unlock(&allunits_lock);
}
#endif
static fcache_unit_t *
fcache_lookup_unit(cache_pc pc)
{
/* let's see if this becomes frequent enough to be a perf hit */
STATS_INC(fcache_unit_lookups);
return (fcache_unit_t *) vmvector_lookup(fcache_unit_areas, pc);
}
/* Returns the fragment_t whose body (not cache slot) contains lookup_pc */
fragment_t *
fcache_fragment_pclookup(dcontext_t *dcontext, cache_pc lookup_pc, fragment_t *wrapper)
{
fragment_t *f, *found = NULL;
cache_pc pc;
fcache_unit_t *unit = fcache_lookup_unit(lookup_pc);
if (unit == NULL)
return NULL;
LOG(THREAD, LOG_CACHE, 5, "fcache_fragment_pclookup "PFX" -> "PFX"-"PFX"\n",
lookup_pc, unit->start_pc, unit->end_pc);
if (unit->cache->is_coarse) {
/* No metadata in cache so we must walk the htable. We shouldn't need
* to lock the cache itself.
*/
coarse_info_t *info = unit->cache->coarse_info;
cache_pc body;
app_pc tag;
ASSERT(info != NULL);
tag = fragment_coarse_pclookup(dcontext, info, lookup_pc, &body);
ASSERT(wrapper != NULL);
fragment_coarse_wrapper(wrapper, tag, body);
return wrapper;
}
PROTECT_CACHE(unit->cache, lock);
DODEBUG({
if (!unit->cache->consistent) {
/* We're in the middle of an fcache operation during which we cannot
* physically walk the cache. ASSUMPTION: this only happens for
* debug builds when we pclookup on disassembly.
*/
PROTECT_CACHE(unit->cache, unlock);
return fragment_pclookup_by_htable(dcontext, lookup_pc, wrapper);
}
});
pc = unit->start_pc;
while (pc < unit->cur_pc && pc < lookup_pc) {
f = *((fragment_t **)pc);
LOG(THREAD, LOG_CACHE, 6, "\treading "PFX" -> "PFX"\n", pc, f);
if (!USE_FIFO_FOR_CACHE(unit->cache)) {
if (FRAG_IS_FREE_LIST(f)) {
pc += ((free_list_header_t *) pc)->size;
continue;
}
}
ASSERT(f != NULL);
ASSERT(FIFO_UNIT(f) == unit);
ASSERT(FRAG_HDR_START(f) == pc);
if (!FRAG_EMPTY(f) &&
lookup_pc < (f->start_pc + f->size) && lookup_pc >= f->start_pc) {
found = f;
LOG(THREAD, LOG_CACHE, 5, "\tfound F%d ("PFX")."PFX"\n",
f->id, f->tag, f->start_pc);
break;
}
/* advance to contiguously-next fragment_t in cache */
pc += FRAG_SIZE(f);
}
PROTECT_CACHE(unit->cache, unlock);
return found;
}
#ifdef DEBUG
static bool
fcache_pc_in_live_unit(fcache_t *cache, cache_pc pc)
{
fcache_unit_t *unit;
for (unit = cache->units; unit != NULL; unit = unit->next_local) {
if (pc >= unit->start_pc && pc < unit->end_pc)
return true;
}
unit = fcache_lookup_unit(pc);
/* pending flush is still considered live: removed from all lists
* just prior to flush synch
*/
if (unit != NULL && unit->pending_flush)
return true;
return false;
}
#endif
bool
fcache_is_writable(fragment_t *f)
{
fcache_unit_t *unit = fcache_lookup_unit(f->start_pc);
ASSERT(unit != NULL);
return unit->writable;
}
/* If f is null, changes protection of the entire fcache.
 * Else, changes protection of the unit f is part of.
 */
void
fcache_change_fragment_protection(dcontext_t *dcontext, fragment_t *f, bool writable)
{
fcache_unit_t *u;
ASSERT(TEST(SELFPROT_CACHE, dynamo_options.protect_mask));
if (f != NULL) {
u = fcache_lookup_unit(f->start_pc);
ASSERT(u != NULL);
if (u->writable == writable)
return;
change_protection((void*)u->start_pc, u->size, writable);
u->writable = writable;
} else {
        /* else, do entire fcache.
         * win32 does not allow a single protection change call on units that
         * were allocated with separate calls, so we don't try to combine
         * adjacent units here
         */
/* FIXME: right now no synch here, so one thread could unprot, another prots,
* and the first segfaults
*/
mutex_lock(&allunits_lock);
u = allunits->units;
while (u != NULL) {
if (u->writable != writable) {
change_protection((void*)u->start_pc, u->size, writable);
u->writable = writable;
}
u = u->next_global;
}
mutex_unlock(&allunits_lock);
}
}
/* return true if pc is in the fcache address space */
/* This routine can be called with a thread suspended in an unknown state.
* Currently only the fcache_unit_areas write lock is checked, so if
* this routine is changed
* to grab any other locks, or call a routine that does, then the
* at_safe_spot() routine in os.c must be updated */
bool
in_fcache(void * pc)
{
return fcache_lookup_unit((cache_pc)pc) != NULL;
}
/* Pass NULL for pc if this routine should allocate the cache space.
* If pc is non-NULL, this routine assumes that size is fully
* committed and initializes accordingly.
*/
static fcache_unit_t *
fcache_create_unit(dcontext_t *dcontext, fcache_t *cache, cache_pc pc, size_t size)
{
fcache_unit_t *u = NULL;
uint cache_type;
ASSERT(CACHE_PROTECTED(cache));
/* currently we assume for FIFO empties that we can't have a single
* contiguous empty slot that overflows a uint.
* minor: all our stats only take ints, as well
*/
ASSERT(CHECK_TRUNCATE_TYPE_uint(size));
ASSERT(ALIGNED(size, PAGE_SIZE));
if (pc == NULL) {
/* take from dead list if possible */
mutex_lock(&allunits_lock);
if (allunits->dead != NULL) {
fcache_unit_t *prev_u = NULL;
u = allunits->dead;
while (u != NULL) {
if (u->size >= size &&
(cache->max_size == 0 || cache->size + u->size <= cache->max_size)) {
/* remove from dead list */
if (prev_u == NULL)
allunits->dead = u->next_global;
else
prev_u->next_global = u->next_global;
LOG(THREAD, LOG_CACHE, 1,
"\tFound unit "PFX" of size %d (need %d) on dead list\n",
u->start_pc, u->size/1024, size/1024);
allunits->num_dead--;
RSTATS_DEC(fcache_num_free);
STATS_SUB(fcache_free_capacity, u->size);
#ifdef WINDOWS_PC_SAMPLE
if (u->profile) {
reset_profile(u->profile);
start_profile(u->profile);
}
#endif
break;
}
prev_u = u;
u = u->next_global;
}
}
mutex_unlock(&allunits_lock);
}
if (u == NULL) {
size_t commit_size;
/* use global heap b/c this can be re-used by later threads */
u = (fcache_unit_t *)
nonpersistent_heap_alloc(GLOBAL_DCONTEXT, sizeof(fcache_unit_t)
HEAPACCT(ACCT_MEM_MGT));
if (pc != NULL) {
u->start_pc = pc;
commit_size = size;
STATS_FCACHE_ADD(cache, claimed, size);
STATS_ADD(fcache_combined_claimed, size);
} else {
/* allocate new unit */
commit_size = DYNAMO_OPTION(cache_commit_increment);
ASSERT(commit_size <= size);
u->start_pc = (cache_pc) heap_mmap_reserve(size, commit_size);
}
ASSERT(u->start_pc != NULL);
ASSERT(proc_is_cache_aligned((void *)u->start_pc));
LOG(THREAD, LOG_HEAP, 3, "fcache_create_unit -> "PFX"\n", u->start_pc);
u->size = commit_size;
u->end_pc = u->start_pc + commit_size;
u->reserved_end_pc = u->start_pc + size;
vmvector_add(fcache_unit_areas, u->start_pc, u->reserved_end_pc, (void *) u);
STATS_ADD(fcache_combined_capacity, u->size);
STATS_MAX(peak_fcache_combined_capacity, fcache_combined_capacity);
#ifdef WINDOWS_PC_SAMPLE
if (dynamo_options.profile_pcs &&
dynamo_options.prof_pcs_fcache >= 2 &&
dynamo_options.prof_pcs_fcache <= 32) {
u->profile = create_profile(u->start_pc, u->reserved_end_pc,
dynamo_options.prof_pcs_fcache, NULL);
start_profile(u->profile);
} else
u->profile = NULL;
#endif
}
cache->size += u->size;
u->cur_pc = u->start_pc;
u->full = false;
u->cache = cache;
#if defined(SIDELINE) || defined(WINDOWS_PC_SAMPLE)
u->dcontext = dcontext;
#endif
u->writable = true;
u->pending_free = false;
DODEBUG({ u->pending_flush = false; });
u->flushtime = 0;
RSTATS_ADD_PEAK(fcache_num_live, 1);
STATS_FCACHE_ADD(u->cache, capacity, u->size);
STATS_FCACHE_MAX(u->cache, capacity_peak, capacity);
u->next_local = NULL; /* must be set by caller */
mutex_lock(&allunits_lock);
if (allunits->units != NULL)
allunits->units->prev_global = u;
u->next_global = allunits->units;
u->prev_global = NULL;
allunits->units = u;
cache_type = cache->is_trace ? CACHE_TRACE : CACHE_BB;
if (reset_at_nth_unit[cache_type] > 0) {
/* reset on nth NEW unit (ignoring total unit count, whether
* some were flushed, etc.)
*/
reset_at_nth_unit[cache_type]--;
if (reset_at_nth_unit[cache_type] == 0) {
schedule_reset(RESET_ALL);
if (reset_every_nth_unit[cache_type] > 0)
reset_at_nth_unit[cache_type] = reset_every_nth_unit[cache_type];
}
}
mutex_unlock(&allunits_lock);
return u;
}
/* this routine does NOT remove the unit from its local cache list */
static void
fcache_free_unit(dcontext_t *dcontext, fcache_unit_t *unit, bool dealloc_or_reuse)
{
DODEBUG({
if (unit->flushtime > 0) {
ASSERT_OWN_MUTEX(true, &unit_flush_lock);
/* we set to 0 to avoid this assert on fcache_reset_exit()
* when freeing dead units -- not needed for release build
*/
unit->flushtime = 0;
} else {
ASSERT(dynamo_exited || dynamo_resetting || CACHE_PROTECTED(unit->cache));
}
});
mutex_lock(&allunits_lock);
/* remove from live list */
if (unit->prev_global != NULL)
unit->prev_global->next_global = unit->next_global;
else
allunits->units = unit->next_global;
if (unit->next_global != NULL)
unit->next_global->prev_global = unit->prev_global;
STATS_FCACHE_SUB(unit->cache, claimed, (unit->cur_pc - unit->start_pc));
STATS_FCACHE_SUB(unit->cache, empty, (unit->cur_pc - unit->start_pc));
STATS_SUB(fcache_combined_claimed, (unit->cur_pc - unit->start_pc));
if (!dealloc_or_reuse) {
/* up to caller to dealloc */
mutex_unlock(&allunits_lock);
/* we do want to update cache->size and fcache_unit_areas: */
fcache_really_free_unit(unit, false/*live*/, false/*do not dealloc unit*/);
}
/* heuristic: don't keep around more dead units than max(5, 1/4 num threads) */
else if (allunits->num_dead < 5 ||
allunits->num_dead * 4U <= (uint) get_num_threads()) {
        /* Keep the dead list sorted small-to-large to avoid grabbing a large
         * unit when a small one would do and then needing to allocate when
         * only small ones are left. Helps out with lots of small threads.
         */
fcache_unit_t *u, *prev_u;
for (u = allunits->dead, prev_u = NULL;
u != NULL && u->size < unit->size;
prev_u = u, u = u->next_global)
;
/* prev_global and next_local are not used in the dead list */
unit->prev_global = NULL;
unit->next_local = NULL;
if (prev_u == NULL) {
unit->next_global = allunits->dead;
allunits->dead = unit;
} else {
unit->next_global = u;
prev_u->next_global = unit;
}
allunits->num_dead++;
RSTATS_ADD_PEAK(fcache_num_free, 1);
STATS_ADD(fcache_free_capacity, unit->size);
#ifdef WINDOWS_PC_SAMPLE
if (unit->profile)
fcache_unit_profile_stop(unit);
#endif
/* this is done by fcache_really_free_unit for else path */
remove_unit_from_cache(unit);
mutex_unlock(&allunits_lock);
} else {
mutex_unlock(&allunits_lock);
fcache_really_free_unit(unit, false/*live*/, true/*dealloc*/);
}
}
/* assuming size will either be aligned at VM_ALLOCATION_BOUNDARY or will be
 * smaller, in which case no adjustment is necessary
 */
#define FCACHE_GUARDED(size) \
((size) - \
((DYNAMO_OPTION(guard_pages) && \
((size) >= VM_ALLOCATION_BOUNDARY - 2 * PAGE_SIZE)) \
? (2 * PAGE_SIZE) : 0))
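/* Worked example (illustrative; assumes -guard_pages, 4KB pages, and a 64KB
 * VM_ALLOCATION_BOUNDARY): FCACHE_GUARDED(64KB) = 64KB - 2*4KB = 56KB of usable
 * space, while FCACHE_GUARDED(32KB) = 32KB since 32KB is below the
 * (VM_ALLOCATION_BOUNDARY - 2*PAGE_SIZE) threshold and needs no adjustment.
 */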
#define SET_CACHE_PARAMS(cache, which) do { \
cache->max_size = \
FCACHE_GUARDED(FCACHE_OPTION(cache_##which##_max)); \
cache->max_unit_size = \
FCACHE_GUARDED(FCACHE_OPTION(cache_##which##_unit_max)); \
cache->max_quadrupled_unit_size = \
FCACHE_GUARDED(FCACHE_OPTION(cache_##which##_unit_quadruple)); \
cache->free_upgrade_size = \
FCACHE_GUARDED(FCACHE_OPTION(cache_##which##_unit_upgrade)); \
cache->init_unit_size = \
FCACHE_GUARDED(FCACHE_OPTION(cache_##which##_unit_init)); \
cache->finite_cache = dynamo_options.finite_##which##_cache; \
cache->regen_param = dynamo_options.cache_##which##_regen; \
cache->replace_param = dynamo_options.cache_##which##_replace; \
} while (0);
static fcache_t *
fcache_cache_init(dcontext_t *dcontext, uint flags, bool initial_unit)
{
fcache_t *cache = (fcache_t *)
nonpersistent_heap_alloc(dcontext, sizeof(fcache_t) HEAPACCT(ACCT_MEM_MGT));
cache->fifo = NULL;
cache->size = 0;
cache->is_trace = TEST(FRAG_IS_TRACE, flags);
cache->is_shared = TEST(FRAG_SHARED, flags);
cache->is_coarse = TEST(FRAG_COARSE_GRAIN, flags);
DODEBUG({ cache->is_local = false; });
cache->coarse_info = NULL;
DODEBUG({ cache->consistent = true; });
if (cache->is_shared) {
ASSERT(dcontext == GLOBAL_DCONTEXT);
if (TEST(FRAG_IS_TRACE, flags)) {
DODEBUG({ cache->name = "Trace (shared)"; });
SET_CACHE_PARAMS(cache, shared_trace);
} else if (cache->is_coarse) {
DODEBUG({ cache->name = "Coarse basic block (shared)"; });
SET_CACHE_PARAMS(cache, coarse_bb);
} else {
DODEBUG({ cache->name = "Basic block (shared)"; });
SET_CACHE_PARAMS(cache, shared_bb);
}
} else {
ASSERT(dcontext != GLOBAL_DCONTEXT);
if (TEST(FRAG_IS_TRACE, flags)) {
DODEBUG({ cache->name = "Trace (private)"; });
SET_CACHE_PARAMS(cache, trace);
} else {
DODEBUG({ cache->name = "Basic block (private)"; });
SET_CACHE_PARAMS(cache, bb);
}
}
#ifdef DISALLOW_CACHE_RESIZING
/* cannot handle resizing of cache, separate units only */
cache->init_unit_size = cache->max_unit_size;
#endif
if (cache->is_shared)
ASSIGN_INIT_LOCK_FREE(cache->lock, shared_cache_lock);
if (initial_unit) {
PROTECT_CACHE(cache, lock);
cache->units = fcache_create_unit(dcontext, cache, NULL, cache->init_unit_size);
PROTECT_CACHE(cache, unlock);
} else
cache->units = NULL;
cache->num_regenerated = 0;
cache->num_replaced = 0;
cache->wset_check = 0;
cache->record_wset = false;
if (cache->is_shared) { /* else won't use free list */
memset(cache->free_list, 0, sizeof(cache->free_list));
DODEBUG({
memset(cache->free_stats_freed, 0, sizeof(cache->free_stats_freed));
memset(cache->free_stats_reused, 0, sizeof(cache->free_stats_reused));
memset(cache->free_stats_coalesced, 0, sizeof(cache->free_stats_coalesced));
memset(cache->free_stats_charge, 0, sizeof(cache->free_stats_charge));
memset(cache->free_stats_split, 0, sizeof(cache->free_stats_split));
memset(cache->request_size_histogram, 0,
sizeof(cache->request_size_histogram));
memset(cache->free_size_histogram, 0,
sizeof(cache->free_size_histogram));
});
}
return cache;
}
/* assumption: only called on thread exit path.
* If !free_units, we do not de-allocate or move the units to the dead list,
* but we still remove from the live list.
*/
static void
fcache_cache_free(dcontext_t *dcontext, fcache_t *cache, bool free_units)
{
fcache_unit_t *u, *next_u;
dcontext_t *alloc_dc = ALLOC_DC(dcontext, cache);
DEBUG_DECLARE(size_t cache_size = cache->size;)
DEBUG_DECLARE(size_t size_check = 0;)
if (USE_FIFO_FOR_CACHE(cache)) {
fragment_t *f, *nextf;
ASSERT(cache->consistent);
for (f = cache->fifo; f != NULL; f = nextf) {
/* fragment exit may have already happened, but we won't deref
* freed memory here since fragments will have been removed from FIFO
*/
nextf = FIFO_NEXT(f);
if (FRAG_EMPTY(f)) {
nonpersistent_heap_free(alloc_dc, f, sizeof(empty_slot_t)
HEAPACCT(ACCT_FCACHE_EMPTY));
}
}
cache->fifo = NULL;
}
ASSERT(cache->fifo == NULL);
u = cache->units;
while (u != NULL) {
DODEBUG({ size_check += u->size; });
next_u = u->next_local;
fcache_free_unit(dcontext, u, free_units);
u = next_u;
}
/* we must use pre-cached cache_size since fcache_free_unit decrements it */
ASSERT(size_check == cache_size);
ASSERT(cache->size == 0);
if (cache->is_shared)
DELETE_LOCK(cache->lock);
nonpersistent_heap_free(alloc_dc, cache, sizeof(fcache_t) HEAPACCT(ACCT_MEM_MGT));
}
#ifdef DEBUG
void
fcache_free_list_consistency(dcontext_t *dcontext, fcache_t *cache, int bucket)
{
uint live = 0;
uint charge = 0;
uint waste = 0;
free_list_header_t *header = cache->free_list[bucket];
uint prev_size = 0;
fcache_unit_t *unit;
LOG(GLOBAL, LOG_CACHE, 3, "fcache_free_list_consistency %s bucket[%2d], %3d bytes\n",
cache->name, bucket, FREE_LIST_SIZES[bucket]);
/* walk list, and verify counters */
while (header != NULL) {
cache_pc start_pc = (cache_pc) header;
uint size = header->size;
ASSERT(size >= FREE_LIST_SIZES[bucket] &&
size <= MAX_FREE_ENTRY_SIZE &&
(bucket == FREE_LIST_SIZES_NUM-1 || size < FREE_LIST_SIZES[bucket+1]));
/* FIXME: should ASSERT entries in a bucket are all sorted
* properly, when we start keeping them in order */
ASSERT(prev_size < size || true /* not sorted yet */);
prev_size = size;
ASSERT(header->next == NULL || header->next->prev == header);
ASSERT(header->prev == NULL || header->prev->next == header);
ASSERT(FRAG_IS_FREE_LIST((fragment_t *)header));
unit = fcache_lookup_unit(start_pc);
if (unit->cur_pc > start_pc + header->size) {
fragment_t *subseq = FRAG_NEXT_SLOT(start_pc, header->size);
/* should NOT be followed by a free list entry; should instead be
* followed by a marked fragment_t.
*/
ASSERT((!FRAG_IS_FREE_LIST(subseq) &&
TEST(FRAG_FOLLOWS_FREE_ENTRY, subseq->flags)) ||
/* ok to have subsequent free entry if unable to coalesce due
* to ushort size limits */
size + FRAG_NEXT_FREE(start_pc, header->size)->size >
MAX_FREE_ENTRY_SIZE);
}
/* Invariant: no free list entry at append point */
ASSERT(unit->full || unit->cur_pc != start_pc + header->size);
header = header->next;
        /* maximum waste if this entry is used; for comparison, the scheme
         * before case 7318 really wasted memory */
LOG(GLOBAL, LOG_CACHE, 4, "\t @"PFX": %3d bytes, %3d max waste\n",
start_pc, size, size - FREE_LIST_SIZES[bucket]);
live++;
charge += size;
waste += size - FREE_LIST_SIZES[bucket];
}
ASSERT(live == cache->free_stats_freed[bucket] -
(cache->free_stats_reused[bucket] + cache->free_stats_coalesced[bucket]));
ASSERT(charge == cache->free_stats_charge[bucket]);
ASSERT(waste >= (
/* waste estimate =
charged bytes - live entries * bucket _minimal_ size */
cache->free_stats_charge[bucket] -
(cache->free_stats_freed[bucket] -
(cache->free_stats_reused[bucket] +
cache->free_stats_coalesced[bucket])) *
FREE_LIST_SIZES[bucket]));
LOG(GLOBAL, LOG_CACHE, 2, "\t#%2d %3d bytes == %d live, %8d charge, %8d waste\n",
bucket,
FREE_LIST_SIZES[bucket],
live,
charge,
waste);
}
/* FIXME: put w/ periodic stats dumps and not only at end? */
static void
fcache_cache_stats(dcontext_t *dcontext, fcache_t *cache)
{
fcache_unit_t *u;
int i = 0;
size_t capacity = 0;
size_t used = 0;
bool full = true;
ASSERT_OWN_MUTEX(!dynamo_exited && cache->is_shared, &cache->lock);
for (u = cache->units; u != NULL; u = u->next_local, i++) {
capacity += u->size;
used += u->cur_pc - u->start_pc;
full &= u->full;
LOG(THREAD, LOG_CACHE, 1,
"\t%s unit %d @"PFX": capacity %d KB, used %d KB, %s\n",
cache->name, i, u->start_pc, u->size/1024, (u->cur_pc - u->start_pc)/1024,
u->full ? "full" : "not full");
}
LOG(THREAD, LOG_CACHE, 1, "%s cache: capacity %d KB, used %d KB, %s\n",
cache->name, capacity/1024, used/1024, full ? "full" : "not full");
if (DYNAMO_OPTION(cache_shared_free_list) &&
cache->is_shared) { /* using free list */
int bucket;
LOG(GLOBAL, LOG_CACHE, 1, "fcache %s free list stats:\n", cache->name);
for (bucket = 0; bucket < FREE_LIST_SIZES_NUM; bucket++) {
LOG(GLOBAL, LOG_ALL, 1,
"\t#%2d %3d bytes : %7d free, %7d reuse, %5d coalesce, %5d split\n"
"\t %3d bytes : %5d live, %8d charge, %8d waste\n",
bucket,
FREE_LIST_SIZES[bucket],
cache->free_stats_freed[bucket],
cache->free_stats_reused[bucket],
cache->free_stats_coalesced[bucket],
cache->free_stats_split[bucket],
FREE_LIST_SIZES[bucket],
cache->free_stats_freed[bucket] -
(cache->free_stats_reused[bucket] + cache->free_stats_coalesced[bucket]),
cache->free_stats_charge[bucket],
/* waste = charged bytes - live entries * bucket _minimal_ size */
cache->free_stats_charge[bucket] -
(cache->free_stats_freed[bucket] -
(cache->free_stats_reused[bucket] +
cache->free_stats_coalesced[bucket])) *
FREE_LIST_SIZES[bucket]);
}
DOLOG(1, LOG_CACHE, {
/* FIXME: add in all debug runs, if not too slow */
for (bucket = 0; bucket < FREE_LIST_SIZES_NUM; bucket++) {
fcache_free_list_consistency(dcontext, cache, bucket);
}
});
LOG(GLOBAL, LOG_ALL, 1, "fcache %s requests and frees histogram:\n", cache->name);
for (bucket = 0;
bucket < sizeof(cache->request_size_histogram)/
sizeof(cache->request_size_histogram[0]);
bucket++) {
if (cache->request_size_histogram[bucket] != 0 ||
cache->free_size_histogram[bucket] != 0) {
LOG(GLOBAL, LOG_ALL, 1, "\t# %3d bytes == %8d requests %8d freed\n",
bucket*HISTOGRAM_GRANULARITY,
cache->request_size_histogram[bucket],
cache->free_size_histogram[bucket]);
}
}
}
}
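/* Worked example for the histogram bucketing below (illustrative values only;
 * the real HISTOGRAM_GRANULARITY and HISTOGRAM_MAX_SIZE are defined elsewhere):
 * with a granularity of 64 bytes and a max of 2048, a 150-byte request falls in
 * bucket 150/64 = 2, and anything >= 2048 is clamped into the last bucket,
 * 2048/64 - 1 = 31.
 */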
static inline
int
get_histogram_bucket(int size)
{
uint histogram_bucket = size/HISTOGRAM_GRANULARITY;
if (histogram_bucket >= HISTOGRAM_MAX_SIZE/HISTOGRAM_GRANULARITY)
histogram_bucket = (HISTOGRAM_MAX_SIZE/HISTOGRAM_GRANULARITY) - 1;
return histogram_bucket;
}
#endif /* DEBUG */
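/* Illustrative shift example (hypothetical addresses): if a unit is copied from
 * 0x47000000 to 0x48000000, shift is +0x01000000; each fragment's start_pc is
 * adjusted by that amount and the fragment_t* backpointer is re-written at the
 * slot's new location.  Intra-unit branches need no adjustment since they are
 * pc-relative and the whole unit was copied verbatim; only incoming links from
 * other caches are unlinked and relinked in the second pass below.
 */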
static void
fcache_shift_fragments(dcontext_t *dcontext, fcache_unit_t *unit, ssize_t shift,
cache_pc start, cache_pc end, size_t old_size)
{
fragment_t *f;
cache_pc pc, new_pc;
ASSERT_OWN_MUTEX(unit->cache->is_shared, &unit->cache->lock);
/* free list scheme can't be walked expecting FIFO headers */
ASSERT(!DYNAMO_OPTION(cache_shared_free_list) || !unit->cache->is_shared);
/* would need to re-relativize fcache exit prefix for coarse */
ASSERT(!unit->cache->is_coarse);
DODEBUG({ unit->cache->consistent = false; });
LOG(THREAD, LOG_CACHE, 2, "fcache_shift_fragments: first pass\n");
/* walk the physical cache and shift each fragment
* fine to walk the old memory, we just need the fragment_t* pointers
*/
pc = unit->start_pc;
LOG(THREAD, LOG_CACHE, 2, " unit "PFX"-"PFX" [-"PFX"]\n",
pc, unit->end_pc, unit->reserved_end_pc);
while (pc < unit->cur_pc) {
f = *((fragment_t **)pc);
ASSERT(f != NULL);
ASSERT(FIFO_UNIT(f) == unit); /* sanity check */
if (FRAG_EMPTY(f))
FRAG_START_ASSIGN(f, FRAG_START(f) + shift);
else {
LOG(THREAD, LOG_CACHE, 5, "\treading "PFX" -> "PFX" = F%d\n", pc, f, f->id);
fragment_shift_fcache_pointers(dcontext, f, shift, start, end, old_size);
}
/* now that f->start_pc is updated, update the backpointer */
new_pc = FRAG_HDR_START(f);
LOG(THREAD, LOG_CACHE, 4, "resize: writing "PFX" to "PFX"\n", f, new_pc);
*((fragment_t **)new_pc) = f;
/* move to contiguously-next fragment_t in cache */
pc += FRAG_SIZE(f);
}
DOLOG(2, LOG_FRAGMENT, {
        /* need to check all hashtables for consistency at this point */
study_all_hashtables(dcontext);
});
LOG(THREAD, LOG_CACHE, 2, "fcache_shift_fragments: second pass\n");
/* have to do a second pass to link them to each other */
pc = unit->start_pc;
LOG(THREAD, LOG_CACHE, 2, " unit "PFX"-"PFX" [-"PFX"]\n",
pc, unit->end_pc, unit->reserved_end_pc);
while (pc < unit->cur_pc) {
f = *((fragment_t **)pc);
ASSERT(f != NULL);
/* can't repeat the FIFO_UNIT(f)==unit check b/c we've already adjusted
* f->start_pc, which is used to find the unit
*/
if (!FRAG_EMPTY(f)) {
LOG(THREAD, LOG_CACHE, 5, "\treading "PFX" -> "PFX" = F%d\n", pc, f, f->id);
/* inter-cache links must be redone:
* we have links from bb cache to trace cache, and sometimes links
* the other direction, for example from a trace to a bb that
* cannot be a trace head (e.g., is marked CANNOT_BE_TRACE).
* simplest to re-link every fragment in the shifted cache.
* N.B.: we do NOT need to re-link the outgoing, since
* fragment_shift_fcache_pointers re-relativized all outgoing
* ctis by the shifted amount.
*/
if (TEST(FRAG_LINKED_INCOMING, f->flags)) {
unlink_fragment_incoming(dcontext, f);
link_fragment_incoming(dcontext, f, false/*not new*/);
}
}
/* move to contiguously-next fragment_t in cache */
pc += FRAG_SIZE(f);
}
DODEBUG({ unit->cache->consistent = true; });
}
static void
cache_extend_commitment(fcache_unit_t *unit, size_t commit_size)
{
ASSERT(unit != NULL);
ASSERT(ALIGNED(commit_size, DYNAMO_OPTION(cache_commit_increment)));
heap_mmap_extend_commitment(unit->end_pc, commit_size);
unit->end_pc += commit_size;
unit->size += commit_size;
unit->cache->size += commit_size;
unit->full = false;
STATS_FCACHE_ADD(unit->cache, capacity, commit_size);
STATS_FCACHE_MAX(unit->cache, capacity_peak, capacity);
STATS_ADD(fcache_combined_capacity, commit_size);
ASSERT(unit->end_pc <= unit->reserved_end_pc);
ASSERT(unit->size <= UNIT_RESERVED_SIZE(unit));
}
/* FIXME case 8617: now that we have cache commit-on-demand we should
* make the private-configuration caches larger. We could even get
* rid of the fcache shifting.
*/
/* i#696: We're not getting rid of fcache shifting yet, but it is incompatible
* with labels-as-values since we can't patch those absolute addresses.
*/
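/* Growth policy sketch (hypothetical option values): a 16KB unit with
 * max_quadrupled_unit_size of 64KB grows to 64KB (16*4 <= 64), while a 32KB
 * unit with the same setting only doubles to 64KB (32*4 > 64).  The result is
 * then bumped up if the single requested slot needs more room, and clamped to
 * both max_unit_size and, if set, the overall cache max_size.
 */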
static void
fcache_increase_size(dcontext_t *dcontext, fcache_t *cache, fcache_unit_t *unit,
uint slot_size)
{
bool reallocated = false;
cache_pc new_memory = NULL;
ssize_t shift;
size_t new_size = unit->size;
size_t commit_size;
/* i#696: Incompatible with clients that use labels-as-values. */
IF_CLIENT_INTERFACE(ASSERT(!dr_bb_hook_exists() &&
!dr_trace_hook_exists()));
/* we shouldn't come here if we have reservation room */
ASSERT(unit->reserved_end_pc == unit->end_pc);
if (new_size*4 <= cache->max_quadrupled_unit_size)
new_size *= 4;
else
new_size *= 2;
if (new_size < slot_size * MAX_SINGLE_MULTIPLE)
new_size = ALIGN_FORWARD(slot_size * MAX_SINGLE_MULTIPLE, PAGE_SIZE);
/* unit limit */
if (new_size > cache->max_unit_size)
new_size = cache->max_unit_size;
/* total cache limit */
if (cache->max_size != 0 && cache->size - unit->size + new_size > cache->max_size) {
new_size = cache->max_size - cache->size + unit->size;
}
commit_size = new_size; /* should be re-set below, this makes compiler happy */
/* FIXME: shouldn't this routine return whether it
* allocated enough space for slot_size?
*/
ASSERT(unit->size + slot_size <= new_size);
LOG(THREAD, LOG_CACHE, 2, "Increasing %s unit size from %d KB to %d KB\n",
cache->name, unit->size/1024, new_size/1024);
#ifdef DISALLOW_CACHE_RESIZING
SYSLOG_INTERNAL_ERROR("This build cannot handle cache resizing");
ASSERT_NOT_REACHED();
#endif
ASSERT(CACHE_PROTECTED(cache));
/* take from dead list if possible */
if (allunits->dead != NULL) {
fcache_unit_t *u, *prev_u;
mutex_lock(&allunits_lock);
u = allunits->dead;
prev_u = NULL;
while (u != NULL) {
if (UNIT_RESERVED_SIZE(u) >= new_size) {
fcache_thread_units_t *tu = (fcache_thread_units_t *)
dcontext->fcache_field;
/* remove from dead list */
if (prev_u == NULL)
allunits->dead = u->next_global;
else
prev_u->next_global = u->next_global;
allunits->num_dead--;
RSTATS_DEC(fcache_num_free);
STATS_SUB(fcache_free_capacity, u->size);
                LOG(THREAD, LOG_CACHE, 1, "\tFound unit of size %d KB on dead list\n",
                    u->size/1024);
new_memory = u->start_pc;
ASSERT_TRUNCATE(new_size, uint, UNIT_RESERVED_SIZE(u));
new_size = UNIT_RESERVED_SIZE(u);
u->cache = cache;
/* Add to stats prior to extending commit as that will add the
* extension amount itself.
* Don't need to add to combined capacity: it includes free.
*/
STATS_FCACHE_ADD(cache, capacity, u->size);
STATS_FCACHE_MAX(cache, capacity_peak, capacity);
if (u->size < new_size) { /* case 8688: fill out to promised size */
size_t new_commit =
ALIGN_FORWARD(new_size - u->size,
DYNAMO_OPTION(cache_commit_increment));
ASSERT(u->size + new_commit <= UNIT_RESERVED_SIZE(u));
cache_extend_commitment(u, new_commit);
/* we increase cache's size below so undo what
* cache_extend_commitment did */
u->cache->size -= new_commit;
ASSERT(u->size >= new_size);
}
commit_size = u->size;
/* use unit's fcache_unit_t struct but u's mmap space */
ASSERT(tu->pending_unmap_pc == NULL);
tu->pending_unmap_pc = unit->start_pc;
tu->pending_unmap_size = UNIT_RESERVED_SIZE(unit);
STATS_FCACHE_SUB(cache, capacity, unit->size);
STATS_SUB(fcache_combined_capacity, unit->size);
#ifdef WINDOWS_PC_SAMPLE
if (u->profile != NULL) {
free_profile(u->profile);
u->profile = NULL;
}
#endif
/* need to replace u with unit: we remove from fcache_unit_areas
* here and re-add down below
*/
vmvector_remove(fcache_unit_areas, u->start_pc, u->reserved_end_pc);
nonpersistent_heap_free(GLOBAL_DCONTEXT, u, sizeof(fcache_unit_t)
HEAPACCT(ACCT_MEM_MGT));
break;
}
prev_u = u;
u = u->next_global;
}
mutex_unlock(&allunits_lock);
}
if (new_memory == NULL) {
/* allocate new memory for unit */
reallocated = true;
commit_size = 0;
while (commit_size < slot_size && unit->size + commit_size < new_size) {
commit_size += DYNAMO_OPTION(cache_commit_increment);
}
        /* FIXME: if commit_size < slot_size we have a problem -- this routine
         * should return failure
         */
ASSERT(commit_size >= slot_size);
commit_size += unit->size;
ASSERT(commit_size <= new_size);
new_memory = (cache_pc) heap_mmap_reserve(new_size, commit_size);
STATS_FCACHE_SUB(cache, capacity, unit->size);
STATS_FCACHE_ADD(cache, capacity, commit_size);
STATS_FCACHE_MAX(cache, capacity_peak, capacity);
STATS_SUB(fcache_combined_capacity, unit->size);
STATS_ADD(fcache_combined_capacity, commit_size);
STATS_MAX(peak_fcache_combined_capacity, fcache_combined_capacity);
LOG(THREAD, LOG_HEAP, 3, "fcache_increase_size -> "PFX"\n", new_memory);
ASSERT(new_memory != NULL);
ASSERT(proc_is_cache_aligned((void *)new_memory));
}
/* while we can handle resizing any unit, we only expect to resize
* the initial unit in a cache until it reaches the max unit size.
*/
ASSERT(unit == cache->units && unit->next_local == NULL);
/* copy old data over to new memory */
memcpy(new_memory, unit->start_pc, unit->size);
    /* update pc-relative into-cache and out-of-cache pointers,
     * and update stored addresses like the start pc.
     * assumption: all intra-cache links will still work,
     * since they're all relative and we copied the entire cache!
*/
shift = (new_memory - unit->start_pc);
/* make sure we don't screw up any alignment */
ASSERT(ALIGNED(shift, proc_get_cache_line_size()));
ASSERT(ALIGNED(shift, PAD_JMPS_ALIGNMENT));
fcache_shift_fragments(dcontext, unit, shift,
new_memory, new_memory + new_size, unit->size);
/* now change unit fields */
if (reallocated) {
        /* de-allocate old memory -- not now, but next time we're in
         * fcache_add_fragment, b/c the current ilist for the fragment being
         * added may reference memory in the old cache
*/
fcache_thread_units_t *tu = (fcache_thread_units_t *) dcontext->fcache_field;
ASSERT(tu->pending_unmap_pc == NULL);
tu->pending_unmap_pc = unit->start_pc;
tu->pending_unmap_size = UNIT_RESERVED_SIZE(unit);
}
/* whether newly allocated or taken from dead list, increase cache->size
* by the difference between new size and old size
*/
cache->size -= unit->size;
cache->size += commit_size;
unit->cur_pc += shift;
unit->start_pc = new_memory;
unit->size = commit_size;
unit->end_pc = unit->start_pc + commit_size;
unit->reserved_end_pc = unit->start_pc + new_size;
vmvector_add(fcache_unit_areas, unit->start_pc,
unit->reserved_end_pc, (void *) unit);
unit->full = false; /* reset */
#ifdef WINDOWS_PC_SAMPLE
{
/* old unit was copied to start of enlarged unit, can copy old prof
* buffer to start of new buffer and maintain correspondence
*/
profile_t *old_prof = unit->profile;
if (old_prof != NULL) {
unit->profile = create_profile(unit->start_pc, unit->reserved_end_pc,
dynamo_options.prof_pcs_fcache,
NULL);
stop_profile(old_prof);
ASSERT(unit->profile->buffer_size >= old_prof->buffer_size);
memcpy(unit->profile->buffer, old_prof->buffer,
old_prof->buffer_size);
free_profile(old_prof);
start_profile(unit->profile);
}
}
#endif
DOLOG(2, LOG_CACHE, { verify_fifo(dcontext, cache); });
LOG(THREAD, LOG_CACHE, 1, "\tDone increasing unit size\n");
}
static void
fcache_thread_reset_init(dcontext_t *dcontext)
{
/* nothing */
}
void
fcache_thread_init(dcontext_t *dcontext)
{
fcache_thread_units_t *tu = (fcache_thread_units_t *)
heap_alloc(dcontext, sizeof(fcache_thread_units_t) HEAPACCT(ACCT_OTHER));
dcontext->fcache_field = (void *) tu;
    /* don't build the trace cache until we actually build a trace.
     * this saves memory both for DYNAMO_OPTION(disable_traces) and for
     * idle threads that never do much
*/
tu->trace = NULL;
/* in fact, let's delay both, cost is single conditional in fcache_add_fragment,
* once we have that conditional for traces it's no extra cost for bbs
*/
tu->bb = NULL;
tu->pending_unmap_pc = NULL;
tu->pending_flush = false;
fcache_thread_reset_init(dcontext);
}
/* see if a fragment with that tag has existed, ever, in any cache */
bool
fragment_lookup_deleted(dcontext_t *dcontext, app_pc tag)
{
future_fragment_t *fut;
if (SHARED_FRAGMENTS_ENABLED() && dcontext != GLOBAL_DCONTEXT) {
fut = fragment_lookup_private_future(dcontext, tag);
if (fut != NULL)
return TEST(FRAG_WAS_DELETED, fut->flags);
/* if no private, lookup shared */
}
fut = fragment_lookup_future(dcontext, tag);
return (fut != NULL && TEST(FRAG_WAS_DELETED, fut->flags));
}
/* find a fragment that existed in the same type of cache */
static future_fragment_t *
fragment_lookup_cache_deleted(dcontext_t *dcontext, fcache_t *cache, app_pc tag)
{
future_fragment_t *fut;
if (!cache->is_shared) {
/* only look for private futures, since we only care about whether this
* cache needs to be resized, and thus only if we kicked tag out of
* this cache, not whether we kicked it out of the shared cache
*/
fut = fragment_lookup_private_future(dcontext, tag);
} else
fut = fragment_lookup_future(dcontext, tag);
if (fut != NULL && TEST(FRAG_WAS_DELETED, fut->flags))
return fut;
else
return NULL;
}
#ifdef DEBUG
/* this routine is separate from fcache_thread_exit because it needs to
* be run before fragment_thread_exit, whereas the real fcache cleanup
* needs to be done after fragment's cleanup
*/
void
fcache_thread_exit_stats(dcontext_t *dcontext)
{
DOELOG(1, LOG_CACHE, {
fcache_thread_units_t *tu = (fcache_thread_units_t *) dcontext->fcache_field;
if (tu->bb != NULL)
fcache_cache_stats(dcontext, tu->bb);
if (tu->trace != NULL)
fcache_cache_stats(dcontext, tu->trace);
});
}
#endif
static void
fcache_thread_reset_free(dcontext_t *dcontext)
{
fcache_thread_units_t *tu = (fcache_thread_units_t *) dcontext->fcache_field;
if (tu->pending_unmap_pc != NULL) {
/* de-allocate old memory -- stats have already been taken care of */
/* remove from interval data struct first to avoid races w/ it
* being re-used and not showing up in in_fcache
*/
vmvector_remove(fcache_unit_areas, tu->pending_unmap_pc,
tu->pending_unmap_pc+tu->pending_unmap_size);
heap_munmap(tu->pending_unmap_pc, tu->pending_unmap_size);
tu->pending_unmap_pc = NULL;
}
if (tu->bb != NULL) {
fcache_cache_free(dcontext, tu->bb, true);
tu->bb = NULL;
}
if (tu->trace != NULL) {
fcache_cache_free(dcontext, tu->trace, true);
tu->trace = NULL;
}
}
void
fcache_thread_exit(dcontext_t *dcontext)
{
DEBUG_DECLARE(fcache_thread_units_t *tu =
(fcache_thread_units_t *) dcontext->fcache_field;)
fcache_thread_reset_free(dcontext);
DODEBUG({
/* for non-debug we do fast exit path and don't free local heap */
heap_free(dcontext, tu, sizeof(fcache_thread_units_t) HEAPACCT(ACCT_OTHER));
});
}
#if defined(DEBUG) && defined(INTERNAL)
static void
print_fifo(dcontext_t *dcontext, fcache_t *cache)
{
fragment_t *f = cache->fifo;
ASSERT(USE_FIFO_FOR_CACHE(cache));
ASSERT(CACHE_PROTECTED(cache));
while (f != NULL) {
/* caller sets loglevel */
LOG(THREAD, LOG_CACHE, 1, "\tF%d "PFX" = @"PFX" size %d\n",
FRAG_ID(f), FRAG_TAG(f), FRAG_HDR_START(f), FRAG_SIZE(f));
f = FIFO_NEXT(f);
}
}
static void
verify_fifo(dcontext_t *dcontext, fcache_t *cache)
{
fragment_t *f = cache->fifo;
fcache_unit_t *u;
cache_pc pc;
ASSERT(USE_FIFO_FOR_CACHE(cache));
ASSERT(CACHE_PROTECTED(cache));
while (f != NULL) {
LOG(THREAD, LOG_CACHE, 6, "\t*"PFX" F%d "PFX" = @"PFX" size %d\n",
f, FRAG_ID(f), FRAG_TAG(f), FRAG_HDR_START(f), FRAG_SIZE(f));
/* check that header is intact */
pc = FRAG_HDR_START(f);
ASSERT(*((fragment_t **)pc) == f);
/* check that no missing space */
pc += FRAG_SIZE(f);
u = FIFO_UNIT(f);
/* free list scheme can't be walked expecting FIFO headers */
if ((!DYNAMO_OPTION(cache_shared_free_list) || !cache->is_shared)) {
if (pc < u->cur_pc)
ASSERT(*((fragment_t **)pc) != NULL);
}
f = FIFO_NEXT(f);
}
}
#endif
static void
fifo_append(fcache_t *cache, fragment_t *f)
{
ASSERT(USE_FIFO(f));
ASSERT(CACHE_PROTECTED(cache));
/* start has prev to end, but end does NOT have next to start */
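    /* e.g. (illustrative), after appending A, B, C in that order:
     * fifo == A, A->prev == C, A->next == B, B->next == C, C->next == NULL,
     * so the head's prev gives O(1) access to the tail.
     */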
FIFO_NEXT_ASSIGN(f, NULL);
if (cache->fifo == NULL) {
cache->fifo = f;
FIFO_PREV_ASSIGN(f, f);
} else {
FIFO_PREV_ASSIGN(f, FIFO_PREV(cache->fifo));
FIFO_NEXT_ASSIGN(FIFO_PREV(cache->fifo), f);
FIFO_PREV_ASSIGN(cache->fifo, f);
}
FIFO_NEXT_ASSIGN(f, NULL);
LOG(THREAD_GET, LOG_CACHE, 5, "fifo_append F%d @"PFX"\n",
FRAG_ID(f), FRAG_HDR_START(f));
DOLOG(6, LOG_CACHE, { print_fifo(get_thread_private_dcontext(), cache); });
}
static void
fifo_remove(dcontext_t *dcontext, fcache_t *cache, fragment_t *f)
{
ASSERT(USE_FIFO(f));
ASSERT(CACHE_PROTECTED(cache));
ASSERT(cache->fifo != NULL);
/* start has prev to end, but end does NOT have next to start */
if (f == cache->fifo) {
cache->fifo = FIFO_NEXT(f);
} else {
FIFO_NEXT_ASSIGN(FIFO_PREV(f), FIFO_NEXT(f));
}
if (FIFO_NEXT(f) == NULL) {
if (cache->fifo != NULL)
FIFO_PREV_ASSIGN(cache->fifo, FIFO_PREV(f));
} else {
FIFO_PREV_ASSIGN(FIFO_NEXT(f), FIFO_PREV(f));
}
LOG(THREAD, LOG_CACHE, 5, "fifo_remove F%d @"PFX"\n",
FRAG_ID(f), FRAG_HDR_START(f));
DOLOG(6, LOG_CACHE, { print_fifo(dcontext, cache); });
if (FRAG_EMPTY(f)) {
STATS_FCACHE_SUB(cache, empty, FRAG_SIZE(f));
STATS_FCACHE_ADD(cache, used, FRAG_SIZE(f));
nonpersistent_heap_free(ALLOC_DC(dcontext, cache), f, sizeof(empty_slot_t)
HEAPACCT(ACCT_FCACHE_EMPTY));
}
}
static void
fifo_prepend_empty(dcontext_t *dcontext, fcache_t *cache, fcache_unit_t *unit,
fragment_t *f, cache_pc start_pc, uint size)
{
empty_slot_t *slot;
ASSERT(CACHE_PROTECTED(cache));
STATS_FCACHE_ADD(cache, empty, size);
if (DYNAMO_OPTION(cache_shared_free_list) && cache->is_shared && !cache->is_coarse) {
ASSERT(USE_FREE_LIST_FOR_CACHE(cache));
add_to_free_list(dcontext, cache, unit, f, start_pc, size);
return;
}
/* FIXME: make cache_shared_free_list always on and remove the option
* as there really is no alternative implemented -- we just waste the space.
* FIXME case 8714: anything we can do for coarse-grain?
*/
if (!USE_FIFO_FOR_CACHE(cache))
return;
    /* don't make two entries for adjacent empties;
     * for efficiency we only check the front of the FIFO -- the most common case anyway
*/
if (cache->fifo != NULL && FRAG_EMPTY(cache->fifo)) {
if (FRAG_HDR_START(cache->fifo) == start_pc+size) {
LOG(THREAD, LOG_CACHE, 5, "prepend: just enlarging next empty\n");
FRAG_START_ASSIGN(cache->fifo, start_pc + HEADER_SIZE_FROM_CACHE(cache));
*((fragment_t **)start_pc) = cache->fifo;
FRAG_SIZE_ASSIGN(cache->fifo, FRAG_SIZE(cache->fifo) + size);
return;
} else if (FRAG_HDR_START(cache->fifo) + FRAG_SIZE(cache->fifo) == start_pc) {
LOG(THREAD, LOG_CACHE, 5, "prepend: just enlarging prev empty\n");
FRAG_SIZE_ASSIGN(cache->fifo, FRAG_SIZE(cache->fifo) + size);
return;
}
}
slot = (empty_slot_t *)
nonpersistent_heap_alloc(ALLOC_DC(dcontext, cache), sizeof(empty_slot_t)
HEAPACCT(ACCT_FCACHE_EMPTY));
slot->flags = FRAG_FAKE | FRAG_IS_EMPTY_SLOT;
LOG(THREAD, LOG_CACHE, 5, "prepend: writing "PFX" to "PFX"\n", slot, start_pc);
*((empty_slot_t **)start_pc) = slot;
slot->start_pc = start_pc + HEADER_SIZE_FROM_CACHE(cache);
slot->fcache_size = size;
/* stick on front */
slot->next_fcache = cache->fifo;
if (cache->fifo == NULL)
slot->prev_fcache = (fragment_t *) slot;
else {
slot->prev_fcache = FIFO_PREV(cache->fifo);
FIFO_PREV_ASSIGN(cache->fifo, (fragment_t *)slot);
}
/* start has prev to end, but end does NOT have next to start */
cache->fifo = (fragment_t *) slot;
LOG(THREAD, LOG_CACHE, 5, "fifo_prepend_empty F-1 @"PFX"\n", start_pc);
DOLOG(6, LOG_CACHE, { print_fifo(dcontext, cache); });
}
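/* Illustrative ratio check (hypothetical parameters): with replace_param 50 and
 * regen_param 10, suppose 120 fragments have been replaced and 30 of those were
 * regenerated.  The reduction loop below leaves (20, 10), and since the
 * remaining regenerated count still reaches regen_param the cache is allowed to
 * grow; a 30/120 regeneration rate exceeds the 10/50 target.
 */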
/* returns whether the cache should be allowed to grow */
static bool
check_regen_replace_ratio(dcontext_t *dcontext, fcache_t *cache, uint add_size)
{
if (cache->max_size != 0 && cache->size + add_size > cache->max_size) {
/* if at max size, avoid regen/replace checks */
LOG(THREAD, LOG_CACHE, 4, "at max size 0x%x\n", cache->max_size);
return false;
} else if (!cache->finite_cache || cache->replace_param == 0) {
/* always upgrade -- adaptive working set is disabled */
LOG(THREAD, LOG_CACHE, 4, "upgrading since fcache_replace==0\n");
return true;
} else if (cache->regen_param == 0) {
/* never upgrade due to regen ratio */
LOG(THREAD, LOG_CACHE, 4, "will never upgrade since fcache_regen==0\n");
return false;
} else if (cache->wset_check > 0) {
/* wset_check is only used for fifo caches */
ASSERT(USE_FIFO_FOR_CACHE(cache));
cache->wset_check--;
LOG(THREAD, LOG_CACHE, 4, "dec wset_check -> %d\n", cache->wset_check);
return false;
} else if (cache->size < cache->free_upgrade_size) {
/* free upgrade, but set check for next time */
if (USE_FIFO_FOR_CACHE(cache)) {
ASSERT(cache->wset_check == 0);
cache->wset_check = cache->replace_param;
} else {
ASSERT(!cache->is_coarse); /* no individual fragment support */
/* if a new unit would put us over the free upgrade point,
* start keeping track of regen stats
*/
if (cache->size + cache->max_unit_size >= cache->free_upgrade_size
/* could come here after having already set this if flush
* a larger unit than last new unit and drop back below threshold
*/
&& !cache->record_wset) {
cache->record_wset = true;
}
}
LOG(THREAD, LOG_CACHE, 3, "Free upgrade, no resize check\n");
return true;
} else {
if (USE_FIFO_FOR_CACHE(cache)) {
/* wait cache->replace_param frags before checking again,
* to avoid too many checks when regen<<replace
*/
cache->wset_check = cache->replace_param;
} else {
ASSERT(!cache->is_coarse); /* no individual fragment support */
if (!cache->record_wset) {
                /* now we are big enough that we need to keep track, though
                 * ideally we should hit this prior to the free upgrade point,
                 * as otherwise this is a 2nd free resize; but we might not, if
                 * we create a new unit larger than the max unit size.
*/
cache->record_wset = true;
return true;
}
}
/* FIXME: for shared w/ replace==100 perhaps remove this if */
if (cache->num_replaced >= cache->replace_param &&
cache->num_regenerated >= cache->regen_param) {
/* minimum regen/replaced ratio, compute w/o using floating point ops
* and avoiding overflow (unless replace overflows before regen
* hits regen_param, which is very unlikely and who cares if it does)
*/
/* this loop guaranteed to terminate b/c we check for 0 above */
ASSERT(cache->replace_param > 0 && cache->regen_param > 0);
while (cache->num_replaced >= cache->replace_param &&
cache->num_regenerated >= cache->regen_param) {
cache->num_replaced -= cache->replace_param;
cache->num_regenerated -= cache->regen_param;
}
LOG(THREAD, LOG_CACHE, 3,
"Resize check: for %s unit: %d regenerated / %d replaced\n",
cache->name, cache->num_regenerated, cache->num_replaced);
if (cache->num_regenerated >= cache->regen_param) {
LOG(THREAD, LOG_CACHE, 1,
"%s unit reached ratio with %d regenerated / %d replaced\n",
cache->name, cache->num_regenerated, cache->num_replaced);
return true;
}
}
LOG(THREAD, LOG_CACHE, 4, "No resize allowed yet\n");
return false;
}
return true;
}
/* Adds size to the end of the non-empty unit 'unit'.
 * If a small area is eaten and added to size, returns that amount.
*/
static size_t
extend_unit_end(dcontext_t *dcontext, fcache_t *cache,
fcache_unit_t *unit, size_t size, bool rest_empty)
{
size_t left, extra = 0;
ASSERT(CACHE_PROTECTED(cache));
unit->cur_pc += size;
STATS_FCACHE_ADD(cache, claimed, size);
STATS_ADD(fcache_combined_claimed, size);
left = unit->end_pc - unit->cur_pc;
ASSERT(unit->end_pc >= unit->cur_pc);
if (left < MIN_UNIT_END_HOLE(cache)) {
LOG(THREAD_GET, LOG_CACHE, 3, "\tunit is now full\n");
unit->full = true;
if (left > 0) {
/* eat up too-small area at end */
extra = left;
unit->cur_pc += extra;
STATS_FCACHE_ADD(cache, claimed, extra);
STATS_ADD(fcache_combined_claimed, extra);
}
} else if (rest_empty) {
/* make entire rest of unit into an empty slot */
ASSERT(CHECK_TRUNCATE_TYPE_uint(left));
fifo_prepend_empty(dcontext, cache, unit, NULL, unit->cur_pc, (uint)left);
unit->cur_pc += left;
STATS_FCACHE_ADD(cache, claimed, left);
STATS_ADD(fcache_combined_claimed, left);
unit->full = true;
}
LOG(THREAD, LOG_CACHE, 5, "\t\textend_unit_end: %d + %d / %d => cur_pc = "PFX"\n",
size, extra, left, unit->cur_pc);
/* FIXME: if extended b/c need new unit (size==0), extra is empty space, but
* we cannot add it to stats b/c will never be removed!
*/
STATS_FCACHE_ADD(cache, used, (size + extra));
STATS_FCACHE_MAX(cache, peak, used);
return extra;
}
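/* Commit-extension sketch (hypothetical sizes): with a cache_commit_increment
 * of 4KB, a request that needs 10KB beyond the current end_pc doubles the
 * commit step 4KB -> 8KB -> 16KB until the slot fits, then truncates it to the
 * remaining reserved space if that would overshoot reserved_end_pc.
 */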
/* Returns whether it was able to either resize the unit or create a new unit.
* For non-FIFO caches this routine cannot fail and must suspend the world
* and reset if necessary.
*/
static bool
try_for_more_space(dcontext_t *dcontext, fcache_t *cache, fcache_unit_t *unit,
uint slot_size)
{
uint commit_size = DYNAMO_OPTION(cache_commit_increment);
ASSERT(CACHE_PROTECTED(cache));
if (unit->end_pc < unit->reserved_end_pc &&
!POINTER_OVERFLOW_ON_ADD(unit->cur_pc, slot_size) &&
/* simpler to just not support taking very last page in address space */
!POINTER_OVERFLOW_ON_ADD(unit->end_pc, commit_size)) {
/* extend commitment if have more reserved */
while (unit->cur_pc + slot_size > unit->end_pc + commit_size)
commit_size *= 2;
if (unit->end_pc + commit_size > unit->reserved_end_pc) {
ASSERT_TRUNCATE(commit_size, uint, unit->reserved_end_pc - unit->end_pc);
commit_size = (uint) (unit->reserved_end_pc - unit->end_pc);
}
cache_extend_commitment(unit, commit_size);
if (unit->cur_pc + slot_size > unit->end_pc) {
            /* must be a huge trace or something;
             * still worth committing -- we'll make an empty slot here
*/
extend_unit_end(dcontext, cache, unit, 0, true);
/* continue below and try to make more space */
} else
return true;
}
/* see if we have room to expand according to user-set maximum */
if (cache->max_size == 0 || cache->size + slot_size <= cache->max_size) {
        LOG(THREAD, LOG_CACHE, 1, "max size = %d KB, cur size = %d KB\n",
            cache->max_size/1024, cache->size/1024);
        /* At larger sizes it is better to create separate units to avoid expensive
         * re-linking when resizing.
* i#696: Don't try to resize fcache units when clients are present.
* They may use labels to insert absolute fragment PCs.
*/
if (unit->size >= cache->max_unit_size
IF_CLIENT_INTERFACE(|| dr_bb_hook_exists()
|| dr_trace_hook_exists())) {
fcache_unit_t *newunit;
size_t newsize;
ASSERT(!USE_FIFO_FOR_CACHE(cache) ||
cache->fifo != NULL ||
/* i#1129: we can get here for initial 4KB unit whose initial
* fragment is >4KB! We'll have set wset_check though.
*/
cache->wset_check > 0); /* shouldn't be empty! */
/* fill out to end first -- turn remaining room into empty slot */
extend_unit_end(dcontext, cache, unit, 0, true);
ASSERT(unit->full);
/* before create a new unit, see if we should flush an old one */
if (!USE_FIFO_FOR_CACHE(cache) && cache->finite_cache && !cache->is_coarse) {
/* wset algorithm for shared caches: when request a new unit,
* must grant the request, but if regen/replace ratio does not
* exceed target, must flush an old unit.
*/
if (!check_regen_replace_ratio(dcontext, cache,
0 /*not adding a fragment*/)) {
/* flush the oldest unit, at the end of the list */
fcache_thread_units_t *tu = (fcache_thread_units_t *)
dcontext->fcache_field;
fcache_unit_t *oldest = cache->units, *prev = NULL;
ASSERT(oldest != NULL);
/* another place where a prev_local would be nice */
for (; oldest->next_local != NULL;
prev = oldest, oldest = oldest->next_local)
; /* nothing */
/* Indicate unit is still live even though off live list.
* Flag will be cleared once really flushed in
* fcache_flush_pending_units()
*/
DODEBUG({ oldest->pending_flush = true; });
LOG(THREAD, LOG_CACHE, 2,
"marking unit "PFX"-"PFX" for flushing\n",
oldest->start_pc, oldest->end_pc);
/* move to pending-flush list and set trigger */
if (prev == NULL)
cache->units = oldest->next_local;
else
prev->next_local = oldest->next_local;
/* clear local just in case, should be no downstream use */
if (unit == oldest)
unit = NULL;
mutex_lock(&unit_flush_lock);
oldest->next_local = allunits->units_to_flush;
allunits->units_to_flush = oldest;
STATS_ADD_PEAK(cache_units_toflush, 1);
/* FIXME case 8743: we should call remove_unit_from_cache() here,
* but we need the cache field for chain_fragments_for_flush() --
* so we assume for now that there are no deletable caches that
* don't use fifos yet are finite, and let
* append_units_to_free_list() remove from the cache later on.
* This does mean that cache->size is too big from now until
* then, so we don't really support hardcoded cache sizes.
*/
mutex_unlock(&unit_flush_lock);
tu->pending_flush = true;
STATS_INC(cache_units_wset_flushed);
} else
STATS_INC(cache_units_wset_allowed);
}
/* now make a new unit
* if new frag is large, make unit large as well
*/
newsize = cache->max_unit_size;
if (newsize < slot_size * MAX_SINGLE_MULTIPLE)
newsize = ALIGN_FORWARD(slot_size * MAX_SINGLE_MULTIPLE, PAGE_SIZE);
/* final adjustment: make sure don't go over max */
if (cache->max_size > 0 && cache->size + newsize > cache->max_size)
newsize = cache->max_size - cache->size;
newunit = fcache_create_unit(dcontext, cache, NULL, newsize);
LOG(THREAD, LOG_CACHE, 1, "Creating a new %s unit of %d KB @"PFX"\n",
cache->name, newunit->size/1024, newunit->start_pc);
newunit->next_local = cache->units;
cache->units = newunit;
} else {
ASSERT(!cache->is_coarse); /* no individual support so harder to resize */
LOG(THREAD, LOG_CACHE, 1, "Increasing size of %s unit of %d KB @"PFX"\n",
cache->name, unit->size/1024, unit->start_pc);
fcache_increase_size(dcontext, cache, unit, slot_size);
LOG(THREAD, LOG_CACHE, 1, "\tnow %d KB @"PFX"\n",
unit->size/1024, unit->start_pc);
}
/* reset counters, but not deleted table */
cache->num_replaced = 0;
cache->num_regenerated = 0;
DOLOG(2, LOG_CACHE, { fcache_cache_stats(dcontext, cache); });
return true;
} else {
if (!USE_FIFO_FOR_CACHE(cache)) {
/* options check up front shouldn't allow us to get here */
ASSERT_NOT_REACHED();
/* Case 8203: we need a new reset type that doesn't free anything,
* and aborts traces only of other threads (not this one, as this
* could be a trace we're emitting now). Then we could free all
* fragments in a unit here.
* In order to do the reset we'd first need to release cache->lock,
* if !cache->is_trace release the bb_building_lock, and enter_nolinking().
* Note that for bb cache we could instead do a full reset and then
* transfer_to_dispatch() but in debug builds we won't
* free locals in prior stack frames: the fragment_t, the
* instrlist_t, etc. For trace cache doing that would be a bigger
* step backward and take longer to get back here.
*/
}
/* tell user if fragment bigger than max size
* FIXME: but if trace cache has small max size, should just
* not build traces that big!
*/
if (cache->max_size > 0 && slot_size > (int) cache->max_size) {
#ifdef INTERNAL
const char *name = "";
DODEBUG(name = cache->name;);
#endif
USAGE_ERROR("single %s fragment (%d bytes) > max cache size (%d bytes)",
name, slot_size, cache->max_size);
}
return false;
}
}
static void
place_fragment(dcontext_t *dcontext, fragment_t *f, fcache_unit_t *unit,
cache_pc header_pc)
{
fcache_t *cache = unit->cache;
ASSERT_OWN_MUTEX(unit->cache->is_shared, &unit->cache->lock);
DOLOG(3, LOG_CACHE, { /* only to reduce perf hit */
fragment_t wrapper;
/* cannot call fragment_pclookup as it will grab the fcache lock */
ASSERT(fragment_pclookup_by_htable(dcontext, header_pc + HEADER_SIZE(f),
&wrapper) == NULL);
});
if (HEADER_SIZE(f) > 0) {
/* add header */
LOG(THREAD, LOG_CACHE, 5, "place: writing "PFX" to "PFX"\n", f, header_pc);
*((fragment_t **)header_pc) = f;
}
/* we assume alignment padding was added at end of prev fragment, so this
* guy needs no padding at start
*/
FRAG_START_ASSIGN(f, header_pc + HEADER_SIZE(f));
ASSERT(ALIGNED(FRAG_HDR_START(f), SLOT_ALIGNMENT(cache)));
STATS_FCACHE_ADD(cache, headers, HEADER_SIZE(f));
STATS_FCACHE_ADD(cache, align, f->fcache_extra - (stats_int_t)HEADER_SIZE(f));
/* for shared caches we must track regen/replace on every placement */
if (cache->record_wset) {
/* FIXME: how is this supposed to work for traces where a bb
* may have replaced the future? xref case 7151, though that
* should be a problem for private as well...
*/
future_fragment_t *fut = fragment_lookup_cache_deleted(dcontext, cache, f->tag);
ASSERT(!USE_FIFO_FOR_CACHE(cache));
ASSERT(!cache->is_coarse);
cache->num_replaced++; /* simply number created past record_wset point */
if (fut != NULL) {
cache->num_regenerated++;
STATS_INC(num_fragments_regenerated);
SHARED_FLAGS_RECURSIVE_LOCK(fut->flags, acquire, change_linking_lock);
fut->flags &= ~FRAG_WAS_DELETED;
SHARED_FLAGS_RECURSIVE_LOCK(fut->flags, release, change_linking_lock);
}
LOG(THREAD, LOG_CACHE, 4, "For %s unit: %d regenerated / %d replaced\n",
cache->name, cache->num_regenerated, cache->num_replaced);
}
}
#ifdef DEBUG
static void
removed_fragment_stats(dcontext_t *dcontext, fcache_t *cache, fragment_t *f)
{
int prefixes = fragment_prefix_size(f->flags);
linkstub_t *l = FRAGMENT_EXIT_STUBS(f);
int stubs = 0;
uint selfmod = 0;
/* N.B.: we cannot call EXIT_STUB_PC() here as with the -detect_dangling_fcache
* option the fragment will be obliterated at this point and we will not
* be able to locate the stub pc for an indirect exit.
* Instead we simply calculate what the stub sizes should be.
*/
for (; l != NULL; l = LINKSTUB_NEXT_EXIT(l)) {
int sz;
if (!EXIT_HAS_LOCAL_STUB(l->flags, f->flags))
continue; /* it's kept elsewhere */
sz = linkstub_size(dcontext, f, l);
#ifdef CUSTOM_EXIT_STUBS
sz += l->fixed_stub_offset;
#endif
if (LINKSTUB_INDIRECT(l->flags))
STATS_FCACHE_ADD(cache, indirect_stubs, -sz);
else {
ASSERT(LINKSTUB_DIRECT(l->flags));
STATS_FCACHE_ADD(cache, direct_stubs, -sz);
}
stubs += sz;
}
if (TEST(FRAG_SELFMOD_SANDBOXED, f->flags)) {
/* we cannot go and re-decode the app bb now, since it may have been
* changed (selfmod doesn't make page RO!), so we use a stored size
* that's there just for stats
*/
selfmod = FRAGMENT_SELFMOD_COPY_SIZE(f);
STATS_FCACHE_SUB(cache, selfmod_copy, selfmod);
}
STATS_FCACHE_SUB(cache, bodies, (f->size - (prefixes + stubs + selfmod)));
STATS_FCACHE_SUB(cache, prefixes, prefixes);
STATS_FCACHE_SUB(cache, headers, HEADER_SIZE(f));
STATS_FCACHE_SUB(cache, align, (f->fcache_extra - (stats_int_t)HEADER_SIZE(f)));
}
#endif /* DEBUG */
static void
force_fragment_from_cache(dcontext_t *dcontext, fcache_t *cache, fragment_t *victim)
{
bool empty = FRAG_EMPTY(victim); /* fifo_remove will free empty slot */
ASSERT(CACHE_PROTECTED(cache));
if (USE_FIFO(victim))
fifo_remove(dcontext, cache, victim);
if (!empty) {
/* don't need to add deleted -- that's done by link.c for us,
* when it makes a future fragment it uses the FRAG_WAS_DELETED flag
*/
if (cache->finite_cache)
cache->num_replaced++;
DOSTATS({ removed_fragment_stats(dcontext, cache, victim); });
STATS_INC(num_fragments_replaced);
fragment_delete(dcontext, victim, FRAGDEL_NO_FCACHE);
}
}
static bool
replace_fragments(dcontext_t *dcontext, fcache_t *cache, fcache_unit_t *unit,
fragment_t *f, fragment_t *fifo, uint slot_size)
{
fragment_t *victim;
uint slot_so_far;
cache_pc pc, header_pc;
ASSERT(CACHE_PROTECTED(cache));
/* free list scheme can't be walked expecting FIFO headers */
ASSERT(!DYNAMO_OPTION(cache_shared_free_list) || !cache->is_shared);
ASSERT(USE_FIFO_FOR_CACHE(cache));
DODEBUG({ cache->consistent = false; });
/* first walk: make sure this is possible (look for un-deletable frags) */
slot_so_far = 0;
pc = FRAG_HDR_START(fifo);
victim = fifo;
while (true) {
if (TEST(FRAG_CANNOT_DELETE, victim->flags)) {
DODEBUG({ cache->consistent = true; });
return false;
}
slot_so_far += FRAG_SIZE(victim);
if (slot_so_far >= slot_size)
break;
/* look at contiguously-next fragment_t in cache */
pc += FRAG_SIZE(victim);
if (pc == unit->cur_pc) {
/* we can just take unallocated space */
break;
}
ASSERT(pc < unit->cur_pc);
victim = *((fragment_t **)pc);
LOG(THREAD, LOG_CACHE, 5, "\treading "PFX" -> "PFX"\n", pc, victim);
ASSERT(victim != NULL);
ASSERT(FIFO_UNIT(victim) == unit);
}
LOG(THREAD, LOG_CACHE, 4, "\treplacing fragment(s) in filled unit\n");
/* record stats that will be destroyed */
header_pc = FRAG_HDR_START(fifo);
/* second walk: do the deletion */
slot_so_far = 0;
pc = header_pc;
victim = fifo;
while (true) {
slot_so_far += FRAG_SIZE(victim);
pc += FRAG_SIZE(victim);
LOG(THREAD, LOG_CACHE, 4, "\t\tdeleting F%d => %d bytes\n",
FRAG_ID(victim), slot_so_far);
force_fragment_from_cache(dcontext, cache, victim);
if (slot_so_far >= slot_size)
break;
/* look at contiguously-next fragment_t in cache
* assumption: wouldn't be here if not enough victims below us, so
* don't need to check for end of cache
*/
if (pc == unit->cur_pc) {
/* take unallocated space */
size_t extra = extend_unit_end(dcontext, cache, unit,
slot_size - slot_so_far, false);
LOG(THREAD, LOG_CACHE, 4, "\t\textending unit by %d => %d bytes\n",
slot_size - slot_so_far, slot_size);
ASSERT(CHECK_TRUNCATE_TYPE_uint(extra));
FRAG_SIZE_ASSIGN(f, slot_size+(uint)extra);
/* no splitting will be needed */
slot_so_far = slot_size;
break;
}
ASSERT(pc < unit->cur_pc);
victim = *((fragment_t **)pc);
}
if (slot_so_far > slot_size) {
uint diff = slot_so_far - slot_size;
        /* if we were using free lists we would check whether the next slot is a
         * free entry and, if so, coalesce space of any size with it
*/
if (diff < MIN_EMPTY_HOLE(cache)) {
FRAG_SIZE_ASSIGN(f, slot_so_far);
LOG(THREAD, LOG_CACHE, 4, "\t\teating extra %d bytes\n", diff);
} else {
/* add entry for diff */
fifo_prepend_empty(dcontext, cache, unit, NULL, header_pc + FRAG_SIZE(f), diff);
STATS_FCACHE_SUB(cache, used, diff);
}
}
place_fragment(dcontext, f, unit, header_pc);
fifo_append(cache, f);
if (cache->finite_cache && cache->num_replaced > 0) {
future_fragment_t *fut = fragment_lookup_cache_deleted(dcontext, cache, f->tag);
ASSERT(cache->finite_cache && cache->replace_param > 0);
if (fut != NULL) {
cache->num_regenerated++;
STATS_INC(num_fragments_regenerated);
SHARED_FLAGS_RECURSIVE_LOCK(fut->flags, acquire, change_linking_lock);
fut->flags &= ~FRAG_WAS_DELETED;
SHARED_FLAGS_RECURSIVE_LOCK(fut->flags, release, change_linking_lock);
}
LOG(THREAD, LOG_CACHE, 4, "For %s unit: %d regenerated / %d replaced\n",
cache->name, cache->num_regenerated, cache->num_replaced);
}
DODEBUG({ cache->consistent = true; });
return true;
}
static inline bool
replace_fifo(dcontext_t *dcontext, fcache_t *cache, fragment_t *f, uint slot_size,
fragment_t *fifo)
{
fcache_unit_t *unit;
ASSERT(USE_FIFO(f));
ASSERT(CACHE_PROTECTED(cache));
while (fifo != NULL) {
unit = FIFO_UNIT(fifo);
if ((ptr_uint_t)(unit->end_pc - FRAG_HDR_START(fifo)) >= slot_size) {
/* try to replace fifo and possibly subsequent frags with f
* could fail if un-deletable frags
*/
DOLOG(4, LOG_CACHE, { verify_fifo(dcontext, cache); });
if (replace_fragments(dcontext, cache, unit, f, fifo, slot_size))
return true;
}
fifo = FIFO_NEXT(fifo);
}
return false;
}
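/* Bucket selection example (illustrative sizes; the real FREE_LIST_SIZES array
 * is defined elsewhere in this file): if the bucket minimums were
 * {44, 52, 64, 96, ...}, a 60-byte entry lands in the 52-byte bucket since
 * 52 <= 60 < 64, and anything at least as large as the last minimum lands in
 * the final bucket.
 */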
static inline
int
find_free_list_bucket(uint size)
{
int bucket;
    /* Find the largest bucket whose minimum size we are >=, i.e., size in
     * [SIZE[bucket], SIZE[bucket+1]) */
for (bucket = FREE_LIST_SIZES_NUM-1; size < FREE_LIST_SIZES[bucket]; bucket--)
; /* no body */
ASSERT(bucket >= 0);
return bucket;
}
static inline free_list_footer_t *
free_list_footer_from_header(free_list_header_t *h)
{
return (free_list_footer_t *) (((cache_pc)h) + h->size - sizeof(free_list_footer_t));
}
static inline free_list_header_t *
free_list_header_from_footer(free_list_footer_t *h)
{
return (free_list_header_t *) (((cache_pc)h) + sizeof(free_list_footer_t) - h->size);
}
static inline void
remove_from_free_list(fcache_t *cache, uint bucket, free_list_header_t *header
_IF_DEBUG(bool coalesce))
{
ASSERT(CACHE_PROTECTED(cache));
ASSERT(DYNAMO_OPTION(cache_shared_free_list) && cache->is_shared);
LOG(GLOBAL, LOG_CACHE, 4,
"remove_from_free_list: %s bucket[%d] %d bytes @"PFX"\n",
        cache->name, bucket, header->size, header);
if (header->prev != NULL)
header->prev->next = header->next;
else
cache->free_list[bucket] = header->next;
if (header->next != NULL)
header->next->prev = header->prev;
/* it's up to the caller to adjust FRAG_FOLLOWS_FREE_ENTRY if a fragment_t
* follows header.
* no reason to remove FRAG_FCACHE_FREE_LIST flag here.
*/
DOSTATS({
if (coalesce)
cache->free_stats_coalesced[bucket]++;
else
cache->free_stats_reused[bucket]++;
cache->free_stats_charge[bucket] -= header->size;
});
}
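/* Coalescing sketch (hypothetical sizes): freeing a 64-byte slot whose next
 * neighbor is a 48-byte free entry yields a single 112-byte entry, provided the
 * combined size stays within MAX_FREE_ENTRY_SIZE; if the freed fragment is also
 * marked FRAG_FOLLOWS_FREE_ENTRY, the preceding free entry (located via its
 * footer) is merged in the same way.  An entry that ends exactly at the append
 * point of the most recent unit is instead returned by rolling cur_pc back.
 */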
/* If freeing a fragment, must pass that as f; else, pass NULL as f */
static void
add_to_free_list(dcontext_t *dcontext, fcache_t *cache, fcache_unit_t *unit,
fragment_t *f, cache_pc start_pc, uint size)
{
int bucket;
free_list_header_t *header = (free_list_header_t *)start_pc;
free_list_footer_t *footer;
ASSERT(CACHE_PROTECTED(cache));
ASSERT(DYNAMO_OPTION(cache_shared_free_list) && cache->is_shared);
DODEBUG({
/* only count frees of actual fragments */
if (f != NULL)
cache->free_size_histogram[get_histogram_bucket(size)]++;
});
DOCHECK(CHKLVL_DEFAULT, { /* expensive, makes fragment_exit() O(n^2) */
ASSERT(dynamo_resetting || fcache_pc_in_live_unit(cache, start_pc));
});
if (size > MAX_FREE_ENTRY_SIZE) {
/* FIXME PR 203913: fifo_prepend_empty can handle larger sizes, but
* we can't: we would need to split into two empty slots.
* For now we bail and leak.
*/
ASSERT_NOT_REACHED();
return;
}
/* check next slot first before we potentially shift back from coalescing */
if (unit->cur_pc > start_pc + size && !POINTER_OVERFLOW_ON_ADD(start_pc, size)) {
fragment_t *subseq = FRAG_NEXT_SLOT(start_pc, size);
if (FRAG_IS_FREE_LIST(subseq)) {
/* this is a free list entry, coalesce with it */
free_list_header_t *next_header = FRAG_NEXT_FREE(start_pc, size);
/* only coalesce if not over size limit */
if ((uint)next_header->size + size <= MAX_FREE_ENTRY_SIZE) {
uint next_bucket = find_free_list_bucket(next_header->size);
LOG(GLOBAL, LOG_CACHE, 4,
"add_to_free_list: coalesce w/ next %s bucket[%d] %d bytes @"PFX"\n",
cache->name, next_bucket, next_header->size, next_header);
size += next_header->size;
/* OPTIMIZATION: if still in same bucket can eliminate some work */
remove_from_free_list(cache, next_bucket, next_header
_IF_DEBUG(true/*coalesce*/));
/* fall-through and add to free list anew
* (potentially coalesce with prev as well)
*/
STATS_FCACHE_ADD(cache, free_coalesce_next, 1);
} else {
/* FIXME: if we have a few giant free entries we should
* free the whole unit.
*/
STATS_FCACHE_ADD(cache, free_coalesce_too_big, 1);
}
} else {
/* A real fragment_t: mark it.
* This is the only place we need to mark, as we disallow free lists at
* the end of the current unit (so an appended fragment_t will never need
* this flag).
*/
LOG(GLOBAL, LOG_CACHE, 4,
"add_to_free_list: marking next F%d("PFX")."PFX" as after-free\n",
subseq->id, subseq->tag, subseq->start_pc);
ASSERT(FIFO_UNIT(subseq) == unit);
ASSERT(FRAG_HDR_START(subseq) == start_pc + size);
/* can already be marked if we're called due to a split */
if (!TEST(FRAG_FOLLOWS_FREE_ENTRY, subseq->flags)) {
if (TEST(FRAG_SHARED, subseq->flags))
acquire_recursive_lock(&change_linking_lock);
subseq->flags |= FRAG_FOLLOWS_FREE_ENTRY;
if (TEST(FRAG_SHARED, subseq->flags))
release_recursive_lock(&change_linking_lock);
}
}
}
/* If we're actually freeing a real fragment_t, we can coalesce with prev.
* Other reasons to come here should never have a free entry in the prev slot.
*/
if (f != NULL && TEST(FRAG_FOLLOWS_FREE_ENTRY, f->flags)) {
/* coalesce with prev */
free_list_footer_t *prev_footer = (free_list_footer_t *)
(start_pc - sizeof(free_list_footer_t));
free_list_header_t *prev_header = free_list_header_from_footer(prev_footer);
uint prev_bucket = find_free_list_bucket(prev_footer->size);
/* only coalesce if not over size limit */
uint new_size = (uint)prev_header->size + size;
if (new_size <= MAX_FREE_ENTRY_SIZE) {
LOG(GLOBAL, LOG_CACHE, 4,
"add_to_free_list: coalesce w/ prev %s bucket[%d] %d bytes @"PFX"\n",
cache->name, prev_bucket, prev_header->size, prev_header);
size += prev_header->size;
header = prev_header;
start_pc = (cache_pc) header;
/* OPTIMIZATION: if still in same bucket can eliminate some work */
remove_from_free_list(cache, prev_bucket, prev_header
_IF_DEBUG(true/*coalesce*/));
/* fall-through and add to free list anew */
STATS_FCACHE_ADD(cache, free_coalesce_prev, 1);
} else {
/* see FIXMEs for next-coalesce-too-large above */
STATS_FCACHE_ADD(cache, free_coalesce_too_big, 1);
}
}
/* Invariant: no free entry can end at the append point of the current unit.
     * If we want to relax this we must mark an appended fragment as
* FRAG_FOLLOWS_FREE_ENTRY. We do allow a free entry at the very end of the
* now-full cur unit. FIXME: this code is fragile wrt extend_unit_end's
* fifo_prepend_empty(), which wants a free list at the end of the unit, and
* only avoids disaster here by not incrementing cur_pc until afterward, so
* our condition here is not triggered. We could add another param to
* distinguish (f==NULL is not good enough as splits also call here).
*/
if (unit == cache->units && unit->cur_pc == start_pc + size) {
/* free space at end of current unit: just adjust cur_pc */
ASSERT((unit->full && unit->cur_pc == unit->end_pc) ||
(!unit->full && unit->cur_pc < unit->end_pc));
unit->cur_pc = start_pc;
unit->full = false;
STATS_FCACHE_ADD(cache, return_last, 1);
STATS_FCACHE_SUB(cache, claimed, size);
STATS_SUB(fcache_combined_claimed, size);
return;
}
/* ok to call w/ small size if you know it will be coalesced or returned,
* but not if it's to be its own entry
*/
ASSERT(size >= MIN_FCACHE_SLOT_SIZE(cache));
bucket = find_free_list_bucket(size);
header->next = cache->free_list[bucket];
header->prev = NULL;
ASSERT_TRUNCATE(header->size, ushort, size);
header->size = (ushort) size;
header->flags = FRAG_FAKE | FRAG_FCACHE_FREE_LIST;
footer = free_list_footer_from_header(header);
ASSERT_TRUNCATE(footer->size, ushort, size);
footer->size = (ushort) size;
if (cache->free_list[bucket] != NULL) {
ASSERT(cache->free_list[bucket]->prev == NULL);
cache->free_list[bucket]->prev = header;
}
cache->free_list[bucket] = header;
/* FIXME: case 7318 we should keep sorted */
DOSTATS({
/* FIXME: we could split freed into pure-freed, split-freed, and coalesce-freed */
cache->free_stats_freed[bucket]++;
cache->free_stats_charge[bucket] += size;
LOG(GLOBAL, LOG_CACHE, 4,
"add_to_free_list: %s bucket[%d] %d bytes @"PFX"\n",
cache->name, bucket, size, start_pc);
/* assumption: caller has already adjusted cache's empty space stats */
});
}
static bool
find_free_list_slot(dcontext_t *dcontext, fcache_t *cache, fragment_t *f, uint size)
{
int bucket;
cache_pc start_pc;
uint free_size;
free_list_header_t *header = NULL;
fcache_unit_t *unit = NULL;
DEBUG_DECLARE(bool split_empty = false;)
ASSERT(!USE_FIFO(f) && USE_FREE_LIST(f));
ASSERT(CACHE_PROTECTED(cache));
ASSERT(DYNAMO_OPTION(cache_shared_free_list) && cache->is_shared);
LOG(THREAD, LOG_CACHE, 4, "find_free_list_slot: %d bytes\n", size);
DODEBUG({ cache->request_size_histogram[get_histogram_bucket(size)]++;});
if (size > MAX_FREE_ENTRY_SIZE) {
/* FIXME: we may have adjacent un-coalesced free slots we could use */
return false;
}
/* Strategy: search first in current bucket and if nothing is
* found, then upgrade. Coalescing and splitting allows a blind
* always-upgrade strategy.
*/
/* case 7318 for discussion on additional search strategies
* FIXME: for any bucket if we are
* too close to the top we should look in the next bucket? (We
* used to have a scheme for FREE_LIST_BOTTOM_BUCKET_MARGIN)
*/
for (bucket = find_free_list_bucket(size); bucket < FREE_LIST_SIZES_NUM; bucket++) {
if (cache->free_list[bucket] == NULL)
continue;
/* search for first entry larger than size in current bucket */
/* note that for a bucket of only one size, this search should
* finish immediately */
header = cache->free_list[bucket];
while (header != NULL && header->size < size) {
/* FIXME: if we keep the list sorted, we'd not waste too
* much space by picking the first large enough slot */
/* FIXME: if we want to coalesce here we can act on any
* fragment while walking list and make sure that it is
* coalesced */
header = header->next;
}
if (header != NULL)
break;
}
if (bucket >= FREE_LIST_SIZES_NUM)
return false;
DOSTATS({
if (bucket > find_free_list_bucket(size))
STATS_FCACHE_ADD(cache, free_use_larger, 1);
});
ASSERT(header != NULL);
/* found big enough free slot, extract from free list */
remove_from_free_list(cache, bucket, header _IF_DEBUG(false/*!coalesce*/));
start_pc = (cache_pc) header;
free_size = header->size;
ASSERT(free_size >= size);
ASSERT(free_size <= MAX_FREE_ENTRY_SIZE);
    /* FIXME: if this vmarea lookup is expensive, we can also keep the
     * unit ptr/tag in the free header. */
unit = fcache_lookup_unit(start_pc);
ASSERT(unit != NULL);
DOCHECK(CHKLVL_DEFAULT, { /* expensive */
ASSERT(fcache_pc_in_live_unit(cache, start_pc));
});
/* FIXME: if bucket sizes are spread apart further than
* MIN_EMPTY_HOLE() this will also kick in. Currently an
* issue only for traces and the bucket [112, 172).
*/
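    /* Split example (hypothetical sizes): a 200-byte free entry serving an
     * 80-byte request puts the trailing 120 bytes back on the free list when
     * 120 exceeds MIN_EMPTY_HOLE; otherwise the fragment keeps the whole 200
     * bytes and the surplus is charged to alignment waste below.
     */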
/* if enough room left over, split it off as its own free slot */
if (free_size - size > MIN_EMPTY_HOLE(cache)) {
DODEBUG({
cache->free_stats_split[bucket]++;
split_empty = true;
});
STATS_FCACHE_ADD(cache, free_split, 1);
add_to_free_list(dcontext, cache, unit, NULL, start_pc + size, free_size - size);
free_size = size;
/* next fragment_t remains marked as FRAG_FOLLOWS_FREE_ENTRY */
} else {
/* taking whole entry */
if (unit->cur_pc > start_pc + free_size) {
fragment_t *subseq = FRAG_NEXT_SLOT(start_pc, free_size);
/* remove FRAG_FOLLOWS_FREE_ENTRY flag from subsequent fragment_t, if it exists */
if (!FRAG_IS_FREE_LIST(subseq)) {
LOG(GLOBAL, LOG_CACHE, 4,
"find_free_list_slot: un-marking next F%d("PFX")."PFX" as after-free\n",
subseq->id, subseq->tag, subseq->start_pc);
ASSERT(FIFO_UNIT(subseq) == unit);
ASSERT(FRAG_HDR_START(subseq) == start_pc + free_size);
if (TEST(FRAG_SHARED, subseq->flags))
acquire_recursive_lock(&change_linking_lock);
ASSERT(TEST(FRAG_FOLLOWS_FREE_ENTRY, subseq->flags));
subseq->flags &= ~FRAG_FOLLOWS_FREE_ENTRY;
if (TEST(FRAG_SHARED, subseq->flags))
release_recursive_lock(&change_linking_lock);
} else {
/* shouldn't be free list entry following this one, unless
* unable to coalesce due to ushort size limits
*/
ASSERT(free_size +
FRAG_NEXT_FREE(start_pc, free_size)->size > MAX_FREE_ENTRY_SIZE);
}
}
}
place_fragment(dcontext, f, unit, start_pc);
FRAG_SIZE_ASSIGN(f, free_size);
DOSTATS({
LOG(GLOBAL, LOG_CACHE, 4, "find_free_list_slot: %s bucket[%d]%s"
" %d bytes @"PFX" requested %d, waste=%d\n",
cache->name, bucket, split_empty ? "split" : "",
free_size, start_pc, size, (free_size - size));
});
STATS_FCACHE_ADD(cache, align, free_size - size);
STATS_FCACHE_SUB(cache, empty, free_size);
return true;
}
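/* Placement order sketch (summarizing the steps below): 1) a free-list slot for
 * shared caches, 2) an empty FIFO slot (kicking out neighbors as needed),
 * 3) unclaimed space at the end of the most recent unit, 4) resizing or adding
 * a unit when the regen/replace ratio (or cache type) permits, and finally
 * 5) evicting fragments in FIFO order, aborting any in-progress trace if even
 * that fails.
 */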
/* separate routine b/c it may recurse */
static void
add_fragment_common(dcontext_t *dcontext, fcache_t *cache, fragment_t *f, uint slot_size)
{
fragment_t *fifo = NULL;
fcache_unit_t *unit;
ASSERT(CACHE_PROTECTED(cache));
    /* first, check the fifo for an empty slot.
     * don't look for an empty of the appropriate size -- kick out the neighbors!
     * we've found that works better: otherwise the empty list gets too long, and
     * splitting it by size ruins the fifo
*/
if (USE_FREE_LIST_FOR_CACHE(cache)) {
if (DYNAMO_OPTION(cache_shared_free_list) &&
find_free_list_slot(dcontext, cache, f, slot_size))
return;
/* If no free list, no way to insert fragment into middle of cache */
} else if (USE_FIFO_FOR_CACHE(cache)) {
fifo = cache->fifo;
while (fifo != NULL && FRAG_EMPTY(fifo)) {
unit = FIFO_UNIT(fifo);
if ((ptr_uint_t)(unit->end_pc - FRAG_HDR_START(fifo)) >= slot_size) {
/* try to replace fifo and possibly subsequent frags with f
* could fail if un-deletable frags
*/
LOG(THREAD, LOG_CACHE, 4, "\ttrying to fit in empty slot\n");
DOLOG(4, LOG_CACHE, { verify_fifo(dcontext, cache); });
if (replace_fragments(dcontext, cache, unit, f, fifo, slot_size))
return;
}
fifo = FIFO_NEXT(fifo);
}
}
/* second, look for room at end, if cache never filled up before */
unit = cache->units; /* most recent is only potentially non-full unit */
if (!unit->full && (ptr_uint_t)(unit->end_pc - unit->cur_pc) >= slot_size) {
/* just add to end */
size_t extra;
place_fragment(dcontext, f, unit, unit->cur_pc);
extra = extend_unit_end(dcontext, cache, unit, slot_size, false);
if (extra > 0) {
ASSERT(CHECK_TRUNCATE_TYPE_uint(extra));
FRAG_SIZE_ASSIGN(f, slot_size + (uint)extra);
STATS_FCACHE_ADD(cache, align, extra);
}
if (USE_FIFO_FOR_CACHE(cache))
fifo_append(cache, f);
LOG(THREAD, LOG_CACHE, 4,
"\tadded F%d to unfilled unit @"PFX" (%d [/%d] bytes left now)\n",
f->id, f->start_pc, unit->end_pc - unit->cur_pc, UNIT_RESERVED_SIZE(unit));
return;
}
/* third, resize and try again.
* for fifo caches, don't resize unless regen/replace ratio warrants it.
*/
if (!USE_FIFO_FOR_CACHE(cache) ||
cache->is_coarse ||
check_regen_replace_ratio(dcontext, cache, slot_size)) {
LOG(THREAD, LOG_CACHE, 3, "\tcache is full, trying to acquire more space\n");
if (try_for_more_space(dcontext, cache, unit, slot_size)) {
add_fragment_common(dcontext, cache, f, slot_size);
return;
}
}
/* all our subsequent schemes require a FIFO (non-FIFO caches will have
* been resized in step 3)
*/
ASSERT(USE_FIFO_FOR_CACHE(cache));
/* finally, boot somebody out -- in FIFO order, only go to next if
* not enough room from victim to end of cache.
* fifo should be pointing to first non-empty slot!
*/
if (replace_fifo(dcontext, cache, f, slot_size, fifo))
return;
    /* if we get here there is no room, so we must resize */
LOG(THREAD, LOG_CACHE, 3,
"\ttried to avoid resizing, but no way around it for fragment size %d\n",
slot_size);
if (try_for_more_space(dcontext, cache, unit, slot_size)) {
add_fragment_common(dcontext, cache, f, slot_size);
return;
}
    /* if we get here, we must have a very large fragment relative to a constrained cache size,
* plus some undeletable fragments right in the middle.
* abort the trace, hopefully making fragments re-deletable, and try again.
*/
trace_abort(dcontext);
if (replace_fifo(dcontext, cache, f, slot_size, cache->fifo))
return;
/* could still get here if undeletable fragments...but what can we do?
* current impl only makes trace-in-progress undeletable, right?
* so should never get here
*/
ASSERT_NOT_REACHED();
}
void
fcache_shift_start_pc(dcontext_t *dcontext, fragment_t *f, uint space)
{
fcache_unit_t *unit;
fcache_t *cache = NULL;
if (space == 0)
return;
/* must hold cache lock across any set of a fragment's start_pc or size
* once that fragment is in a cache, as contig-cache-walkers need a consistent view!
*/
if (TEST(FRAG_SHARED, f->flags)) {
/* optimization: avoid unit lookup for private fragments.
* this assumes that no cache lock is used for private fragments!
*/
unit = FIFO_UNIT(f);
ASSERT(unit != NULL);
cache = unit->cache;
PROTECT_CACHE(cache, lock);
}
/* we back align to remove the padding */
ASSERT(ALIGNED(f->start_pc, START_PC_ALIGNMENT));
/* adjusting start_pc */
ASSERT(space <= START_PC_ALIGNMENT-1); /* most we can shift */
ASSERT(PAD_JMPS_SHIFT_START(f->flags));
    /* FIXME: no need to set this memory to anything, but it is easier to debug
     * if it's valid instructions */
SET_TO_DEBUG(f->start_pc, space);
f->start_pc += space;
ASSERT_TRUNCATE(f->size, ushort, (f->size - space));
f->size = (ushort) (f->size - space);
DODEBUG({
if (space > 0) {
STATS_PAD_JMPS_ADD(f->flags, num_start_pc_shifted, 1);
STATS_PAD_JMPS_ADD(f->flags, start_pc_shifted_bytes, space);
}
});
if (TEST(FRAG_SHARED, f->flags))
PROTECT_CACHE(cache, unlock);
}
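/* Return-space sketch (hypothetical numbers): if a fragment was emitted 40
 * bytes smaller than requested and carries 12 bytes of end padding in
 * fcache_extra with a 4-byte header, up to 40 + 12 - 4 = 48 bytes are
 * returnable; that amount is then reduced so the slot keeps at least
 * MIN_FCACHE_SLOT_SIZE and stays aligned to SLOT_ALIGNMENT before being given
 * back to the unit or to an adjacent empty/free slot.
 */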
void
fcache_return_extra_space(dcontext_t *dcontext, fragment_t *f, size_t space_in)
{
fcache_unit_t *unit = FIFO_UNIT(f);
fcache_t *cache = unit->cache;
uint returnable_space, slot_padding, space;
cache_pc end_addr;
uint min_padding = 0;
byte *returnable_start;
bool released = false;
PROTECT_CACHE(cache, lock);
/* truncate up front */
ASSERT_TRUNCATE(space, uint, space_in);
space = (uint) space_in;
DOSTATS({
if (ALIGN_FORWARD(f->size + HEADER_SIZE(f) - space,
SLOT_ALIGNMENT(cache)) <
MIN_FCACHE_SLOT_SIZE(cache))
STATS_INC(num_final_fragment_too_small);
});
/* get the total amount of free space at the end of the fragment, including
* any end padding (for slot alignment, etc.) stored in f->fcache_extra */
returnable_space = space + f->fcache_extra - HEADER_SIZE(f);
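/* never shrink the slot below the cache's minimum slot size; any excess that
* cannot be returned for that reason is kept with f as min_padding
*/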
if (FRAG_SIZE(f) - returnable_space < MIN_FCACHE_SLOT_SIZE(cache)) {
min_padding = returnable_space - (FRAG_SIZE(f) - MIN_FCACHE_SLOT_SIZE(cache));
returnable_space = FRAG_SIZE(f) - MIN_FCACHE_SLOT_SIZE(cache);
}
/* now adjust for slot alignment padding */
end_addr = FRAG_HDR_START(f)+FRAG_SIZE(f)-returnable_space;
ASSERT_TRUNCATE(slot_padding, uint, ALIGN_FORWARD(end_addr, SLOT_ALIGNMENT(cache)) -
(ptr_uint_t)end_addr);
slot_padding = (uint) (ALIGN_FORWARD(end_addr, SLOT_ALIGNMENT(cache)) -
(ptr_uint_t)end_addr);
returnable_space -= slot_padding;
returnable_start = FRAG_HDR_START(f)+FRAG_SIZE(f)-returnable_space;
ASSERT(FRAG_SIZE(f) - returnable_space >= MIN_FCACHE_SLOT_SIZE(cache));
ASSERT(ALIGNED(returnable_space, SLOT_ALIGNMENT(cache)));
ASSERT(ALIGNED(returnable_start, SLOT_ALIGNMENT(cache)));
if (returnable_space > 0) {
STATS_INC(pad_jmps_fragments_overestimated);
/* first check if f is the last fragment in the unit */
if (FRAG_HDR_START(f)+FRAG_SIZE(f) == unit->end_pc) {
/* f is the last fragment in a full unit */
ASSERT(unit->full);
if (returnable_space >= MIN_UNIT_END_HOLE(cache)) {
if (unit == cache->units) {
/* we just filled this unit, mark un-full and adjust cur_pc */
ASSERT(unit->cur_pc == unit->end_pc);
unit->full = false;
unit->cur_pc -= returnable_space;
STATS_FCACHE_SUB(cache, claimed, returnable_space);
STATS_SUB(fcache_combined_claimed, returnable_space);
released = true;
} else {
/* create a new empty slot */
fifo_prepend_empty(dcontext, cache, unit, NULL,
returnable_start, returnable_space);
released = true;
}
}
} else if (FRAG_HDR_START(f)+FRAG_SIZE(f) == unit->cur_pc) {
/* f is the last fragment in a non-full unit */
ASSERT(!unit->full);
unit->cur_pc -= returnable_space;
STATS_FCACHE_SUB(cache, claimed, returnable_space);
STATS_SUB(fcache_combined_claimed, returnable_space);
released = true;
} else {
if (!(DYNAMO_OPTION(cache_shared_free_list) && cache->is_shared)) {
fragment_t *next_f;
/* since we aren't at the end of the used space in the unit there must
* be a fragment or empty slot after us */
next_f = FRAG_NEXT_SLOT(FRAG_HDR_START(f), FRAG_SIZE(f));
ASSERT(next_f != NULL); /* sanity check, though not very good one */
if (FRAG_EMPTY(next_f)) {
STATS_FCACHE_ADD(cache, empty, returnable_space);
FRAG_START_ASSIGN(next_f, returnable_start + HEADER_SIZE(f));
*((fragment_t **)returnable_start) = next_f;
FRAG_SIZE_ASSIGN(next_f, FRAG_SIZE(next_f) + returnable_space);
released = true;
}
}
if (!released) {
/* return excess if next slot is a free list (will be coalesced) or
* if excess is large enough by itself
*/
fragment_t *subseq = FRAG_NEXT_SLOT(FRAG_HDR_START(f), FRAG_SIZE(f));
ASSERT(FRAG_HDR_START(f)+FRAG_SIZE(f) < unit->cur_pc);
if ((FRAG_IS_FREE_LIST(subseq) &&
/* make sure will coalesce
* FIXME: fragile if coalesce rules change -- perhaps have
* free list routine return failure?
*/
returnable_space + FRAG_NEXT_FREE(FRAG_HDR_START(f),
FRAG_SIZE(f))->size <=
MAX_FREE_ENTRY_SIZE) ||
returnable_space >= MIN_EMPTY_HOLE(cache)) {
fifo_prepend_empty(dcontext, cache, unit, NULL, returnable_start,
returnable_space);
released = true;
DOSTATS({
if (FRAG_IS_FREE_LIST(subseq))
STATS_INC(pad_jmps_excess_next_free);
});
}
}
}
}
/* even if returnable_space is 0, space is not, and we need to shift it
* from f->size to f->fcache_extra
*/
/* update fragment values + sanity check */
ASSERT(f->fcache_extra + space == slot_padding + HEADER_SIZE(f) +
returnable_space + min_padding);
ASSERT_TRUNCATE(f->size, ushort, (f->size - space));
f->size = (ushort) (f->size - space);
FRAG_SIZE_ASSIGN(f, f->size + HEADER_SIZE(f) + FRAG_START_PADDING(f) +
slot_padding + (released ? 0 : returnable_space) + min_padding);
ASSERT(FRAG_SIZE(f) >= MIN_FCACHE_SLOT_SIZE(cache));
DOSTATS({
if (released)
STATS_FCACHE_SUB(cache, used, returnable_space);
else if (returnable_space > 0) {
STATS_INC(pad_jmps_excess_wasted);
STATS_ADD(pad_jmps_excess_wasted_bytes, returnable_space);
}
});
DOLOG(3, LOG_CACHE, {
if (USE_FIFO_FOR_CACHE(cache))
verify_fifo(dcontext, cache);
});
PROTECT_CACHE(cache, unlock);
STATS_PAD_JMPS_ADD(f->flags, extra_space_released,
released ? returnable_space : 0);
}
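/* Picks the cache that should hold the new fragment f: the coarse unit's
* cache (created lazily here) for shared coarse-grain fragments, the global
* shared bb or trace cache for other shared fragments, or this thread's
* private bb or trace cache (also created lazily on first use).
*/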
static fcache_t *
get_cache_for_new_fragment(dcontext_t *dcontext, fragment_t *f)
{
fcache_thread_units_t *tu = (fcache_thread_units_t *) dcontext->fcache_field;
fcache_t *cache = NULL;
if (TEST(FRAG_SHARED, f->flags)) {
if (TEST(FRAG_COARSE_GRAIN, f->flags)) {
/* new fragment must be targeting the one non-frozen unit */
coarse_info_t *info = get_executable_area_coarse_info(f->tag);
ASSERT(info != NULL);
if (info != NULL && info->frozen)
info = info->non_frozen;
ASSERT(info != NULL);
if (info->cache == NULL) {
/* due to lock ordering problems we must create the cache
* before acquiring the info->lock
*/
cache = fcache_cache_init(GLOBAL_DCONTEXT, f->flags, true);
mutex_lock(&info->lock);
if (info->cache == NULL) {
cache->coarse_info = info;
coarse_unit_init(info, cache);
ASSERT(cache == info->cache);
mutex_unlock(&info->lock);
} else {
/* w/ bb_building_lock we shouldn't have a race here */
ASSERT_CURIOSITY(false && "race in creating coarse cache");
mutex_unlock(&info->lock);
fcache_cache_free(GLOBAL_DCONTEXT, cache, true);
}
}
ASSERT(info->cache != NULL);
ASSERT(((fcache_t *)info->cache)->coarse_info == info);
return (fcache_t *) info->cache;
} else {
if (IN_TRACE_CACHE(f->flags))
return shared_cache_trace;
else
return shared_cache_bb;
}
} else {
/* thread-private caches are delayed */
if (IN_TRACE_CACHE(f->flags)) {
if (tu->trace == NULL) {
tu->trace = fcache_cache_init(dcontext, FRAG_IS_TRACE, true);
ASSERT(tu->trace != NULL);
LOG(THREAD, LOG_CACHE, 1, "Initial trace cache is %d KB\n",
tu->trace->init_unit_size/1024);
}
return tu->trace;
} else {
if (tu->bb == NULL) {
tu->bb = fcache_cache_init(dcontext, 0/*private bb*/, true);
ASSERT(tu->bb != NULL);
LOG(THREAD, LOG_CACHE, 1, "Initial basic block cache is %d KB\n",
tu->bb->init_unit_size/1024);
}
return tu->bb;
}
}
ASSERT_NOT_REACHED();
return NULL;
}
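/* Computes the slot size for f (size plus header, rounded up to the cache's
* slot alignment and minimum slot size), records the leftover in
* f->fcache_extra, and hands the request to add_fragment_common() for
* placement in the proper cache.
*/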
void
fcache_add_fragment(dcontext_t *dcontext, fragment_t *f)
{
fcache_thread_units_t *tu = (fcache_thread_units_t *) dcontext->fcache_field;
uint slot_size;
fcache_t *cache = get_cache_for_new_fragment(dcontext, f);
ASSERT(cache != NULL);
PROTECT_CACHE(cache, lock);
/* set start_pc to START_PC_ALIGNMENT so will work with FRAG_START_PADDING */
f->start_pc = (cache_pc)(ptr_uint_t)START_PC_ALIGNMENT;
/* delayed unmap...presumably ok to do now! */
if (tu->pending_unmap_pc != NULL) {
/* de-allocate old memory */
/* remove from interval data struct first to avoid races w/ it
* being re-used and not showing up in in_fcache
*/
vmvector_remove(fcache_unit_areas, tu->pending_unmap_pc,
tu->pending_unmap_pc+tu->pending_unmap_size);
/* caller must dec stats since here we don't know type of cache */
heap_munmap(tu->pending_unmap_pc, tu->pending_unmap_size);
tu->pending_unmap_pc = NULL;
}
/* starting address of a fragment and its size should always
* be cache-line-aligned. we use a 4-byte header as a backpointer
* to the fragment_t.
*/
slot_size = f->size + HEADER_SIZE(f);
if (slot_size < MIN_FCACHE_SLOT_SIZE(cache)) {
STATS_INC(num_fragment_too_small);
slot_size = MIN_FCACHE_SLOT_SIZE(cache);
}
slot_size = (uint) ALIGN_FORWARD(slot_size, SLOT_ALIGNMENT(cache));
ASSERT(slot_size >= f->size + HEADER_SIZE(f));
ASSERT_TRUNCATE(f->fcache_extra, byte, (slot_size - f->size));
f->fcache_extra = (byte) (slot_size - f->size);
LOG(THREAD, LOG_CACHE, 4,
"fcache_add_fragment to %s cache (size %dKB): F%d w/ size %d (=> %d)\n",
cache->name, cache->units->size/1024, f->id, f->size, slot_size);
add_fragment_common(dcontext, cache, f, slot_size);
ASSERT(!PAD_JMPS_SHIFT_START(f->flags) ||
ALIGNED(f->start_pc, START_PC_ALIGNMENT)); /* for start_pc padding to work */
DOLOG(3, LOG_CACHE, {
if (USE_FIFO_FOR_CACHE(cache))
verify_fifo(dcontext, cache);
});
PROTECT_CACHE(cache, unlock);
}
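/* Removes f from its cache, returning its slot to the empty-slot/FIFO lists
* unless the entire unit is already pending free.
*/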
void
fcache_remove_fragment(dcontext_t *dcontext, fragment_t *f)
{
fcache_unit_t *unit = FIFO_UNIT(f);
fcache_t *cache = unit->cache;
/* should only be deleted through proper synched channels */
ASSERT(dcontext != GLOBAL_DCONTEXT || dynamo_exited || dynamo_resetting ||
TEST(FRAG_WAS_DELETED, f->flags) || is_self_allsynch_flushing());
PROTECT_CACHE(cache, lock);
LOG(THREAD, LOG_CACHE, 4, "fcache_remove_fragment: F%d from %s unit\n",
f->id, cache->name);
DOSTATS({ removed_fragment_stats(dcontext, cache, f); });
STATS_FCACHE_SUB(cache, used, FRAG_SIZE(f));
#ifdef DEBUG_MEMORY
/* Catch stale execution by filling w/ int3.
* We do this before fifo_prepend_empty to avoid figuring whether
* to leave alone the 1st 4 bytes of fragment space or not (it's
* used to store the size for cache_shared_free_list).
* FIXME: put in the rest of the patterns and checks to make this
* parallel to heap DEBUG_MEMORY (==case 5657)
*/
memset(f->start_pc, DEBUGGER_INTERRUPT_BYTE, f->size);
#endif
/* if the entire unit is being freed, do not place individual fragments in
* the unit on free lists or the FIFO
*/
if (!unit->pending_free) {
/* empty slots always go on front */
fifo_prepend_empty(dcontext, cache, unit, f, FRAG_HDR_START(f), FRAG_SIZE(f));
if (USE_FIFO(f)) {
fifo_remove(dcontext, cache, f);
DOLOG(3, LOG_CACHE, { verify_fifo(dcontext, cache); });
}
}
PROTECT_CACHE(cache, unlock);
}
#ifdef SIDELINE
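/* Returns the dcontext associated with the cache unit containing f. */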
dcontext_t *
get_dcontext_for_fragment(fragment_t *f)
{
fcache_unit_t *unit = fcache_lookup_unit(f->start_pc);
ASSERT(unit != NULL);
return unit->dcontext;
}
#endif
/***************************************************************************
* FLUSHING OF UNITS
*
* General strategy: first, put units to flush on the global units_to_flush
* list. At a nolinking point, use flush synch to get other threads out of
* the target units, and then walk each unit using contiguous header walk.
* Remove free list and lazy entries, and chain up all fragments to pass to
* flush_fragments_unlink_shared(). Move units to a global units_to_free list, recording
* their flushtimes. We'll be notified when a pending delete entry is
* freed, and we can then check which units are safe to free.
*/
bool
fcache_is_flush_pending(dcontext_t *dcontext)
{
fcache_thread_units_t *tu = (fcache_thread_units_t *) dcontext->fcache_field;
return tu->pending_flush;
}
/* accepts a chain of units linked by next_local.
* caller must set pending_free and flushtime fields.
*/
static void
append_units_to_free_list(fcache_unit_t *u)
{
mutex_lock(&unit_flush_lock);
/* must append to keep in increasing flushtime order */
if (allunits->units_to_free_tail == NULL) {
ASSERT(allunits->units_to_free == NULL);
allunits->units_to_free = u;
} else {
ASSERT(allunits->units_to_free->flushtime <= u->flushtime);
ASSERT(allunits->units_to_free_tail->next_local == NULL);
ASSERT(allunits->units_to_free_tail != u);
allunits->units_to_free_tail->next_local = u;
}
STATS_ADD_PEAK(cache_units_tofree, 1);
/* support adding a chain */
ASSERT(u->flushtime > 0);
ASSERT(u->pending_free);
while (u->next_local != NULL) {
if (u->cache != NULL)
remove_unit_from_cache(u);
ASSERT(u->cache == NULL);
STATS_ADD_PEAK(cache_units_tofree, 1);
ASSERT(u->flushtime <= u->next_local->flushtime);
u = u->next_local;
ASSERT(u->flushtime > 0);
ASSERT(u->pending_free);
}
allunits->units_to_free_tail = u;
ASSERT(allunits->units_to_free_tail->next_local == NULL);
mutex_unlock(&unit_flush_lock);
}
/* It is up to the caller to ensure it's safe to string the fragments in
* unit into a list, by only calling us between stage1 and stage2 of
* flushing (there's no other synch that is safe).
*/
static fragment_t *
chain_fragments_for_flush(dcontext_t *dcontext, fcache_unit_t *unit, fragment_t **tail)
{
fragment_t *list = NULL, *f = NULL, *prev_f = NULL;
cache_pc pc;
ASSERT(is_self_flushing());
LOG(THREAD, LOG_CACHE, 4, "\tchaining fragments in unit "PFX"-"PFX"\n",
unit->start_pc, unit->end_pc);
/* FIXME: we walk all fragments here just to call
* vm_area_remove_fragment(), and do another complete walk in
* unlink_fragments_for_deletion() -- can we reduce to one walk?
*/
pc = unit->start_pc;
while (pc < unit->cur_pc) {
bool add_to_list = false;
f = *((fragment_t **)pc);
LOG(THREAD, LOG_CACHE, 5, "\treading "PFX" -> "PFX"\n", pc, f);
if (USE_FREE_LIST_FOR_CACHE(unit->cache)) {
if (FRAG_IS_FREE_LIST(f)) {
/* we're going to free the whole unit so we have to
* remove this entry from the free list
*/
/* while flush synch is enough, cleaner to hold official lock */
free_list_header_t *cur_free = ((free_list_header_t *) pc);
int bucket = find_free_list_bucket(cur_free->size);
LOG(THREAD, LOG_CACHE, 5,
"\tremoving free list entry "PFX" size %d bucket [%d]\n",
cur_free, cur_free->size, bucket);
/* we officially grab the lock for free list manip, though
* flush synch is enough. we can't hold cache lock for this
* entire routine b/c it has lower rank than the lazy_delete lock.
*/
PROTECT_CACHE(unit->cache, lock);
remove_from_free_list(unit->cache, bucket, cur_free
_IF_DEBUG(false/*!coalesce*/));
PROTECT_CACHE(unit->cache, unlock);
pc += cur_free->size;
continue;
}
}
ASSERT(f != NULL);
ASSERT(FIFO_UNIT(f) == unit);
ASSERT(FRAG_HDR_START(f) == pc);
ASSERT(!TEST(FRAG_CANNOT_DELETE, f->flags));
if (TEST(FRAG_WAS_DELETED, f->flags)) {
/* must be a consistency-flushed or lazily deleted fragment.
* while typically it will be deleted before the other fragments
* in this unit (since flushtime now is < ours, and lazy are deleted
* before pending-delete), there is a case where it will not be:
* if the lazy list hits its threshold and a new pending-delete entry
* is created before we free our unit-flush pending entry, the
* lazy pending entry will be freed AFTER flushed unit's entries.
* we must remove from the lazy list, and go ahead and put into the
* pending delete list, to keep all our dependences together.
* fragment_unlink_for_deletion() will not re-do the unlink for
* the lazy fragment.
* FIXME: this is inefficient b/c no prev ptr in lazy list
*/
add_to_list = remove_from_lazy_deletion_list(dcontext, f);
/* if not found, we assume the fragment has already been moved to
* a pending delete entry, which must have a lower timestamp
* than ours, so we're all set since vm_area_check_shared_pending()
* now walks all to-be-freed entries in increasing flushtime order.
*/
LOG(THREAD, LOG_CACHE, 5,
"\tlazily-deleted fragment F%d."PFX" removed from lazy list\n",
f->id, f->start_pc);
} else {
LOG(THREAD, LOG_CACHE, 5,
"\tadding F%d."PFX" to to-flush list\n", f->id, f->start_pc);
vm_area_remove_fragment(dcontext, f);
add_to_list = true;
}
if (add_to_list) {
if (prev_f == NULL)
list = f;
else
prev_f->next_vmarea = f;
prev_f = f;
}
/* advance to contiguously-next fragment_t in cache */
pc += FRAG_SIZE(f);
}
ASSERT(pc == unit->cur_pc);
/* If entire unit is free list entries or lazy-dels that
* were moved to pending-delete list, then we'll have no list and
* prev_f==NULL.
*/
if (list == NULL) {
ASSERT(prev_f == NULL);
/* we have to finish off the flush synch so we just pass no list
* or region to flush_fragments_unlink_shared()
*/
STATS_INC(cache_units_flushed_nolive);
} else {
ASSERT(list != NULL);
ASSERT(prev_f != NULL);
prev_f->next_vmarea = NULL;
}
if (tail != NULL)
*tail = prev_f;
return list;
}
/* Flushes all fragments in the units in the units_to_flush list and
* moves those units to the units_to_free list.
* This routine can only be called when !is_self_couldbelinking() and when
* no locks are held.
*/
bool
fcache_flush_pending_units(dcontext_t *dcontext, fragment_t *was_I_flushed)
{
fcache_thread_units_t *tu = (fcache_thread_units_t *) dcontext->fcache_field;
fcache_unit_t *unit_flushed = NULL;
fcache_unit_t *u, *local_to_flush;
bool not_flushed = true;
fragment_t *list_head = NULL, *list_tail = NULL;
fragment_t *list_add_head, *list_add_tail;
uint flushtime;
DEBUG_DECLARE(bool flushed;)
ASSERT(!is_self_couldbelinking());
ASSERT_OWN_NO_LOCKS();
if (!tu->pending_flush)
return not_flushed;
tu->pending_flush = false;
/* we grab a local copy to deal w/ races to flush these units up front
* rather than getting into the flush synch and finding someone beat us
*/
mutex_lock(&unit_flush_lock);
local_to_flush = allunits->units_to_flush;
allunits->units_to_flush = NULL;
mutex_unlock(&unit_flush_lock);
if (local_to_flush == NULL)
return not_flushed;
LOG(THREAD, LOG_CACHE, 2, "flushing fragments in all pending units\n");
/* flush flag is private, and shared caches are synch-ed within
* pending_flush_units_in_cache, so no lock needed here
*/
if (was_I_flushed != NULL)
unit_flushed = FIFO_UNIT(was_I_flushed);
/* First we have to synch w/ all threads to avoid races w/ other threads
* manipulating fragments in these units at the same time that we are (e.g.,
* lazily deleting a trace head). Sure, the unit is not on the live
* list anymore, but the fragments are reachable.
*/
DEBUG_DECLARE(flushed =)
flush_fragments_synch_unlink_priv(dcontext, EMPTY_REGION_BASE,
EMPTY_REGION_SIZE,
false/*don't have thread_initexit_lock*/,
false/*not invalidating exec areas*/,
false/*don't force synchall*/
_IF_DGCDIAG(NULL));
ASSERT(flushed);
KSTART(cache_flush_unit_walk);
for (u = local_to_flush; u != NULL; u = u->next_local) {
/* not implemented for private units, since no reason to use flush there */
ASSERT(u->cache->is_shared);
/* unit is no longer to be considered live -- no other thread should
* use it as a live unit from this point on
*/
DODEBUG({ u->pending_flush = false; });
/* Indicate that individual fragments in this unit must not be put
* on free lists/FIFO empty slots.
* FIXME: would be nice to set this earlier when move to
* units_to_flush list, but then end up w/ freed fragments in the
* middle of the cache that we can't identify in our walk -- so we
* inefficiently put on free list and then take off in the walk.
*/
u->pending_free = true;
if (u == unit_flushed)
not_flushed = false;
list_add_head = chain_fragments_for_flush(dcontext, u, &list_add_tail);
if (list_head == NULL) {
list_head = list_add_head;
list_tail = list_add_tail;
} else if (list_add_head != NULL) {
list_tail->next_vmarea = list_add_head;
list_tail = list_add_tail;
}
ASSERT(list_tail == NULL /*list empty so far*/ ||
list_tail->next_vmarea == NULL);
STATS_INC(cache_units_flushed);
STATS_DEC(cache_units_toflush);
}
KSTOP(cache_flush_unit_walk);
/* it's ok if list_head is NULL */
/* do the rest of the unlink and move the fragments to pending del list */
flush_fragments_unlink_shared(dcontext, EMPTY_REGION_BASE, EMPTY_REGION_SIZE,
list_head _IF_DGCDIAG(NULL));
flushtime = flushtime_global;
flush_fragments_end_synch(dcontext, false/*don't keep initexit_lock*/);
for (u = local_to_flush; u != NULL; u = u->next_local) {
u->flushtime = flushtime;
LOG(THREAD, LOG_CACHE, 2,
"flushed fragments in unit "PFX"-"PFX" @flushtime %u\n",
u->start_pc, u->end_pc, u->flushtime);
}
append_units_to_free_list(local_to_flush);
return not_flushed;
}
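/* Frees all units on the units_to_free list whose recorded flushtime is at or
* below the given flushtime, i.e., whose pending-delete dependences have been
* freed; the list is kept sorted so we can stop at the first newer unit.
*/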
void
fcache_free_pending_units(dcontext_t *dcontext, uint flushtime)
{
fcache_unit_t *u, *nxt;
mutex_lock(&unit_flush_lock);
for (u = allunits->units_to_free; u != NULL; u = nxt) {
nxt = u->next_local;
/* units_to_free list must be sorted in increasing flushtime order */
ASSERT(nxt == NULL || u->flushtime <= nxt->flushtime);
if (u->flushtime <= flushtime) {
if (u == allunits->units_to_free_tail) {
ASSERT(u == allunits->units_to_free);
ASSERT(nxt == NULL);
allunits->units_to_free_tail = NULL;
}
allunits->units_to_free = nxt;
LOG(THREAD, LOG_CACHE, 2, "freeing flushed unit "PFX"-"PFX"\n",
u->start_pc, u->end_pc);
ASSERT(u->pending_free);
u->pending_free = false;
/* free_unit will set flushtime to 0 (needs it to assert locks) */
fcache_free_unit(dcontext, u, true);
STATS_INC(cache_units_flushed_freed);
STATS_DEC(cache_units_tofree);
} else
break; /* sorted! */
}
mutex_unlock(&unit_flush_lock);
}
/* Used to prevent shared units earmarked for freeing from being re-used.
* Caller must be at full synch for a flush.
*/
static void
fcache_mark_units_for_free(dcontext_t *dcontext, fcache_t *cache)
{
fcache_unit_t *u, *head;
ASSERT(is_self_flushing()); /* FIXME: want to assert in full synch */
PROTECT_CACHE(cache, lock);
/* Mark all but the first unit as pending_free so that fragment deletion
* does not add their slots to free lists.
* Also set their flushtime and move them to the units_to_free list.
*/
u = cache->units;
ASSERT(u != NULL);
/* leave one unit */
head = u->next_local;
cache->units->next_local = NULL;
for (u = head; u != NULL; u = u->next_local) {
u->pending_free = true;
u->flushtime = flushtime_global;
remove_unit_from_cache(u);
}
PROTECT_CACHE(cache, unlock);
append_units_to_free_list(head);
}
/* Flush all fragments and mark as many cache units as possible for freeing
* (while invalidate_code_cache() only flushes all fragments and does not try
* to free any units -- it is meant for consistency purposes, while this is
* meant for capacity purposes).
* FIXME: currently only marks shared cache units for freeing.
* FIXME: should add -stress_flush_units N parameter
*/
void
fcache_flush_all_caches()
{
dcontext_t *dcontext = get_thread_private_dcontext();
ASSERT(dcontext != NULL);
ASSERT_NOT_TESTED();
/* FIXME: share parameters w/ invalidate_code_cache()?
* FIXME: efficiency of region-based vs unit-based flushing
*/
flush_fragments_in_region_start(dcontext, UNIVERSAL_REGION_BASE,
UNIVERSAL_REGION_SIZE,
false /* don't own initexit_lock */,
true /* remove futures */,
false/*not invalidating exec areas*/,
false/*don't force synchall*/
_IF_DGCDIAG(NULL));
/* In the presence of any shared fragments, all threads are stuck here at the synch point,
* so we can mess w/ global cache units in an atomic manner wrt the flush.
* We can't do private here since threads are let go if no shared
* fragments are enabled, but better to have each thread mark its
* own anyway. FIXME -- put flag in delete-list entry
*/
if (DYNAMO_OPTION(shared_bbs)) {
fcache_mark_units_for_free(dcontext, shared_cache_bb);
}
if (DYNAMO_OPTION(shared_traces)) {
fcache_mark_units_for_free(dcontext, shared_cache_trace);
}
/* FIXME: for thread-private units, should use a trigger in
* vm_area_flush_fragments() to call a routine here that frees all but
* one unit
*/
flush_fragments_in_region_finish(dcontext, false /*don't keep initexit_lock*/);
STATS_INC(fcache_flush_all);
}
/***************************************************************************/
/* Flush all fragments from all caches and free all of those caches, starting
* over completely, by suspending all other threads and freeing all fragments
* and cache units immediately.
* Can only be called while !couldbelinking.
* Assumes caller holds reset_pending_lock.
* Simultaneous resets are not queued up -- one wins and the rest are canceled.
* Use the schedule_reset() routine to queue up resets of different types, which
* will all be combined.
* FIXME: currently target is ignored and assumed to be RESET_ALL
*/
void
fcache_reset_all_caches_proactively(uint target)
{
thread_record_t **threads;
dcontext_t *my_dcontext = get_thread_private_dcontext();
int i, num_threads;
const thread_synch_state_t desired_state =
THREAD_SYNCH_SUSPENDED_VALID_MCONTEXT_OR_NO_XFER;
/* Reset is meaningless for hotp_only and thin_client modes (case 8389),
* though it can be used to release old tables; but old tables aren't
* needed for hotp_only, so they should be freed anyway, which is NYI
* today.
*/
ASSERT(DYNAMO_OPTION(enable_reset) && !RUNNING_WITHOUT_CODE_CACHE());
ASSERT(!is_self_couldbelinking());
/* synch with other threads also trying to call this routine */
/* FIXME: use a cleaner model than having callers grab this lock? */
ASSERT_OWN_MUTEX(true, &reset_pending_lock);
if (reset_in_progress) {
mutex_unlock(&reset_pending_lock);
return;
}
/* N.B.: we relax various synch checks if dynamo_resetting is true, since
* we will not be holding some locks we normally would need when
* deleting shared fragments, etc., assuming that we suspend all
* threads in DR while resetting -- if that ever changes we need to
* tighten up all those checks again!
*/
reset_in_progress = true;
/* this lock is only for synchronizing resets and we do not give it the
* rank it would need to be held across the whole routine
*/
mutex_unlock(&reset_pending_lock);
LOG(GLOBAL, LOG_CACHE, 2,
"\nfcache_reset_all_caches_proactively: thread "TIDFMT" suspending all threads\n",
get_thread_id());
/* Suspend all DR-controlled threads at safe locations.
* Case 6821: other synch-all-thread uses can be ignored, as none of them carry
* any non-persistent state.
*/
if (!synch_with_all_threads(desired_state, &threads, &num_threads,
/* when called prior to entering the cache we could set
* mcontext->pc to next_tag and use
* THREAD_SYNCH_VALID_MCONTEXT, but some callers (like
* nudge) do not satisfy that */
THREAD_SYNCH_NO_LOCKS_NO_XFER, /* Case 6821 */
/* if we fail to suspend a thread (e.g., for privilege
* reasons) just abort */
THREAD_SYNCH_SUSPEND_FAILURE_ABORT
/* if we get in a race with detach, or are having
* synch issues for whatever reason, bail out sooner
* rather than later */
| THREAD_SYNCH_SMALL_LOOP_MAX)) {
/* just give up */
reset_in_progress = false;
ASSERT(!OWN_MUTEX(&all_threads_synch_lock) && !OWN_MUTEX(&thread_initexit_lock));
ASSERT(threads == NULL);
ASSERT(!dynamo_all_threads_synched);
STATS_INC(fcache_reset_abort);
LOG(GLOBAL, LOG_CACHE, 2,
"fcache_reset_all_caches_proactively: aborting due to thread synch failure\n");
/* FIXME: may need DO_ONCE but only if we do a LOT of resets combined with
* other nudges or sources of thread permission problems */
SYSLOG_INTERNAL_WARNING("proactive reset aborted due to thread synch failure");
return;
}
/* now we own the thread_initexit_lock */
ASSERT(OWN_MUTEX(&all_threads_synch_lock) && OWN_MUTEX(&thread_initexit_lock));
STATS_INC(fcache_reset_proactively);
DOSTATS({
if (TEST(RESET_PENDING_DELETION, target))
STATS_INC(fcache_reset_pending_del);
});
DOLOG(1, LOG_STATS, {
LOG(GLOBAL, LOG_STATS, 1, "\n**************************Stats BEFORE reset:\n");
dump_global_stats(false);
});
LOG(GLOBAL, LOG_CACHE, 2,
"fcache_reset_all_caches_proactively: walking the threads\n");
DOSTATS({
SYSLOG_INTERNAL_INFO("proactive reset @ %d fragments", GLOBAL_STAT(num_fragments));
});
/* reset_free and reset_init may write to .data.
* All threads are suspended so no security risk.
*/
SELF_UNPROTECT_DATASEC(DATASEC_RARELY_PROT);
/* no lock needed */
dynamo_resetting = true;
/* We free everything before re-init so we can free all heap units.
* For everything to be freed, it must either be individually freed,
* or it must reside in non-persistent heap units,
* which will be thrown out wholesale in heap_reset_free(). The latter
* is preferable, to avoid wasting time on individual deletion.
* FIXME: add consistency check walks before and after for all modules
*/
for (i = 0; i < num_threads; i++) {
dcontext_t *dcontext = threads[i]->dcontext;
if (dcontext != NULL) { /* include my_dcontext here */
LOG(GLOBAL, LOG_CACHE, 2,
"\tconsidering thread #%d "TIDFMT"\n", i, threads[i]->id);
if (dcontext != my_dcontext) {
/* must translate BEFORE freeing any memory! */
if (is_thread_currently_native(threads[i])) {
/* native_exec's regain-control point is in our DLL,
* and lost-control threads are truly native, so no
* state to worry about except for hooks -- and we're
* not freeing the interception buffer.
*/
LOG(GLOBAL, LOG_CACHE, 2,
"\tcurrently native so no translation needed\n");
} else if (thread_synch_state_no_xfer(dcontext)) {
/* Case 6821: do not translate other synch-all-thread users.
* They have no persistent state, so leave alone.
* Also xref case 7760.
*/
LOG(GLOBAL, LOG_FRAGMENT, 2,
"\tat THREAD_SYNCH_NO_LOCKS_NO_XFER so no translation needed\n");
} else {
translate_from_synchall_to_dispatch(threads[i], desired_state);
}
}
last_exit_deleted(dcontext);
if (target == RESET_PENDING_DELETION) {
/* case 7394: need to abort other threads' trace building
* since the reset xfer to dispatch will disrupt it
*/
if (is_building_trace(dcontext)) {
LOG(THREAD, LOG_FRAGMENT, 2,
"\tsquashing trace of thread "TIDFMT"\n", i);
trace_abort(dcontext);
}
} else {
LOG(GLOBAL, LOG_CACHE, 2, "\tfreeing memory in thread "TIDFMT"\n", i);
LOG(THREAD, LOG_CACHE, 2, "------- reset for thread "TIDFMT" -------\n",
threads[i]->id);
/* N.B.: none of these can assume the executing thread is the
* dcontext owner, esp. wrt tls!
*/
/* FIXME: now we have {thread_,}init(), {thread_,}exit(), and
* *_reset() -- can we systematically construct these lists of
* module calls? The list here, though, is a subset of the others.
*/
/* monitor must go first to remove any undeletable private fragments */
monitor_thread_reset_free(dcontext);
fragment_thread_reset_free(dcontext);
fcache_thread_reset_free(dcontext);
/* arch and os data is all persistent */
vm_areas_thread_reset_free(dcontext);
/* now we throw out all non-persistent private heap units */
heap_thread_reset_free(dcontext);
}
}
}
if (target == RESET_PENDING_DELETION) {
/* FIXME: optimization: suspend only those threads with low flushtimes */
LOG(GLOBAL, LOG_CACHE, 2,
"fcache_reset_all_caches_proactively: clearing shared deletion list\n");
/* free entire shared deletion list */
vm_area_check_shared_pending(GLOBAL_DCONTEXT, NULL);
} else {
fragment_reset_free();
link_reset_free();
fcache_reset_free();
/* monitor only has thread-private data */
/* arch and os data is all persistent */
vm_areas_reset_free();
# ifdef HOT_PATCHING_INTERFACE
hotp_reset_free();
# endif
/* now we throw out all non-persistent global heap units */
heap_reset_free();
LOG(GLOBAL, LOG_CACHE, 2,
"fcache_reset_all_caches_proactively: re-initializing\n");
/* now set up state all over again */
heap_reset_init();
# ifdef HOT_PATCHING_INTERFACE
hotp_reset_init();
# endif
vm_areas_reset_init();
fcache_reset_init();
link_reset_init();
fragment_reset_init();
for (i = 0; i < num_threads; i++) {
dcontext_t *dcontext = threads[i]->dcontext;
if (dcontext != NULL) { /* include my_dcontext here */
LOG(GLOBAL, LOG_CACHE, 2,
"fcache_reset_all_caches_proactively: re-initializing thread "TIDFMT"\n", i);
/* now set up private state all over again -- generally, we can do
* this before the global free/init since our private/global
* free/init are completely separate (due to the presence of
* persistent state we cannot do a global quick-free anyway).
* But when using shared IBL tables, fragment_reset_init() must
* be called before fragment_thread_reset_init(), since the
* latter copies global state initialized by the former. To
* simplify the code, we simply init all global state prior to
* initing private state (xref case 8092).
*/
heap_thread_reset_init(dcontext);
vm_areas_thread_reset_init(dcontext);
monitor_thread_reset_init(dcontext);
fcache_thread_reset_init(dcontext);
fragment_thread_reset_init(dcontext);
}
}
}
/* we assume these are atomic and need no locks */
dynamo_resetting = false;
/* new resets will now queue up on all_threads_synch_lock */
reset_in_progress = false;
SELF_PROTECT_DATASEC(DATASEC_RARELY_PROT);
DOLOG(1, LOG_STATS, {
LOG(GLOBAL, LOG_STATS, 1, "\n**************************Stats AFTER reset:\n");
dump_global_stats(false);
});
LOG(GLOBAL, LOG_CACHE, 2,
"fcache_reset_all_caches_proactively: resuming all threads\n");
end_synch_with_all_threads(threads, num_threads, true/*resume*/);
}
/* returns true if specified target wasn't already scheduled for reset */
bool
schedule_reset(uint target)
{
bool added_target;
ASSERT(target != 0);
mutex_lock(&reset_pending_lock);
added_target = !TESTALL(target, reset_pending);
reset_pending |= target;
mutex_unlock(&reset_pending_lock);
return added_target;
}
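/* Illustrative usage (not taken from any particular call site): a caller that
* detects capacity pressure can queue a combined reset via
* schedule_reset(RESET_ALL) and let a later safe point carry it out through
* fcache_reset_all_caches_proactively(); concurrent requests are simply OR-ed
* into reset_pending rather than triggering separate resets.
*/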
#if 0 /* currently not used; see note in fcache_low_on_memory */
static void
fcache_reset_cache(dcontext_t *dcontext, fcache_t *cache)
{
fcache_unit_t *u, *prevu;
fragment_t *f;
cache_pc pc, last_pc;
bool unit_empty;
uint num_units = 0;
uint sz;
/* FIXME: this is called when low on memory: safe to grab lock? */
PROTECT_CACHE(cache, lock);
LOG(THREAD, LOG_CACHE, 2, "fcache_reset_cache %s\n", cache->name);
/* we need to free entire units, so don't walk FIFO */
for (u = cache->units; u != NULL; u = u->next_local) {
LOG(THREAD, LOG_CACHE, 3, " unit %d: "PFX" -> cur "PFX"\n",
num_units, u->start_pc, u->cur_pc);
num_units++;
/* try to delete everybody we can */
unit_empty = true;
pc = u->start_pc;
last_pc = pc;
while (pc < u->cur_pc) {
LOG(THREAD, LOG_CACHE, 4, " f @ "PFX"\n", pc);
f = *((fragment_t **)pc);
ASSERT(f != NULL);
ASSERT(FIFO_UNIT(f) == u);
/* go to contiguously-next fragment_t in cache */
sz = FRAG_SIZE(f);
/* FIXME: do we still need the notion of undeletable fragments?
* Should do an analysis and see if we ever use it anymore.
* It is a powerful feature to support, but also a limiting one...
*/
if (TEST(FRAG_CANNOT_DELETE, f->flags)) {
unit_empty = false;
/* FIXME: this allocates memory for the empty_slot_t data struct! */
fifo_prepend_empty(dcontext, cache, u, NULL, last_pc, pc - last_pc);
STATS_FCACHE_SUB(cache, used, (pc - last_pc));
last_pc = pc + sz;
} else {
/* FIXME: in low-memory situation, will we have problem
* with the future fragment that will be created?
* Even worse, what if it triggers a resize of its hashtable?
* Since we're deleting everyone, we should set some flag saying
* "don't create any future fragment for this deleted fragment"
*/
force_fragment_from_cache(dcontext, cache, f);
}
pc += sz;
}
if (last_pc < u->cur_pc) {
fifo_prepend_empty(dcontext, cache, u, NULL, last_pc, pc - last_pc);
STATS_FCACHE_SUB(cache, used, (pc - last_pc));
}
if (unit_empty) {
/* hack to indicate empty -- we restore end_pc in next loop */
u->end_pc = NULL;
}
}
for (prevu = NULL, u = cache->units; u != NULL; prevu = u, u = u->next_local) {
if (u->end_pc == NULL) {
u->end_pc = u->start_pc + u->size;
/* have to leave one unit */
if (num_units > 1) {
num_units--;
fcache_free_unit(dcontext, u, true);
if (prevu == NULL)
u = cache->units;
else
u = prevu->next_local;
}
}
}
/* FIXME: try to shrink remaining unit(s)? Would we do that by freeing
* just the tail of the unit?
*/
PROTECT_CACHE(cache, unlock);
}
#endif /* #if 0 */
/* This routine has to assume it cannot allocate memory.
* Always safe to free free list (using lock).
* Other allocations must be freed only in the current thread:
* cannot get list of all threads for a global approach b/c that requires memory.
* We let the other threads trip over the low memory trigger to flush their own
* caches.
*/
void
fcache_low_on_memory()
{
fcache_unit_t *u, *next_u;
size_t freed = 0;
#if 0
dcontext_t *dcontext = get_thread_private_dcontext();
fcache_thread_units_t *tu = (fcache_thread_units_t *) dcontext->fcache_field;
/* FIXME: we cannot reset the cache at arbitrary points -- and we can
* be called at any alloc point! If in middle of fragment creation,
* we can't just go delete the fragment!
* STRATEGY:
* keep a reserved piece of heap per thread that's big enough to get to
* a safe point from any DR allocation site (perhaps it should use
* a stack allocator). We keep going, using that for memory
* (have to work out shared vs private memory issues if building a
* shared bb), and then at a safe point we reset the cache.
*/
/* free the current thread's entire cache, leaving one empty unit */
if (tu->bb != NULL)
fcache_reset_cache(dcontext, tu->bb);
if (tu->trace != NULL)
fcache_reset_cache(dcontext, tu->trace);
#endif
/* now free the entire dead list (including thread units just moved here) */
LOG(GLOBAL, LOG_CACHE|LOG_STATS, 1,
"fcache_low_on_memory: about to free dead list units\n");
/* WARNING: this routine is called at arbitrary allocation failure points,
* so we have to be careful what locks we grab.
* No allocation site can hold a lock weaker in rank than heap_unit_lock,
* b/c it could deadlock on the allocation itself! So we only have to worry
* about the locks of rank between heap_alloc_lock and allunits_lock --
* currently dynamo_areas, fcache_unit_areas and global_alloc_lock. We
* check for those locks here. FIXME we have no way to check if holding
* a readlock on the dynamo/fcache_unit_areas lock. FIXME owning the
* dynamo_areas lock here is prob. not that uncommon, we may be able to
* release and re-grab it but would have to be sure that works in all the
* corner cases (if the failing alloc is for a dynamo_areas vector resize
* etc.).
*/
if (lockwise_safe_to_allocate_memory() &&
!self_owns_dynamo_vm_area_lock() &&
!self_owns_write_lock(&fcache_unit_areas->lock)) {
mutex_lock(&allunits_lock);
u = allunits->dead;
while (u != NULL) {
next_u = u->next_global;
freed += u->size;
fcache_really_free_unit(u, true/*on dead list*/, true/*dealloc*/);
u = next_u;
}
allunits->dead = NULL;
mutex_unlock(&allunits_lock);
LOG(GLOBAL, LOG_CACHE|LOG_STATS, 1,
"fcache_low_on_memory: freed %d KB\n", freed/1024);
} else {
LOG(GLOBAL, LOG_CACHE|LOG_STATS, 1,
"fcache_low_on_memory: cannot walk units b/c of deadlock potential\n");
}
options_make_writable();
/* be more aggressive about not resizing cache
* FIXME: I just made this up -- have param to control?
* FIXME: restore params back to original values at some point?
*/
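/* in effect: require twice as many regenerated fragments before resizing,
* but never let the regen threshold exceed the replace threshold -- cap it
* at 4/5 (80%) of replace
*/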
if (dynamo_options.finite_bb_cache && dynamo_options.cache_bb_replace > 0) {
dynamo_options.cache_bb_regen *= 2;
if (dynamo_options.cache_bb_regen > dynamo_options.cache_bb_replace)
dynamo_options.cache_bb_regen = 4*dynamo_options.cache_bb_replace/5;
}
if (dynamo_options.finite_shared_bb_cache &&
dynamo_options.cache_shared_bb_replace > 0) {
dynamo_options.cache_shared_bb_regen *= 2;
if (dynamo_options.cache_shared_bb_regen >
dynamo_options.cache_shared_bb_replace) {
dynamo_options.cache_shared_bb_regen =
4*dynamo_options.cache_shared_bb_replace/5;
}
}
if (dynamo_options.finite_shared_trace_cache &&
dynamo_options.cache_shared_trace_replace > 0) {
dynamo_options.cache_shared_trace_regen *= 2;
if (dynamo_options.cache_shared_trace_regen >
dynamo_options.cache_shared_trace_replace) {
dynamo_options.cache_shared_trace_regen =
4*dynamo_options.cache_shared_trace_replace/5;
}
}
/* FIXME: be more or less aggressive about traces than bbs?
* could get rid of trace cache altogether...
*/
if (dynamo_options.finite_trace_cache && dynamo_options.cache_trace_replace > 0) {
dynamo_options.cache_trace_regen *= 2;
if (dynamo_options.cache_trace_regen > dynamo_options.cache_trace_replace)
dynamo_options.cache_trace_regen = 4*dynamo_options.cache_trace_replace/5;
}
options_restore_readonly();
}
/***************************************************************************
* COARSE-GRAIN UNITS
*/
/* Returns NULL if pc is not an address contained in a coarse fcache unit */
coarse_info_t *
get_fcache_coarse_info(cache_pc pc)
{
fcache_unit_t *unit = fcache_lookup_unit(pc);
if (unit == NULL || unit->cache == NULL)
return NULL;
ASSERT((unit->cache->is_coarse && unit->cache->coarse_info != NULL) ||
(!unit->cache->is_coarse && unit->cache->coarse_info == NULL));
return unit->cache->coarse_info;
}
void
fcache_coarse_cache_delete(dcontext_t *dcontext, coarse_info_t *info)
{
fcache_t *cache;
ASSERT(info != NULL);
ASSERT_OWN_MUTEX(!info->is_local, &info->lock);
cache = (fcache_t *) info->cache;
if (cache == NULL) /* lazily initialized, so common to have empty units */
return;
/* We don't PROTECT_CACHE(cache, lock) to avoid rank order w/ coarse info lock.
* We assume that deletion can only happen for local cache or at reset/exit.
*/
DODEBUG({ cache->is_local = true; });
fcache_cache_free(dcontext, cache, !info->frozen/*do not free frozen unit*/);
info->cache = NULL;
/* We unlink any outgoing links by walking the stubs, not walking the units,
* so nothing else to do here
*/
}
/* Returns an upper bound on the size needed for the cache if info is frozen. */
size_t
coarse_frozen_cache_size(dcontext_t *dcontext, coarse_info_t *info)
{
fcache_t *cache;
ASSERT(info != NULL);
ASSERT_OWN_MUTEX(true, &info->lock);
cache = (fcache_t *) info->cache;
if (cache == NULL)
return 0;
/* we ignore any shrinking from eliding fall-through ubrs
* or conversion to 8-bit-jmps
*/
/* cache->size is simply committed size, so subtract unused committed
* at end of last unit; we ignore small unused space at end of each unit.
*/
return cache->size - (cache->units->end_pc - cache->units->cur_pc);
}
/* Assumes that no cache lock is needed because info is newly created
* and unknown to all but this thread.
*/
void
fcache_coarse_init_frozen(dcontext_t *dcontext, coarse_info_t *info,
cache_pc start_pc, size_t size)
{
fcache_t *cache = fcache_cache_init(GLOBAL_DCONTEXT, FRAG_SHARED | FRAG_COARSE_GRAIN,
false/*no initial unit*/);
/* We don't PROTECT_CACHE(cache, lock) to avoid rank order w/ coarse info lock,
* assuming that info is newly created and unknown to all but this thread.
* (For freezing we also have dynamo_all_threads_synched, but we don't for
* loading in persisted caches.)
*/
DODEBUG({ cache->is_local = true; });
cache->units = fcache_create_unit(dcontext, cache, start_pc, size);
DODEBUG({ cache->is_local = false; });
cache->units->cur_pc = cache->units->end_pc;
cache->units->full = true;
cache->coarse_info = info;
info->cache = cache;
}
/* Used when swapping info structs for in-place freezing */
void
fcache_coarse_set_info(dcontext_t *dcontext, coarse_info_t *info)
{
fcache_t *cache;
ASSERT(info != NULL);
ASSERT_OWN_MUTEX(true, &info->lock);
cache = (fcache_t *) info->cache;
cache->coarse_info = info;
}