/* **********************************************************
* Copyright (c) 2011-2014 Google, Inc. All rights reserved.
* Copyright (c) 2000-2010 VMware, Inc. All rights reserved.
* **********************************************************/
/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/* Copyright (c) 2003-2007 Determina Corp. */
/* Copyright (c) 2001-2003 Massachusetts Institute of Technology */
/* Copyright (c) 2000-2001 Hewlett-Packard Company */
/*
* fragment.c - fragment related routines
*/
#include "globals.h"
#include "link.h"
#include "fragment.h"
#include "fcache.h"
#include "emit.h"
#include "monitor.h"
#include <string.h> /* for memset */
#include "instrument.h"
#include <stddef.h> /* for offsetof */
#include <limits.h> /* UINT_MAX */
#include "perscache.h"
#include "synch.h"
#ifdef UNIX
# include "nudge.h"
#endif
/* FIXME: make these runtime parameters */
#define INIT_HTABLE_SIZE_SHARED_BB (DYNAMO_OPTION(coarse_units) ? 5 : 10)
#define INIT_HTABLE_SIZE_SHARED_TRACE 10
/* the only private bbs will be selfmod, so start small */
#define INIT_HTABLE_SIZE_BB (DYNAMO_OPTION(shared_bbs) ? 5 : 10)
/* coarse-grain fragments do not use futures */
#define INIT_HTABLE_SIZE_SHARED_FUTURE (DYNAMO_OPTION(coarse_units) ? 5 : 10)
#ifdef RETURN_AFTER_CALL
/* we have small per-module hashtables */
# define INIT_HTABLE_SIZE_AFTER_CALL 5
#endif
/* private futures are only used when we have private fragments */
#define INIT_HTABLE_SIZE_FUTURE \
((DYNAMO_OPTION(shared_bbs) && DYNAMO_OPTION(shared_traces)) ? 5 : 9)
/* per-module htables */
#define INIT_HTABLE_SIZE_COARSE 5
#define INIT_HTABLE_SIZE_COARSE_TH 4
#ifdef RCT_IND_BRANCH
# include "rct.h"
/* we have small per-module hashtables */
# define INIT_HTABLE_SIZE_RCT_IBT 7
# ifndef RETURN_AFTER_CALL
# error RCT_IND_BRANCH requires RETURN_AFTER_CALL since it reuses data types
# endif
#endif
/* if shared traces, we currently have no private traces so make table tiny
 * FIXME: should start out w/ no table at all
*/
#define INIT_HTABLE_SIZE_TRACE (DYNAMO_OPTION(shared_traces) ? 6 : 9)
/* for small table sizes resize is not an expensive operation and we start smaller */
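/* N.B.: the INIT_HTABLE_SIZE_* values above are hashtable bits, not entry counts:
 * they are passed as the bit-size parameter of the hashtable_*_init() routines,
 * so the initial capacity is on the order of 2^n entries.
 */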
/* Current flusher, protected by thread_initexit_lock. */
DECLARE_FREQPROT_VAR(static dcontext_t *flusher, NULL);
/* Current allsynch-flusher, protected by thread_initexit_lock. */
DECLARE_FREQPROT_VAR(static dcontext_t *allsynch_flusher, NULL);
/* These global tables are kept on the heap for selfprot (case 7957) */
/* synchronization to these tables is accomplished via read-write locks,
* where the writers are removal and resizing -- addition is atomic to
* readers.
* for now none of these are read from ibl routines so we only have to
* synch with other DR routines
*/
static fragment_table_t *shared_bb;
static fragment_table_t *shared_trace;
/* if we have either shared bbs or shared traces we need this shared: */
static fragment_table_t *shared_future;
/* Thread-shared tables are allocated in a shared per_thread_t.
* The structure is also used if we're dumping shared traces.
* Kept on the heap for selfprot (case 7957)
*/
static per_thread_t *shared_pt;
#define USE_SHARED_PT() (SHARED_IBT_TABLES_ENABLED() || \
(TRACEDUMP_ENABLED() && DYNAMO_OPTION(shared_traces)))
/* We keep track of "old" IBT target tables in a linked list and
* deallocate them in fragment_exit(). */
/* FIXME Deallocate tables more aggressively using a distributed, refcounting
* algo as is used for shared deletion. */
typedef struct _dead_fragment_table_t {
fragment_entry_t *table_unaligned;
uint table_flags;
uint capacity;
uint ref_count;
struct _dead_fragment_table_t *next;
} dead_fragment_table_t;
/* We keep these list pointers on the heap for selfprot (case 8074). */
typedef struct _dead_table_lists_t {
dead_fragment_table_t *dead_tables;
dead_fragment_table_t *dead_tables_tail;
} dead_table_lists_t;
static dead_table_lists_t *dead_lists;
DECLARE_CXTSWPROT_VAR(static mutex_t dead_tables_lock, INIT_LOCK_FREE(dead_tables_lock));
#ifdef RETURN_AFTER_CALL
/* High level lock for an atomic lookup+add operation on the
* after call tables. */
DECLARE_CXTSWPROT_VAR(static mutex_t after_call_lock, INIT_LOCK_FREE(after_call_lock));
/* We use per-module tables and only need this table for non-module code;
* on Linux though this is the only table used, until we have a module list.
*/
static rct_module_table_t rac_non_module_table;
#endif
/* allows independent sequences of flushes and delayed deletions,
* though with -syscalls_synch_flush additions we now hold this
* throughout a flush.
*/
DECLARE_CXTSWPROT_VAR(mutex_t shared_cache_flush_lock,
INIT_LOCK_FREE(shared_cache_flush_lock));
/* Global count of flushes, used as a timestamp for shared deletion.
* Reads may be done w/o a lock, but writes can only be done
* via increment_global_flushtime() while holding shared_cache_flush_lock.
*/
DECLARE_FREQPROT_VAR(uint flushtime_global, 0);
#ifdef CLIENT_INTERFACE
DECLARE_CXTSWPROT_VAR(mutex_t client_flush_request_lock,
INIT_LOCK_FREE(client_flush_request_lock));
DECLARE_CXTSWPROT_VAR(client_flush_req_t *client_flush_requests, NULL);
#endif
#if defined(RCT_IND_BRANCH) && defined(UNIX)
/* On Win32 we use per-module tables; on Linux we use a single global table,
* until we have a module list.
*/
rct_module_table_t rct_global_table;
#endif
#define NULL_TAG ((app_pc)PTR_UINT_0)
/* FAKE_TAG is used as a deletion marker for unlinked entries */
#define FAKE_TAG ((app_pc)PTR_UINT_MINUS_1)
/* instead of an empty hashtable slot containing NULL, we fill it
* with a pointer to this constant fragment, which we give a tag
* of 0.
* PR 305731: rather than having a start_pc of 0, which causes
* an app targeting 0 to crash at 0, we point at a handler that
* sends the app to an ibl miss.
*/
byte * hashlookup_null_target;
#define HASHLOOKUP_NULL_START_PC ((cache_pc)hashlookup_null_handler)
static const fragment_t null_fragment = { NULL_TAG, 0, 0, 0, 0,
HASHLOOKUP_NULL_START_PC, };
/* to avoid range check on fast path using an end of table sentinel fragment */
static const fragment_t sentinel_fragment = { NULL_TAG, 0, 0, 0, 0,
HASHLOOKUP_SENTINEL_START_PC, };
/* Shared fragment IBTs: We need to preserve the open addressing traversal
* in the hashtable while marking a table entry as unlinked.
* A null_fragment won't work since it terminates the traversal,
* so we use an unlinked marker. The lookup table entry for
* an unlinked entry *always* has its start_pc_fragment set to
* an IBL target_delete entry.
*/
static const fragment_t unlinked_fragment = { FAKE_TAG, };
/* macro retained from the time of deletion markers */
/* Shared fragment IBTs: unlinked_fragment isn't a real fragment either, so such
 * entries are naturally deleted during a table resize. */
#define REAL_FRAGMENT(fragment) \
((fragment) != &null_fragment && \
(fragment) != &unlinked_fragment && \
(fragment) != &sentinel_fragment)
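/* Typical traversal pattern over a fragment table, using REAL_FRAGMENT() to skip
 * the marker entries (see, e.g., update_indirect_exit_stubs_from_table() below):
 *   for (i = 0; i < ftable->capacity; i++) {
 *       f = ftable->table[i];
 *       if (!REAL_FRAGMENT(f))
 *           continue;
 *       ...
 *   }
 */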
#define GET_PT(dc) ((dc) == GLOBAL_DCONTEXT ? (USE_SHARED_PT() ? shared_pt : NULL) :\
(per_thread_t *) (dc)->fragment_field)
#define TABLE_PROTECTED(ptable) \
(!TABLE_NEEDS_LOCK(ptable) || READWRITE_LOCK_HELD(&(ptable)->rwlock))
/* everything except the invisible table is in here */
#define GET_FTABLE_HELPER(pt, flags, otherwise) \
(TEST(FRAG_IS_TRACE, (flags)) ? \
(TEST(FRAG_SHARED, (flags)) ? shared_trace : &pt->trace) : \
(TEST(FRAG_SHARED, (flags)) ? \
(TEST(FRAG_IS_FUTURE, (flags)) ? shared_future : shared_bb) : \
(TEST(FRAG_IS_FUTURE, (flags)) ? &pt->future : \
(otherwise))))
#define GET_FTABLE(pt, flags) GET_FTABLE_HELPER(pt, (flags), &pt->bb)
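/* Resolution summary for GET_FTABLE_HELPER (restating the nested ternaries above,
 * rows checked top to bottom):
 *   FRAG_IS_TRACE | FRAG_SHARED    -> shared_trace
 *   FRAG_IS_TRACE                  -> &pt->trace
 *   FRAG_SHARED | FRAG_IS_FUTURE   -> shared_future
 *   FRAG_SHARED                    -> shared_bb
 *   FRAG_IS_FUTURE                 -> &pt->future
 *   otherwise                      -> the "otherwise" argument (&pt->bb for GET_FTABLE)
 */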
/* indirect branch table per target type (bb vs trace) and indirect branch type */
#define GET_IBT_TABLE(pt, flags, branch_type) \
(TEST(FRAG_IS_TRACE, (flags)) ? \
(DYNAMO_OPTION(shared_trace_ibt_tables) ? \
&shared_pt->trace_ibt[(branch_type)] : \
&(pt)->trace_ibt[(branch_type)]) : \
(DYNAMO_OPTION(shared_bb_ibt_tables) ? \
&shared_pt->bb_ibt[(branch_type)] : \
&(pt)->bb_ibt[(branch_type)]))
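/* E.g., for a trace fragment with -shared_trace_ibt_tables on,
 * GET_IBT_TABLE(pt, flags, branch_type) resolves to
 * &shared_pt->trace_ibt[branch_type]; with the option off it uses the per-thread
 * &(pt)->trace_ibt[branch_type], and analogously for bbs.
 */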
/********************************** STATICS ***********************************/
static uint fragment_heap_size(uint flags, int direct_exits, int indirect_exits);
static void fragment_free_future(dcontext_t *dcontext, future_fragment_t *fut);
#if defined(RETURN_AFTER_CALL) || defined(RCT_IND_BRANCH)
static void
coarse_persisted_fill_ibl(dcontext_t *dcontext, coarse_info_t *info,
ibl_branch_type_t branch_type);
#endif
#ifdef CLIENT_INTERFACE
static void
process_client_flush_requests(dcontext_t *dcontext, dcontext_t *alloc_dcontext,
client_flush_req_t *req, bool flush);
#endif
#if defined(INTERNAL) || defined(CLIENT_INTERFACE)
/* trace logging and synch for shared trace file: */
DECLARE_CXTSWPROT_VAR(static mutex_t tracedump_mutex, INIT_LOCK_FREE(tracedump_mutex));
DECLARE_FREQPROT_VAR(static stats_int_t tcount, 0); /* protected by tracedump_mutex */
static void exit_trace_file(per_thread_t *pt);
static void output_trace(dcontext_t *dcontext, per_thread_t *pt,
fragment_t *f, stats_int_t deleted_at);
static void init_trace_file(per_thread_t *pt);
#endif
#define SHOULD_OUTPUT_FRAGMENT(flags) \
(TEST(FRAG_IS_TRACE, (flags)) && \
!TEST(FRAG_TRACE_OUTPUT, (flags)) && \
TRACEDUMP_ENABLED())
#define FRAGMENT_COARSE_WRAPPER_FLAGS \
FRAG_FAKE | FRAG_SHARED | FRAG_COARSE_GRAIN | \
FRAG_LINKED_OUTGOING | FRAG_LINKED_INCOMING
/* We use temporary fragment_t + linkstub_t structs to more easily
* use existing code when emitting coarse-grain fragments.
 * Only bbs with a single indirect exit, or with one or two direct exits, can
 * be coarse-grain.
* The bb_building_lock protects use of this.
*/
DECLARE_FREQPROT_VAR(
static struct {
fragment_t f;
union {
struct {
direct_linkstub_t dir_exit_1;
direct_linkstub_t dir_exit_2;
} dir_exits;
indirect_linkstub_t ind_exit;
} exits;
} coarse_emit_fragment, {{0}});
#ifdef SHARING_STUDY
/***************************************************************************
* fragment_t sharing study
* Only used with -fragment_sharing_study
* When the option is off we go ahead and waste the 4 static vars
* below so we don't have to have a define and separate build.
*/
typedef struct _thread_list_t {
uint thread_num;
uint count;
struct _thread_list_t *next;
} thread_list_t;
typedef struct _shared_entry_t {
app_pc tag;
uint num_threads;
thread_list_t *threads;
uint heap_size;
uint cache_size;
struct _shared_entry_t *next;
} shared_entry_t;
# define SHARED_HASH_BITS 16
static shared_entry_t ** shared_blocks;
DECLARE_CXTSWPROT_VAR(static mutex_t shared_blocks_lock,
INIT_LOCK_FREE(shared_blocks_lock));
static shared_entry_t ** shared_traces;
DECLARE_CXTSWPROT_VAR(static mutex_t shared_traces_lock,
INIT_LOCK_FREE(shared_traces_lock));
/* assumes caller holds table's lock! */
static shared_entry_t *
shared_block_lookup(shared_entry_t **table, fragment_t *f)
{
shared_entry_t *e;
uint hindex;
hindex = HASH_FUNC_BITS((ptr_uint_t)f->tag, SHARED_HASH_BITS);
/* using collision chains */
for (e = table[hindex]; e != NULL; e = e->next) {
if (e->tag == f->tag) {
return e;
}
}
return NULL;
}
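/* Frees every entry (including its per-thread list) and then the table array
 * itself; called from fragment_reset_free() when -fragment_sharing_study is on.
 */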
static void
reset_shared_block_table(shared_entry_t **table, mutex_t *lock)
{
shared_entry_t *e, *nxte;
uint i;
uint size = HASHTABLE_SIZE(SHARED_HASH_BITS);
mutex_lock(lock);
for (i = 0; i < size; i++) {
for (e = table[i]; e != NULL; e = nxte) {
thread_list_t *tl = e->threads;
thread_list_t *tlnxt;
nxte = e->next;
while (tl != NULL) {
tlnxt = tl->next;
global_heap_free(tl, sizeof(thread_list_t) HEAPACCT(ACCT_OTHER));
tl = tlnxt;
}
global_heap_free(e, sizeof(shared_entry_t) HEAPACCT(ACCT_OTHER));
}
}
global_heap_free(table, size*sizeof(shared_entry_t*) HEAPACCT(ACCT_OTHER));
mutex_unlock(lock);
}
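/* Records that the calling thread built fragment f: a new entry is allocated on
 * first sight of f->tag, with heap and cache size estimates; subsequent calls
 * only update the per-thread counts and the thread list.
 */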
static void
add_shared_block(shared_entry_t **table, mutex_t *lock, fragment_t *f)
{
shared_entry_t *e;
uint hindex;
int num_direct = 0, num_indirect = 0;
linkstub_t *l = FRAGMENT_EXIT_STUBS(f);
/* use num to avoid thread_id_t recycling problems */
uint tnum = get_thread_num(get_thread_id());
mutex_lock(lock);
e = shared_block_lookup(table, f);
if (e != NULL) {
thread_list_t *tl = e->threads;
for (; tl != NULL; tl = tl->next) {
if (tl->thread_num == tnum) {
tl->count++;
LOG(GLOBAL, LOG_ALL, 2,
"add_shared_block: tag "PFX", but re-add #%d for thread #%d\n",
e->tag, tl->count, tnum);
mutex_unlock(lock);
return;
}
}
tl = global_heap_alloc(sizeof(thread_list_t) HEAPACCT(ACCT_OTHER));
tl->thread_num = tnum;
tl->count = 1;
tl->next = e->threads;
e->threads = tl;
e->num_threads++;
LOG(GLOBAL, LOG_ALL, 2, "add_shared_block: tag "PFX" thread #%d => %d threads\n",
e->tag, tnum, e->num_threads);
mutex_unlock(lock);
return;
}
/* get num stubs to find heap size */
for (; l != NULL; l = LINKSTUB_NEXT_EXIT(l)) {
if (LINKSTUB_DIRECT(l->flags))
num_direct++;
else {
ASSERT(LINKSTUB_INDIRECT(l->flags));
num_indirect++;
}
}
/* add entry to thread hashtable */
e = (shared_entry_t *) global_heap_alloc(sizeof(shared_entry_t) HEAPACCT(ACCT_OTHER));
e->tag = f->tag;
e->num_threads = 1;
e->heap_size = fragment_heap_size(f->flags, num_direct, num_indirect);
e->cache_size = (f->size + f->fcache_extra);
e->threads = global_heap_alloc(sizeof(thread_list_t) HEAPACCT(ACCT_OTHER));
e->threads->thread_num = tnum;
e->threads->count = 1;
e->threads->next = NULL;
LOG(GLOBAL, LOG_ALL, 2, "add_shared_block: tag "PFX", heap %d, cache %d, thread #%d\n",
e->tag, e->heap_size, e->cache_size, e->threads->thread_num);
hindex = HASH_FUNC_BITS((ptr_uint_t)f->tag, SHARED_HASH_BITS);
e->next = table[hindex];
table[hindex] = e;
mutex_unlock(lock);
}
static void
print_shared_table_stats(shared_entry_t **table, mutex_t *lock, const char *name)
{
uint i;
shared_entry_t *e;
uint size = HASHTABLE_SIZE(SHARED_HASH_BITS);
uint tot = 0, shared_tot = 0, shared = 0, heap = 0, cache = 0,
creation_count = 0;
mutex_lock(lock);
for (i = 0; i < size; i++) {
for (e = table[i]; e != NULL; e = e->next) {
thread_list_t *tl = e->threads;
tot++;
shared_tot += e->num_threads;
for (; tl != NULL; tl = tl->next)
creation_count += tl->count;
if (e->num_threads > 1) {
shared++;
/* assume similar size for each thread -- cache padding
* only real difference
*/
heap += (e->heap_size * e->num_threads);
cache += (e->cache_size * e->num_threads);
}
}
}
mutex_unlock(lock);
LOG(GLOBAL, LOG_ALL, 1, "Shared %s statistics:\n", name);
LOG(GLOBAL, LOG_ALL, 1, "\ttotal blocks: %10d\n", tot);
LOG(GLOBAL, LOG_ALL, 1, "\tcreation count: %10d\n", creation_count);
LOG(GLOBAL, LOG_ALL, 1, "\tshared count: %10d\n", shared_tot);
LOG(GLOBAL, LOG_ALL, 1, "\tshared blocks: %10d\n", shared);
LOG(GLOBAL, LOG_ALL, 1, "\tshared heap: %10d\n", heap);
LOG(GLOBAL, LOG_ALL, 1, "\tshared cache: %10d\n", cache);
}
void
print_shared_stats()
{
print_shared_table_stats(shared_blocks, &shared_blocks_lock, "basic block");
print_shared_table_stats(shared_traces, &shared_traces_lock, "trace");
}
#endif /* SHARING_STUDY ***************************************************/
#ifdef FRAGMENT_SIZES_STUDY /*****************************************/
#include <math.h>
/* don't bother to synchronize these */
static int bb_sizes[200000];
static int trace_sizes[40000];
static int num_bb = 0;
static int num_traces = 0;
void
record_fragment_size(int size, bool is_trace)
{
if (is_trace) {
trace_sizes[num_traces] = size;
num_traces++;
ASSERT(num_traces < 40000);
} else {
bb_sizes[num_bb] = size;
num_bb++;
ASSERT(num_bb < 200000);
}
}
void
print_size_results()
{
LOG(GLOBAL, LOG_ALL, 1, "Basic block sizes (bytes):\n");
print_statistics(bb_sizes, num_bb);
LOG(GLOBAL, LOG_ALL, 1, "Trace sizes (bytes):\n");
print_statistics(trace_sizes, num_traces);
}
#endif /* FRAGMENT_SIZES_STUDY */ /*****************************************/
#define FRAGTABLE_WHICH_HEAP(flags) \
(TESTALL(FRAG_TABLE_INCLUSIVE_HIERARCHY | FRAG_TABLE_IBL_TARGETED, \
(flags)) ? ACCT_IBLTABLE : ACCT_FRAG_TABLE)
#ifdef HASHTABLE_STATISTICS
# define UNPROT_STAT(stats) unprot_stats->stats
/* FIXME: either put in nonpersistent heap as appropriate, or
* preserve across resets
*/
# define ALLOC_UNPROT_STATS(dcontext, table) do { \
(table)->unprot_stats = \
HEAP_TYPE_ALLOC((dcontext), unprot_ht_statistics_t, \
FRAGTABLE_WHICH_HEAP((table)->table_flags), \
UNPROTECTED); \
memset((table)->unprot_stats, 0, sizeof(unprot_ht_statistics_t)); \
} while (0)
# define DEALLOC_UNPROT_STATS(dcontext, table) \
HEAP_TYPE_FREE((dcontext), (table)->unprot_stats, unprot_ht_statistics_t, \
FRAGTABLE_WHICH_HEAP((table)->table_flags), UNPROTECTED)
# define CHECK_UNPROT_STATS(table) ASSERT(table.unprot_stats != NULL)
static void
check_stay_on_trace_stats_overflow(dcontext_t *dcontext, ibl_branch_type_t branch_type)
{
per_thread_t *pt = (per_thread_t *) dcontext->fragment_field;
hashtable_statistics_t *lookup_stats = &pt->trace_ibt[branch_type].unprot_stats->
trace_ibl_stats[branch_type];
if (lookup_stats->ib_stay_on_trace_stat < lookup_stats->ib_stay_on_trace_stat_last) {
lookup_stats->ib_stay_on_trace_stat_ovfl++;
}
lookup_stats->ib_stay_on_trace_stat_last = lookup_stats->ib_stay_on_trace_stat;
/* FIXME: ib_trace_last_ibl_exit should have an overflow check as well */
}
#endif /* HASHTABLE_STATISTICS */
/* init/update the tls slots storing this table's mask and lookup base
* N.B.: for thread-shared the caller must call for each thread
*/
/* currently we don't support a mixture */
static inline void
update_lookuptable_tls(dcontext_t *dcontext, ibl_table_t *table)
{
/* use dcontext->local_state, rather than get_local_state(), to support
* being called from other threads!
*/
local_state_extended_t *state =
(local_state_extended_t *) dcontext->local_state;
ASSERT(state != NULL);
ASSERT(DYNAMO_OPTION(ibl_table_in_tls));
/* We must hold at least the read lock here, else we could grab
* an inconsistent mask/lookuptable pair if another thread is in the middle
* of resizing the table (case 10405).
*/
ASSERT_TABLE_SYNCHRONIZED(table, READWRITE);
/* case 10296: for shared tables we must update the table
* before the mask, as the ibl lookup code accesses the mask first,
* and old mask + new table is ok since it will de-ref within the
* new table (we never shrink tables) and be a miss, whereas
* new mask + old table can de-ref beyond the end of the table,
* crashing or worse.
*/
state->table_space.table[table->branch_type].lookuptable =
table->table;
state->table_space.table[table->branch_type].hash_mask =
table->hash_mask;
}
#ifdef DEBUG
static const char *ibl_bb_table_type_names[IBL_BRANCH_TYPE_END] =
{"ret_bb", "indcall_bb", "indjmp_bb"};
static const char *ibl_trace_table_type_names[IBL_BRANCH_TYPE_END] =
{"ret_trace", "indcall_trace", "indjmp_trace"};
#endif
#ifdef DEBUG
static inline void
dump_lookuptable_tls(dcontext_t *dcontext)
{
/* use dcontext->local_state, rather than get_local_state(), to support
* being called from other threads!
*/
if (DYNAMO_OPTION(ibl_table_in_tls)) {
local_state_extended_t *state =
(local_state_extended_t *) dcontext->local_state;
ibl_branch_type_t branch_type;
ASSERT(state != NULL);
for (branch_type = IBL_BRANCH_TYPE_START;
branch_type < IBL_BRANCH_TYPE_END; branch_type++) {
LOG(THREAD, LOG_FRAGMENT, 1,
"\t Table %s, table "PFX", mask "PFX"\n",
!SHARED_BB_ONLY_IB_TARGETS() ?
ibl_trace_table_type_names[branch_type] :
ibl_bb_table_type_names[branch_type],
state->table_space.table[branch_type].lookuptable,
state->table_space.table[branch_type].hash_mask);
}
}
}
#endif
/*******************************************************************************
* IBL HASHTABLE INSTANTIATION
*/
#define FRAGENTRY_FROM_FRAGMENT(f) { (f)->tag, (f)->start_pc }
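/* Illustrative use (the macro expands to a brace initializer, so it is meant for
 * use at a declaration): fragment_entry_t fe = FRAGENTRY_FROM_FRAGMENT(f);
 */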
/* macros w/ name and types are duplicated in fragment.h -- keep in sync */
#define NAME_KEY ibl
#define ENTRY_TYPE fragment_entry_t
/* not defining HASHTABLE_USE_LOOKUPTABLE */
/* compiler won't let me use null_fragment.tag here */
static const fragment_entry_t fe_empty = { NULL_TAG, HASHLOOKUP_NULL_START_PC };
static const fragment_entry_t fe_sentinel = { NULL_TAG, HASHLOOKUP_SENTINEL_START_PC };
#define ENTRY_TAG(fe) ((ptr_uint_t)(fe).tag_fragment)
#define ENTRY_EMPTY (fe_empty)
#define ENTRY_SENTINEL (fe_sentinel)
#define IBL_ENTRY_IS_EMPTY(fe) \
((fe).tag_fragment == fe_empty.tag_fragment && \
(fe).start_pc_fragment == fe_empty.start_pc_fragment)
#define IBL_ENTRY_IS_INVALID(fe) ((fe).tag_fragment == FAKE_TAG)
#define IBL_ENTRY_IS_SENTINEL(fe) \
((fe).tag_fragment == fe_sentinel.tag_fragment && \
(fe).start_pc_fragment == fe_sentinel.start_pc_fragment)
#define ENTRY_IS_EMPTY(fe) IBL_ENTRY_IS_EMPTY(fe)
#define ENTRY_IS_SENTINEL(fe) IBL_ENTRY_IS_SENTINEL(fe)
#define ENTRY_IS_INVALID(fe) IBL_ENTRY_IS_INVALID(fe)
#define IBL_ENTRIES_ARE_EQUAL(fe1,fe2) ((fe1).tag_fragment == (fe2).tag_fragment)
#define ENTRIES_ARE_EQUAL(table,fe1,fe2) IBL_ENTRIES_ARE_EQUAL(fe1,fe2)
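/* Note that equality here is on tags only; the start_pc_fragment payload is not
 * part of the comparison.
 */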
#define HASHTABLE_WHICH_HEAP(flags) FRAGTABLE_WHICH_HEAP(flags)
#define HTLOCK_RANK table_rwlock
#define HASHTABLE_ENTRY_STATS 1
#include "hashtablex.h"
/* all defines are undef-ed at end of hashtablex.h */
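/* Including hashtablex.h with the NAME_KEY/ENTRY_TYPE macros above instantiates
 * the generic hashtable implementation as the hashtable_ibl_* routines used in
 * this file (e.g., hashtable_ibl_init, hashtable_ibl_free, hashtable_ibl_free_table).
 */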
/* required routines for hashtable interface that we don't need for this instance */
static void
hashtable_ibl_free_entry(dcontext_t *dcontext, ibl_table_t *table,
fragment_entry_t entry)
{
/* nothing to do, data is inlined */
}
/*******************************************************************************
* FRAGMENT HASHTABLE INSTANTIATION
*/
/* macros w/ name and types are duplicated in fragment.h -- keep in sync */
#define NAME_KEY fragment
#define ENTRY_TYPE fragment_t *
/* not defining HASHTABLE_USE_LOOKUPTABLE */
#define ENTRY_TAG(f) ((ptr_uint_t)(f)->tag)
/* instead of setting to 0, point at null_fragment */
#define ENTRY_EMPTY ((fragment_t *)&null_fragment)
#define ENTRY_SENTINEL ((fragment_t *)&sentinel_fragment)
#define ENTRY_IS_EMPTY(f) ((f) == (fragment_t *)&null_fragment)
#define ENTRY_IS_SENTINEL(f) ((f) == (fragment_t *)&sentinel_fragment)
#define ENTRY_IS_INVALID(f) ((f) == (fragment_t *)&unlinked_fragment)
#define ENTRIES_ARE_EQUAL(t,f,g) ((f) == (g))
#define HASHTABLE_WHICH_HEAP(flags) FRAGTABLE_WHICH_HEAP(flags)
#define HTLOCK_RANK table_rwlock
#include "hashtablex.h"
/* all defines are undef-ed at end of hashtablex.h */
static void
hashtable_fragment_resized_custom(dcontext_t *dcontext, fragment_table_t *table,
uint old_capacity, fragment_t **old_table,
fragment_t **old_table_unaligned,
uint old_ref_count, uint old_table_flags)
{
/* nothing */
}
static void
hashtable_fragment_init_internal_custom(dcontext_t *dcontext, fragment_table_t *table)
{
/* nothing */
}
#ifdef DEBUG
static void
hashtable_fragment_study_custom(dcontext_t *dcontext, fragment_table_t *table,
uint entries_inc/*amnt table->entries was pre-inced*/)
{
/* nothing */
}
#endif
/* callers should use either hashtable_ibl_preinit or hashtable_resize instead */
static void
hashtable_ibl_init_internal_custom(dcontext_t *dcontext, ibl_table_t *table)
{
ASSERT(null_fragment.tag == NULL_TAG);
ASSERT(null_fragment.start_pc == HASHLOOKUP_NULL_START_PC);
ASSERT(FAKE_TAG != NULL_TAG);
ASSERT(sentinel_fragment.tag == NULL_TAG);
ASSERT(sentinel_fragment.start_pc == HASHLOOKUP_SENTINEL_START_PC);
ASSERT(HASHLOOKUP_SENTINEL_START_PC != HASHLOOKUP_NULL_START_PC);
ASSERT(TEST(FRAG_TABLE_IBL_TARGETED, table->table_flags));
ASSERT(TEST(FRAG_TABLE_INCLUSIVE_HIERARCHY, table->table_flags));
/* every time we resize a table we reset the flush threshold,
* since it is cleared in place after one flush
*/
table->groom_factor_percent =
TEST(FRAG_TABLE_TRACE, table->table_flags) ?
DYNAMO_OPTION(trace_ibt_groom) : DYNAMO_OPTION(bb_ibt_groom);
table->max_capacity_bits =
TEST(FRAG_TABLE_TRACE, table->table_flags) ?
DYNAMO_OPTION(private_trace_ibl_targets_max) :
DYNAMO_OPTION(private_bb_ibl_targets_max);
#ifdef HASHTABLE_STATISTICS
if (INTERNAL_OPTION(hashtable_ibl_stats)) {
if (table->unprot_stats == NULL) {
/* first time, not a resize */
ALLOC_UNPROT_STATS(dcontext, table);
} /* else, keep original */
}
#endif /* HASHTABLE_STATISTICS */
if (SHARED_IB_TARGETS() &&
!TEST(FRAG_TABLE_SHARED, table->table_flags)) {
/* currently we don't support a mixture */
ASSERT(TEST(FRAG_TABLE_TARGET_SHARED, table->table_flags));
ASSERT(TEST(FRAG_TABLE_IBL_TARGETED, table->table_flags));
ASSERT(table->branch_type != IBL_NONE);
/* Only data for one set of tables is stored in TLS -- for the trace
* tables in the default config OR the BB tables in shared BBs
* only mode.
*/
if ((TEST(FRAG_TABLE_TRACE, table->table_flags) ||
SHARED_BB_ONLY_IB_TARGETS()) &&
DYNAMO_OPTION(ibl_table_in_tls))
update_lookuptable_tls(dcontext, table);
}
}
/* We need our own routines to init/free our added fields */
static void
hashtable_ibl_myinit(dcontext_t *dcontext, ibl_table_t *table, uint bits,
uint load_factor_percent, hash_function_t func,
uint hash_offset, ibl_branch_type_t branch_type,
bool use_lookup, uint table_flags _IF_DEBUG(const char *table_name))
{
uint flags = table_flags;
ASSERT(dcontext != GLOBAL_DCONTEXT || TEST(FRAG_TABLE_SHARED, flags));
/* flags shared by all ibl tables */
flags |= FRAG_TABLE_INCLUSIVE_HIERARCHY;
flags |= FRAG_TABLE_IBL_TARGETED;
flags |= HASHTABLE_ALIGN_TABLE;
/* use entry stats with all our ibl-targeted tables */
flags |= HASHTABLE_USE_ENTRY_STATS;
#ifdef HASHTABLE_STATISTICS
/* indicate this is first time, not a resize */
table->unprot_stats = NULL;
#endif
table->branch_type = branch_type;
hashtable_ibl_init(dcontext, table, bits, load_factor_percent,
func, hash_offset, flags _IF_DEBUG(table_name));
/* PR 305731: rather than having a start_pc of 0, which causes an
* app targeting 0 to crash at 0, we point at a handler that sends
* the app to an ibl miss via target_delete, which restores
* registers saved in the found path.
*/
if (dcontext != GLOBAL_DCONTEXT && hashlookup_null_target == NULL) {
ASSERT(!dynamo_initialized);
hashlookup_null_target = get_target_delete_entry_pc(dcontext, table);
#if !defined(X64) && defined(LINUX)
/* see comments in x86.asm: we patch to avoid text relocations */
byte *pc = (byte *) hashlookup_null_handler;
byte *page_start = (byte *) PAGE_START(pc);
byte *page_end = (byte *) ALIGN_FORWARD(pc + JMP_LONG_LENGTH, PAGE_SIZE);
make_writable(page_start, page_end - page_start);
insert_relative_target(pc + 1, hashlookup_null_target, NOT_HOT_PATCHABLE);
make_unwritable(page_start, page_end - page_start);
#endif
}
}
static void
hashtable_ibl_myfree(dcontext_t *dcontext, ibl_table_t *table)
{
#ifdef HASHTABLE_STATISTICS
if (INTERNAL_OPTION(hashtable_ibl_stats)) {
ASSERT(TEST(FRAG_TABLE_IBL_TARGETED, table->table_flags));
DEALLOC_UNPROT_STATS(dcontext, table);
}
#endif /* HASHTABLE_STATISTICS */
hashtable_ibl_free(dcontext, table);
}
static void
hashtable_fragment_free_entry(dcontext_t *dcontext, fragment_table_t *table,
fragment_t *f)
{
if (TEST(FRAG_TABLE_INCLUSIVE_HIERARCHY, table->table_flags)) {
ASSERT_NOT_REACHED(); /* case 7691 */
} else {
if (TEST(FRAG_IS_FUTURE, f->flags))
fragment_free_future(dcontext, (future_fragment_t *)f);
else
fragment_free(dcontext, f);
}
}
static inline bool
fragment_add_to_hashtable(dcontext_t *dcontext, fragment_t *e, fragment_table_t *table)
{
/* When using shared IBT tables w/trace building and BB2BB IBL, there is a
* race between adding a BB target to a table and having it marked by
* another thread as a trace head. The race exists because the two functions
* do not use a common lock.
* The race does NOT cause a correctness problem since a) the marking thread
* removes the trace head from the table and b) any subsequent add attempt
* is caught in add_ibl_target(). The table lock is used during add and
* remove operations and FRAG_IS_TRACE_HEAD is checked while holding
* the lock. So although a trace head may be present in a table temporarily --
* it's being marked while an add operation that has passed the frag flags
* check is in progress -- it will be subsequently removed by the marking
* thread.
* However, the existence of the race does mean that
* we cannot ASSERT(!(FRAG_IS_TRACE_HEAD,...)) at arbitrary spots along the
* add_ibl_target() path since such an assert could fire due to the race.
* What is likely a safe point to assert is when there is only a single
* thread in the process.
*/
DOCHECK(1, {
if (TEST(FRAG_TABLE_IBL_TARGETED, table->table_flags) &&
get_num_threads() == 1)
ASSERT(!TEST(FRAG_IS_TRACE_HEAD, e->flags));
});
return hashtable_fragment_add(dcontext, e, table);
}
/* updates all fragments in a given fragment table which may
* have IBL routine heads inlined in the indirect exit stubs
*
* FIXME: [perf] should add a filter of which branch types need updating if
* updating all is a noticeable performance hit.
*
 * FIXME: [perf] Also it may be better to traverse all fragments in an fcache
* unit instead of entries in a half-empty hashtable
*/
static void
update_indirect_exit_stubs_from_table(dcontext_t *dcontext,
fragment_table_t *ftable)
{
fragment_t *f;
linkstub_t *l;
uint i;
for (i = 0; i < ftable->capacity; i++) {
f = ftable->table[i];
if (!REAL_FRAGMENT(f))
continue;
for (l = FRAGMENT_EXIT_STUBS(f); l != NULL; l = LINKSTUB_NEXT_EXIT(l)) {
if (LINKSTUB_INDIRECT(l->flags)) {
/* FIXME: should add a filter of which branch types need updating */
update_indirect_exit_stub(dcontext, f, l);
LOG(THREAD, LOG_FRAGMENT, 5,
"\tIBL target table resizing: updating F%d\n", f->id);
STATS_INC(num_ibl_stub_resize_updates);
}
}
}
}
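/* Redirects every (non-sentinel) entry of a retired lookup table to the IBL
 * target_delete entry so that any thread still reading the stale table exits the
 * cache promptly; see the per-entry comments below for the required update ordering.
 */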
static void
safely_nullify_tables(dcontext_t *dcontext, ibl_table_t *new_table,
fragment_entry_t *table, uint capacity)
{
uint i;
cache_pc target_delete = get_target_delete_entry_pc(dcontext, new_table);
ASSERT(target_delete != NULL);
ASSERT_TABLE_SYNCHRONIZED(new_table, WRITE);
for (i = 0; i < capacity; i++) {
if (IBL_ENTRY_IS_SENTINEL(table[i])) {
ASSERT(i == capacity - 1);
continue;
}
/* We need these writes to be atomic, so check that they're aligned. */
ASSERT(ALIGNED(&table[i].tag_fragment, 4));
ASSERT(ALIGNED(&table[i].start_pc_fragment, 4));
        /* We update the tag first so that a thread that's skipping
* along a chain will exit ASAP. Breaking the chain is ok since we're
* nullifying the entire table.
*/
table[i].tag_fragment = fe_empty.tag_fragment;
/* We set the payload to target_delete to induce a cache exit.
*
* The target_delete path leads to a loss of information -- we can't
* tell what the src fragment was (the one that transitioned to the
* IBL code) and this in principle could weaken our RCT checks (see case
* 5085). In practical terms, RCT checks are unaffected since they
* are not employed on in-cache transitions such as an IBL hit.
* (All transitions to target_delete are a race along the hit path.)
* If we still want to preserve the src info, we can leave the payload
* as-is, possibly pointing to a cache address. The effect is that
* any thread accessing the old table on the IBL hit path will not exit
* the cache as early. (We should leave the fragment_t* value in the
* table untouched also so that the fragment_table_t is in a consistent
* state.)
*/
table[i].start_pc_fragment = target_delete;
}
STATS_INC(num_shared_ibt_table_flushes);
}
/* Add an item to the dead tables list */
static inline void
add_to_dead_table_list(dcontext_t *alloc_dc, ibl_table_t *ftable,
uint old_capacity,
fragment_entry_t *old_table_unaligned, uint old_ref_count,
uint old_table_flags)
{
    dead_fragment_table_t *item = (dead_fragment_table_t *)
heap_alloc(GLOBAL_DCONTEXT, sizeof(dead_fragment_table_t)
HEAPACCT(ACCT_IBLTABLE));
LOG(GLOBAL, LOG_FRAGMENT, 2,
"add_to_dead_table_list %s "PFX" capacity %d\n",
ftable->name, old_table_unaligned, old_capacity);
    ASSERT(old_ref_count >= 1); /* someone other than the caller must be holding a reference */
/* write lock must be held so that ref_count is copied accurately */
ASSERT_TABLE_SYNCHRONIZED(ftable, WRITE);
item->capacity = old_capacity;
item->table_unaligned = old_table_unaligned;
item->table_flags = old_table_flags;
item->ref_count = old_ref_count;
item->next = NULL;
/* Add to the end of list. We use a FIFO because generally we'll be
* decrementing ref-counts for older tables before we do so for
* younger tables. A FIFO will yield faster searches than, say, a
* stack.
*/
mutex_lock(&dead_tables_lock);
if (dead_lists->dead_tables == NULL) {
ASSERT(dead_lists->dead_tables_tail == NULL);
dead_lists->dead_tables = item;
}
else {
ASSERT(dead_lists->dead_tables_tail != NULL);
ASSERT(dead_lists->dead_tables_tail->next == NULL);
dead_lists->dead_tables_tail->next = item;
}
dead_lists->dead_tables_tail = item;
mutex_unlock(&dead_tables_lock);
STATS_ADD_PEAK(num_dead_shared_ibt_tables, 1);
STATS_INC(num_total_dead_shared_ibt_tables);
}
/* forward decl */
static inline void
update_private_ptr_to_shared_ibt_table(dcontext_t *dcontext,
ibl_branch_type_t branch_type, bool trace,
bool adjust_old_ref_count, bool lock_table);
static void
hashtable_ibl_resized_custom(dcontext_t *dcontext, ibl_table_t *table,
uint old_capacity, fragment_entry_t *old_table,
fragment_entry_t *old_table_unaligned,
uint old_ref_count, uint old_table_flags)
{
dcontext_t *alloc_dc = FRAGMENT_TABLE_ALLOC_DC(dcontext, table->table_flags);
per_thread_t *pt = GET_PT(dcontext);
bool shared_ibt_table =
TESTALL(FRAG_TABLE_TARGET_SHARED | FRAG_TABLE_SHARED, table->table_flags);
ASSERT(TEST(FRAG_TABLE_IBL_TARGETED, table->table_flags));
/* If we change an ibl-targeted table, must patch up every
* inlined indirect exit stub that targets it.
     * For our per-type ibl tables, however, we don't bother updating
     * fragments _targeted_ by the resized table; instead we need to
     * update all fragments that may be a source of an inlined IBL.
*/
/* private inlined IBL heads targeting this table need to be updated */
if (DYNAMO_OPTION(inline_trace_ibl) && PRIVATE_TRACES_ENABLED()) {
/* We'll get here on a trace table resize, while we
* need to patch only when the trace_ibt tables are resized.
*/
/* We assume we don't inline IBL lookup targeting tables of basic blocks
* and so shouldn't need to do this for now. */
ASSERT(dcontext != GLOBAL_DCONTEXT && pt != NULL); /* private traces */
if (TESTALL(FRAG_TABLE_INCLUSIVE_HIERARCHY | FRAG_TABLE_TRACE,
table->table_flags)) {
/* need to update all traces that could be targeting the
* currently resized table */
LOG(THREAD, LOG_FRAGMENT, 2,
"\tIBL target table resizing: updating all private trace fragments\n");
update_indirect_exit_stubs_from_table(dcontext, &pt->trace);
}
}
/* if we change the trace table (or an IBL target trace
* table), must patch up every inlined indirect exit stub
* in all bb fragments in case the inlined target is the
* resized table
*/
if (DYNAMO_OPTION(inline_bb_ibl)) {
LOG(THREAD, LOG_FRAGMENT, 3,
"\tIBL target table resizing: updating bb fragments\n");
update_indirect_exit_stubs_from_table(dcontext, &pt->bb);
}
/* don't need to update any inlined lookups in shared fragments */
if (shared_ibt_table) {
if (old_ref_count > 0) {
/* The old table should be nullified ASAP. Since threads update
* their table pointers on-demand only when they exit the cache
* after a failed IBL lookup, they could have IBL targets for
* stale entries. This would likely occur only when there's an
* app race but in the future could occur due to cache
* management.
*/
safely_nullify_tables(dcontext, table, old_table, old_capacity);
add_to_dead_table_list(alloc_dc, table, old_capacity,
old_table_unaligned,
old_ref_count, table->table_flags);
}
/* Update the resizing thread's private ptr. */
update_private_ptr_to_shared_ibt_table(dcontext, table->branch_type,
TEST(FRAG_TABLE_TRACE,
table->table_flags),
false, /* no adjust
* old ref-count */
false /* already hold lock */);
ASSERT(table->ref_count == 1);
}
/* CHECK: is it safe to update the table without holding the lock? */
/* Using the table flags to drive the update of generated code may
* err on the side of caution, but it's the best way to guarantee
* that all of the necessary code is updated.
* We may perform extra unnecessary updates when a table that's
* accessed off of the dcontext/per_thread_t is grown, but that doesn't
     * cause correctness problems and likely doesn't hurt performance.
*/
STATS_INC(num_ibt_table_resizes);
update_generated_hashtable_access(dcontext);
}
#ifdef DEBUG
static void
hashtable_ibl_study_custom(dcontext_t *dcontext, ibl_table_t *table,
uint entries_inc/*amnt table->entries was pre-inced*/)
{
# ifdef HASHTABLE_STATISTICS
/* For trace table(s) only, use stats from emitted ibl routines */
if (TEST(FRAG_TABLE_IBL_TARGETED, table->table_flags) &&
INTERNAL_OPTION(hashtable_ibl_stats)) {
per_thread_t *pt = GET_PT(dcontext);
ibl_branch_type_t branch_type;
for (branch_type = IBL_BRANCH_TYPE_START;
branch_type < IBL_BRANCH_TYPE_END; branch_type++) {
/* This is convoluted since given a table we have to
* recover its branch type.
* FIXME: should simplify these assumptions one day
*/
/* Current table should be targeted only by one of the IBL routines */
if (!((!DYNAMO_OPTION(disable_traces) &&
table == &pt->trace_ibt[branch_type]) ||
(DYNAMO_OPTION(bb_ibl_targets) &&
table == &pt->bb_ibt[branch_type])))
continue;
/* stats for lookup routines from bb's and trace's targeting the current table */
print_hashtable_stats(dcontext, entries_inc == 0 ? "Total" : "Current",
table->name,
"trace ibl ", get_branch_type_name(branch_type),
&table->UNPROT_STAT(trace_ibl_stats[branch_type]));
print_hashtable_stats(dcontext, entries_inc == 0 ? "Total" : "Current",
table->name,
"bb ibl ",
get_branch_type_name(branch_type),
&table->UNPROT_STAT(bb_ibl_stats[branch_type]));
}
}
# endif /* HASHTABLE_STATISTICS */
}
#endif /* DEBUG */
#if defined(DEBUG) || defined(CLIENT_INTERFACE)
/* filter specifies flags for fragments which are OK to be freed */
/* NOTE - if this routine is ever used for non-DEBUG purposes be aware that
 * because of case 7697 we don't unlink when we free the hashtable elements.
 * As such, if we aren't also freeing all fragments that could possibly link
 * to fragments in this table at the same time (synchronously) we'll have
 * problems (for ex. a trace-only reset would need to unlink incoming, or
 * allowing private->shared linking would need to unlink outgoing).
*/
static void
hashtable_fragment_reset(dcontext_t *dcontext, fragment_table_t *table)
{
int i;
fragment_t *f;
/* case 7691: we now use separate ibl table types */
ASSERT(!TEST(FRAG_TABLE_INCLUSIVE_HIERARCHY, table->table_flags));
LOG(THREAD, LOG_FRAGMENT, 2, "hashtable_fragment_reset\n");
DOLOG(1, LOG_FRAGMENT|LOG_STATS, {
hashtable_fragment_load_statistics(dcontext, table);
});
if (TEST(FRAG_TABLE_SHARED, table->table_flags) &&
TEST(FRAG_TABLE_IBL_TARGETED, table->table_flags)) {
DOLOG(4, LOG_FRAGMENT, {
hashtable_fragment_dump_table(dcontext, table);
});
}
DODEBUG({
hashtable_fragment_study(dcontext, table, 0/*table consistent*/);
/* ensure write lock is held if the table is shared, unless exiting
* or resetting (N.B.: if change reset model to not suspend all in-DR
* threads, will have to change this and handle rank order issues)
*/
if (!dynamo_exited && !dynamo_resetting)
ASSERT_TABLE_SYNCHRONIZED(table, WRITE);
});
    /* Go in reverse order (for efficiency) since we use
     * hashtable_fragment_remove_helper() to keep all entries reachable, which is
     * required for dynamo_resetting where we unlink fragments here and need to be
     * able to perform lookups.
*/
i = table->capacity - 1 - 1 /* sentinel */;
while (i >= 0) {
f = table->table[i];
if (f == &null_fragment) {
i--;
} else { /* i stays put */
/* The shared BB table is reset at process reset or shutdown, so
* trace_abort() has already been called by (or for) every thread.
* If shared traces is true, by this point none of the shared BBs
* should have FRAG_TRACE_BUILDING set since the flag is cleared
* by trace_abort(). Of course, the flag shouldn't be present
* if shared traces is false so we don't need to conditionalize
* the assert.
*/
ASSERT(!TEST(FRAG_TRACE_BUILDING, f->flags));
hashtable_fragment_remove_helper(table, i, &table->table[i]);
if (!REAL_FRAGMENT(f))
continue;
            /* make sure no other hashtable has shared fragments in it;
             * this routine is called on the shared table, but only after dynamo_exited;
             * the per-thread IBL tables contain pointers to shared fragments
             * and are OK
*/
ASSERT(dynamo_exited || !TEST(FRAG_SHARED, f->flags) || dynamo_resetting);
# if defined(SIDELINE) && defined(PROFILE_LINKCOUNT)
if ((f->flags & FRAG_DO_NOT_SIDELINE) != 0) {
/* print out total count of exit counters */
LOG(THREAD, LOG_SIDELINE, 2, "\tSidelined trace F%d total times executed: "
LINKCOUNT_FORMAT_STRING "\n", f->id, get_total_linkcount(f));
}
# endif
if (TEST(FRAG_IS_FUTURE, f->flags)) {
DODEBUG({ ((future_fragment_t *)f)->incoming_stubs = NULL; });
fragment_free_future(dcontext, (future_fragment_t *)f);
} else {
DOSTATS({
if (dynamo_resetting)
STATS_INC(num_fragments_deleted_reset);
else
STATS_INC(num_fragments_deleted_exit);
});
/* Xref 7697 - unlinking the fragments here can screw up the
* future table as we are walking in hash order, so we don't
* unlink. See note at top of routine for issues with not
* unlinking here if this code is ever used in non debug
* builds. */
fragment_delete(dcontext, f,
FRAGDEL_NO_HTABLE | FRAGDEL_NO_UNLINK |
FRAGDEL_NEED_CHLINK_LOCK |
(dynamo_resetting ? 0 : FRAGDEL_NO_OUTPUT));
}
}
}
table->entries = 0;
table->unlinked_entries = 0;
}
#endif /* DEBUG || CLIENT_INTERFACE */
/*
*******************************************************************************/
#if defined(RETURN_AFTER_CALL) || defined (RCT_IND_BRANCH)
/*******************************************************************************
* APP_PC HASHTABLE INSTANTIATION
*/
/* FIXME: RCT tables no longer use future_fragment_t and can be moved out of fragment.c */
/* The ENTRY_* defines are undef-ed at end of hashtablex.h so we make our own.
* Would be nice to re-use ENTRY_IS_EMPTY, etc., though w/ multiple htables
* in same file can't realistically get away w/o custom defines like these:
*/
#define APP_PC_EMPTY (NULL)
/* assume 1 is always invalid address */
#define APP_PC_SENTINEL ((app_pc)PTR_UINT_1)
#define APP_PC_ENTRY_IS_EMPTY(pc) ((pc) == APP_PC_EMPTY)
#define APP_PC_ENTRY_IS_SENTINEL(pc) ((pc) == APP_PC_SENTINEL)
#define APP_PC_ENTRY_IS_REAL(pc) (!APP_PC_ENTRY_IS_EMPTY(pc) && \
!APP_PC_ENTRY_IS_SENTINEL(pc))
/* 2 macros w/ name and types are duplicated in fragment.h -- keep in sync */
#define NAME_KEY app_pc
#define ENTRY_TYPE app_pc
/* not defining HASHTABLE_USE_LOOKUPTABLE */
#define ENTRY_TAG(f) ((ptr_uint_t)(f))
#define ENTRY_EMPTY APP_PC_EMPTY
#define ENTRY_SENTINEL APP_PC_SENTINEL
#define ENTRY_IS_EMPTY(f) APP_PC_ENTRY_IS_EMPTY(f)
#define ENTRY_IS_SENTINEL(f) APP_PC_ENTRY_IS_SENTINEL(f)
#define ENTRY_IS_INVALID(f) (false) /* no invalid entries */
#define ENTRIES_ARE_EQUAL(t,f,g) ((f) == (g))
#define HASHTABLE_WHICH_HEAP(flags) (ACCT_AFTER_CALL)
#define HTLOCK_RANK app_pc_table_rwlock
#define HASHTABLE_SUPPORT_PERSISTENCE 1
#include "hashtablex.h"
/* all defines are undef-ed at end of hashtablex.h */
/* required routines for hashtable interface that we don't need for this instance */
static void
hashtable_app_pc_init_internal_custom(dcontext_t *dcontext, app_pc_table_t *htable)
{ /* nothing */
}
static void
hashtable_app_pc_resized_custom(dcontext_t *dcontext, app_pc_table_t *htable,
uint old_capacity, app_pc *old_table,
app_pc *old_table_unaligned,
uint old_ref_count, uint old_table_flags)
{ /* nothing */
}
# ifdef DEBUG
static void
hashtable_app_pc_study_custom(dcontext_t *dcontext, app_pc_table_t *htable,
uint entries_inc/*amnt table->entries was pre-inced*/)
{ /* nothing */
}
# endif
static void
hashtable_app_pc_free_entry(dcontext_t *dcontext, app_pc_table_t *htable,
app_pc entry)
{
/* nothing to do, data is inlined */
}
#endif /* defined(RETURN_AFTER_CALL) || defined (RCT_IND_BRANCH) */
/*******************************************************************************/
bool
fragment_initialized(dcontext_t *dcontext)
{
return (dcontext != GLOBAL_DCONTEXT && dcontext->fragment_field != NULL);
}
/* thread-shared initialization that should be repeated after a reset */
void
fragment_reset_init(void)
{
/* case 7966: don't initialize at all for hotp_only & thin_client */
if (RUNNING_WITHOUT_CODE_CACHE())
return;
mutex_lock(&shared_cache_flush_lock);
/* ASSUMPTION: a reset frees all deletions that use flushtimes, so we can
* reset the global flushtime here
*/
flushtime_global = 0;
mutex_unlock(&shared_cache_flush_lock);
if (SHARED_FRAGMENTS_ENABLED()) {
if (DYNAMO_OPTION(shared_bbs)) {
hashtable_fragment_init(GLOBAL_DCONTEXT, shared_bb,
INIT_HTABLE_SIZE_SHARED_BB,
INTERNAL_OPTION(shared_bb_load),
(hash_function_t)INTERNAL_OPTION(alt_hash_func),
0 /* hash_mask_offset */,
FRAG_TABLE_SHARED | FRAG_TABLE_TARGET_SHARED
_IF_DEBUG("shared_bb"));
}
if (DYNAMO_OPTION(shared_traces)) {
hashtable_fragment_init(GLOBAL_DCONTEXT, shared_trace,
INIT_HTABLE_SIZE_SHARED_TRACE,
INTERNAL_OPTION(shared_trace_load),
(hash_function_t)INTERNAL_OPTION(alt_hash_func),
0 /* hash_mask_offset */,
FRAG_TABLE_SHARED | FRAG_TABLE_TARGET_SHARED
_IF_DEBUG("shared_trace"));
}
/* init routine will work for future_fragment_t* same as for fragment_t* */
hashtable_fragment_init(GLOBAL_DCONTEXT, shared_future,
INIT_HTABLE_SIZE_SHARED_FUTURE,
INTERNAL_OPTION(shared_future_load),
(hash_function_t)INTERNAL_OPTION(alt_hash_func),
0 /* hash_mask_offset */,
FRAG_TABLE_SHARED | FRAG_TABLE_TARGET_SHARED
_IF_DEBUG("shared_future"));
}
if (SHARED_IBT_TABLES_ENABLED()) {
ibl_branch_type_t branch_type;
ASSERT(USE_SHARED_PT());
for (branch_type = IBL_BRANCH_TYPE_START;
branch_type < IBL_BRANCH_TYPE_END; branch_type++) {
if (DYNAMO_OPTION(shared_trace_ibt_tables)) {
hashtable_ibl_myinit(GLOBAL_DCONTEXT, &shared_pt->trace_ibt[branch_type],
DYNAMO_OPTION(shared_ibt_table_trace_init),
DYNAMO_OPTION(shared_ibt_table_trace_load),
HASH_FUNCTION_NONE,
HASHTABLE_IBL_OFFSET(branch_type),
branch_type,
false, /* no lookup table */
FRAG_TABLE_SHARED |
FRAG_TABLE_TARGET_SHARED |
FRAG_TABLE_TRACE
_IF_DEBUG(ibl_trace_table_type_names[branch_type]));
#ifdef HASHTABLE_STATISTICS
if (INTERNAL_OPTION(hashtable_ibl_stats)) {
CHECK_UNPROT_STATS(&shared_pt->trace_ibt[branch_type]);
/* for compatibility using an entry in the per-branch type stats */
INIT_HASHTABLE_STATS(shared_pt->trace_ibt[branch_type].
UNPROT_STAT(trace_ibl_stats[branch_type]));
} else {
shared_pt->trace_ibt[branch_type].unprot_stats = NULL;
}
#endif /* HASHTABLE_STATISTICS */
}
if (DYNAMO_OPTION(shared_bb_ibt_tables)) {
hashtable_ibl_myinit(GLOBAL_DCONTEXT, &shared_pt->bb_ibt[branch_type],
DYNAMO_OPTION(shared_ibt_table_bb_init),
DYNAMO_OPTION(shared_ibt_table_bb_load),
HASH_FUNCTION_NONE,
HASHTABLE_IBL_OFFSET(branch_type),
branch_type,
false, /* no lookup table */
FRAG_TABLE_SHARED |
FRAG_TABLE_TARGET_SHARED
_IF_DEBUG(ibl_bb_table_type_names[branch_type]));
/* mark as inclusive table for bb's - we in fact currently
* keep only frags that are not FRAG_IS_TRACE_HEAD */
#ifdef HASHTABLE_STATISTICS
if (INTERNAL_OPTION(hashtable_ibl_stats)) {
/* for compatibility using an entry in the per-branch type stats */
CHECK_UNPROT_STATS(&shared_pt->bb_ibt[branch_type]);
/* FIXME: we don't expect trace_ibl_stats yet */
INIT_HASHTABLE_STATS(shared_pt->bb_ibt[branch_type].
UNPROT_STAT(bb_ibl_stats[branch_type]));
} else {
shared_pt->bb_ibt[branch_type].unprot_stats = NULL;
}
#endif /* HASHTABLE_STATISTICS */
}
}
}
#ifdef SHARING_STUDY
if (INTERNAL_OPTION(fragment_sharing_study)) {
uint size = HASHTABLE_SIZE(SHARED_HASH_BITS) * sizeof(shared_entry_t*);
shared_blocks = (shared_entry_t**) global_heap_alloc(size HEAPACCT(ACCT_OTHER));
memset(shared_blocks, 0, size);
shared_traces = (shared_entry_t**) global_heap_alloc(size HEAPACCT(ACCT_OTHER));
memset(shared_traces, 0, size);
}
#endif
}
/* thread-shared initialization */
void
fragment_init()
{
/* case 7966: don't initialize at all for hotp_only & thin_client
* FIXME: could set initial sizes to 0 for all configurations, instead
*/
if (RUNNING_WITHOUT_CODE_CACHE())
return;
/* make sure fields are at same place */
ASSERT(offsetof(fragment_t, flags) == offsetof(future_fragment_t, flags));
ASSERT(offsetof(fragment_t, tag) == offsetof(future_fragment_t, tag));
/* ensure we can read this w/o a lock: no cache line crossing, please */
ASSERT(ALIGNED(&flushtime_global, 4));
if (SHARED_FRAGMENTS_ENABLED()) {
/* tables are persistent across resets, only on heap for selfprot (case 7957) */
if (DYNAMO_OPTION(shared_bbs)) {
shared_bb = HEAP_TYPE_ALLOC(GLOBAL_DCONTEXT, fragment_table_t,
ACCT_FRAG_TABLE, PROTECTED);
}
if (DYNAMO_OPTION(shared_traces)) {
shared_trace = HEAP_TYPE_ALLOC(GLOBAL_DCONTEXT, fragment_table_t,
ACCT_FRAG_TABLE, PROTECTED);
}
shared_future = HEAP_TYPE_ALLOC(GLOBAL_DCONTEXT, fragment_table_t,
ACCT_FRAG_TABLE, PROTECTED);
}
if (USE_SHARED_PT())
shared_pt = HEAP_TYPE_ALLOC(GLOBAL_DCONTEXT, per_thread_t, ACCT_OTHER, PROTECTED);
if (SHARED_IBT_TABLES_ENABLED()) {
dead_lists =
HEAP_TYPE_ALLOC(GLOBAL_DCONTEXT, dead_table_lists_t, ACCT_OTHER, PROTECTED);
memset(dead_lists, 0, sizeof(*dead_lists));
}
fragment_reset_init();
#if defined(INTERNAL) || defined(CLIENT_INTERFACE)
if (TRACEDUMP_ENABLED() && DYNAMO_OPTION(shared_traces)) {
ASSERT(USE_SHARED_PT());
shared_pt->tracefile = open_log_file("traces-shared", NULL, 0);
ASSERT(shared_pt->tracefile != INVALID_FILE);
init_trace_file(shared_pt);
}
#endif
}
/* Free all thread-shared state not critical to forward progress;
* fragment_reset_init() will be called before continuing.
*/
void
fragment_reset_free(void)
{
/* case 7966: don't initialize at all for hotp_only & thin_client */
if (RUNNING_WITHOUT_CODE_CACHE())
return;
/* We must study the ibl tables before the trace/bb tables so that we're
* not looking at freed entries
*/
if (SHARED_IBT_TABLES_ENABLED()) {
ibl_branch_type_t branch_type;
dead_fragment_table_t *current, *next;
DEBUG_DECLARE(int table_count = 0;)
DEBUG_DECLARE(stats_int_t dead_tables = GLOBAL_STAT(num_dead_shared_ibt_tables);)
for (branch_type = IBL_BRANCH_TYPE_START;
branch_type < IBL_BRANCH_TYPE_END; branch_type++) {
if (DYNAMO_OPTION(shared_trace_ibt_tables)) {
DOLOG(1, LOG_FRAGMENT|LOG_STATS, {
hashtable_ibl_load_statistics(GLOBAL_DCONTEXT,
&shared_pt->trace_ibt[branch_type]);
});
hashtable_ibl_myfree(GLOBAL_DCONTEXT,
&shared_pt->trace_ibt[branch_type]);
}
if (DYNAMO_OPTION(shared_bb_ibt_tables)) {
DOLOG(1, LOG_FRAGMENT|LOG_STATS, {
hashtable_ibl_load_statistics(GLOBAL_DCONTEXT,
&shared_pt->bb_ibt[branch_type]);
});
hashtable_ibl_myfree(GLOBAL_DCONTEXT,
&shared_pt->bb_ibt[branch_type]);
}
}
/* Delete dead tables. */
/* grab lock for consistency, although we expect a single thread */
mutex_lock(&dead_tables_lock);
current = dead_lists->dead_tables;
while (current != NULL) {
DODEBUG({table_count++;});
next = current->next;
LOG(GLOBAL, LOG_FRAGMENT, 2,
"fragment_reset_free: dead table "PFX" cap %d, freeing\n",
current->table_unaligned, current->capacity);
hashtable_ibl_free_table(GLOBAL_DCONTEXT, current->table_unaligned,
current->table_flags, current->capacity);
heap_free(GLOBAL_DCONTEXT, current, sizeof(dead_fragment_table_t)
HEAPACCT(ACCT_IBLTABLE));
STATS_DEC(num_dead_shared_ibt_tables);
STATS_INC(num_dead_shared_ibt_tables_freed);
current = next;
DODEBUG({
if (dynamo_exited)
STATS_INC(num_dead_shared_ibt_tables_freed_at_exit);
});
}
dead_lists->dead_tables = dead_lists->dead_tables_tail = NULL;
ASSERT(table_count == dead_tables);
mutex_unlock(&dead_tables_lock);
}
/* FIXME: Take in a flag "permanent" that controls whether exiting or
* resetting. If resetting only, do not free unprot stats and entry stats
* (they're already in persistent heap, but we explicitly free them).
* This will be easy w/ unprot but will take work for entry stats
* since they resize themselves.
* Or, move them both to a new unprot and nonpersistent heap so we can
* actually free the memory back to the os, if we don't care to keep
* the stats across the reset.
*/
/* N.B.: to avoid rank order issues w/ shared_vm_areas lock being acquired
* after table_rwlock we do NOT grab the write lock before calling
* reset on the shared tables! We assume that reset involves suspending
* all other threads in DR and there will be no races. If the reset model
* changes, the lock order will have to be addressed.
*/
if (SHARED_FRAGMENTS_ENABLED()) {
/* clean up pending delayed deletion, if any */
vm_area_check_shared_pending(GLOBAL_DCONTEXT/*== safe to free all*/, NULL);
if (DYNAMO_OPTION(coarse_units)) {
/* We need to free coarse units earlier than vm_areas_exit() so we
* call it here. Must call before we free fine fragments so coarse
* can clean up incoming pointers.
*/
vm_area_coarse_units_reset_free();
}
#if defined(DEBUG) || defined(CLIENT_INTERFACE)
/* We need for CLIENT_INTERFACE to get fragment deleted events. */
# if !defined(DEBUG) && defined(CLIENT_INTERFACE)
if (dr_fragment_deleted_hook_exists()) {
# endif
if (DYNAMO_OPTION(shared_bbs))
hashtable_fragment_reset(GLOBAL_DCONTEXT, shared_bb);
if (DYNAMO_OPTION(shared_traces))
hashtable_fragment_reset(GLOBAL_DCONTEXT, shared_trace);
DODEBUG({hashtable_fragment_reset(GLOBAL_DCONTEXT, shared_future);});
# if !defined(DEBUG) && defined(CLIENT_INTERFACE)
}
# endif
#endif
if (DYNAMO_OPTION(shared_bbs))
hashtable_fragment_free(GLOBAL_DCONTEXT, shared_bb);
if (DYNAMO_OPTION(shared_traces))
hashtable_fragment_free(GLOBAL_DCONTEXT, shared_trace);
hashtable_fragment_free(GLOBAL_DCONTEXT, shared_future);
/* Do NOT free RAC table as its state cannot be rebuilt.
* We also do not free other RCT tables to avoid the time to rebuild them.
*/
}
#ifdef SHARING_STUDY
if (INTERNAL_OPTION(fragment_sharing_study)) {
print_shared_stats();
reset_shared_block_table(shared_blocks, &shared_blocks_lock);
reset_shared_block_table(shared_traces, &shared_traces_lock);
}
#endif
}
/* free all state */
void
fragment_exit()
{
/* case 7966: don't initialize at all for hotp_only & thin_client
* FIXME: could set initial sizes to 0 for all configurations, instead
*/
if (RUNNING_WITHOUT_CODE_CACHE())
goto cleanup;
#if defined(INTERNAL) || defined(CLIENT_INTERFACE)
if (TRACEDUMP_ENABLED() && DYNAMO_OPTION(shared_traces)) {
/* write out all traces prior to deleting any, so links print nicely */
uint i;
fragment_t *f;
/* change_linking_lock is required for output_trace(), though there
* won't be any races at this point of exiting.
*/
acquire_recursive_lock(&change_linking_lock);
TABLE_RWLOCK(shared_trace, read, lock);
for (i = 0; i < shared_trace->capacity; i++) {
f = shared_trace->table[i];
if (!REAL_FRAGMENT(f))
continue;
if (SHOULD_OUTPUT_FRAGMENT(f->flags))
output_trace(GLOBAL_DCONTEXT, shared_pt, f, -1);
}
TABLE_RWLOCK(shared_trace, read, unlock);
release_recursive_lock(&change_linking_lock);
exit_trace_file(shared_pt);
}
#endif
#ifdef FRAGMENT_SIZES_STUDY
DOLOG(1, (LOG_FRAGMENT|LOG_STATS), {
print_size_results();
});
#endif
fragment_reset_free();
#ifdef RETURN_AFTER_CALL
if (dynamo_options.ret_after_call && rac_non_module_table.live_table != NULL) {
DODEBUG({
DOLOG(1, LOG_FRAGMENT|LOG_STATS, {
hashtable_app_pc_load_statistics(GLOBAL_DCONTEXT,
rac_non_module_table.live_table);
});
hashtable_app_pc_study(GLOBAL_DCONTEXT, rac_non_module_table.live_table,
0/*table consistent*/);
});
hashtable_app_pc_free(GLOBAL_DCONTEXT, rac_non_module_table.live_table);
HEAP_TYPE_FREE(GLOBAL_DCONTEXT, rac_non_module_table.live_table,
app_pc_table_t, ACCT_AFTER_CALL, PROTECTED);
rac_non_module_table.live_table = NULL;
}
ASSERT(rac_non_module_table.persisted_table == NULL);
DELETE_LOCK(after_call_lock);
#endif
#if defined(RCT_IND_BRANCH) && defined(UNIX)
/* we do not free these tables in fragment_reset_free() b/c we
* would just have to build them all back up again in order to
* continue execution
*/
if ((TEST(OPTION_ENABLED, DYNAMO_OPTION(rct_ind_call)) ||
TEST(OPTION_ENABLED, DYNAMO_OPTION(rct_ind_jump))) &&
rct_global_table.live_table != NULL) {
DODEBUG({
DOLOG(1, LOG_FRAGMENT|LOG_STATS, {
hashtable_app_pc_load_statistics(GLOBAL_DCONTEXT,
rct_global_table.live_table);
});
hashtable_app_pc_study(GLOBAL_DCONTEXT, rct_global_table.live_table,
0/*table consistent*/);
});
hashtable_app_pc_free(GLOBAL_DCONTEXT, rct_global_table.live_table);
HEAP_TYPE_FREE(GLOBAL_DCONTEXT, rct_global_table.live_table, app_pc_table_t,
ACCT_AFTER_CALL, PROTECTED);
rct_global_table.live_table = NULL;
} else
ASSERT(rct_global_table.live_table == NULL);
ASSERT(rct_global_table.persisted_table == NULL);
#endif /* RCT_IND_BRANCH */
if (SHARED_FRAGMENTS_ENABLED()) {
/* tables are persistent across resets, only on heap for selfprot (case 7957) */
if (DYNAMO_OPTION(shared_bbs)) {
HEAP_TYPE_FREE(GLOBAL_DCONTEXT, shared_bb, fragment_table_t,
ACCT_FRAG_TABLE, PROTECTED);
shared_bb = NULL;
} else
ASSERT(shared_bb == NULL);
if (DYNAMO_OPTION(shared_traces)) {
HEAP_TYPE_FREE(GLOBAL_DCONTEXT, shared_trace, fragment_table_t,
ACCT_FRAG_TABLE, PROTECTED);
shared_trace = NULL;
} else
ASSERT(shared_trace == NULL);
HEAP_TYPE_FREE(GLOBAL_DCONTEXT, shared_future, fragment_table_t,
ACCT_FRAG_TABLE, PROTECTED);
shared_future = NULL;
}
if (SHARED_IBT_TABLES_ENABLED()) {
HEAP_TYPE_FREE(GLOBAL_DCONTEXT, dead_lists, dead_table_lists_t,
ACCT_OTHER, PROTECTED);
dead_lists = NULL;
} else
ASSERT(dead_lists == NULL);
if (USE_SHARED_PT()) {
ASSERT(shared_pt != NULL);
HEAP_TYPE_FREE(GLOBAL_DCONTEXT, shared_pt, per_thread_t, ACCT_OTHER, PROTECTED);
shared_pt = NULL;
} else
ASSERT(shared_pt == NULL);
if (SHARED_IBT_TABLES_ENABLED())
DELETE_LOCK(dead_tables_lock);
#ifdef SHARING_STUDY
if (INTERNAL_OPTION(fragment_sharing_study)) {
DELETE_LOCK(shared_blocks_lock);
DELETE_LOCK(shared_traces_lock);
}
#endif
cleanup:
/* FIXME: we shouldn't need these locks anyway for hotp_only & thin_client */
#if defined(INTERNAL) || defined(CLIENT_INTERFACE)
DELETE_LOCK(tracedump_mutex);
#endif
#ifdef CLIENT_INTERFACE
process_client_flush_requests(NULL, GLOBAL_DCONTEXT, client_flush_requests,
false /* no flush */);
DELETE_LOCK(client_flush_request_lock);
#endif
DELETE_LOCK(shared_cache_flush_lock);
}
/* Decrement the ref-count for any reference to table that the
* per_thread_t contains. If could_be_live is true, will acquire write
* locks for the currently live tables. */
/* NOTE: Can't inline in release build -- too many call sites? */
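/* Rough sketch of the ref-count scheme, inferred from the code below and from
 * update_private_ptr_to_shared_ibt_table(): a shared IBT table is "live" while
 * it is installed as shared_pt->{trace,bb}_ibt[branch_type], and its ref_count
 * counts the per_thread_t pointers that reference it.  Once the shared table is
 * replaced (e.g., after a resize), the old memory sits on
 * dead_lists->dead_tables as a dead_fragment_table_t, and the last thread to
 * drop its reference here frees that memory.
 */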
static /* inline */ void
dec_table_ref_count(dcontext_t *dcontext, ibl_table_t *table, bool could_be_live)
{
ibl_table_t *live_table = NULL;
ibl_branch_type_t branch_type;
/* Search live tables. A live table's ref-count is decremented
* during a thread exit. */
/* FIXME If the table is more likely to be dead, we can reverse the order
* and search dead tables first. */
if (!DYNAMO_OPTION(ref_count_shared_ibt_tables))
return;
ASSERT(TESTALL(FRAG_TABLE_SHARED | FRAG_TABLE_IBL_TARGETED,
table->table_flags));
if (could_be_live) {
for (branch_type = IBL_BRANCH_TYPE_START;
branch_type < IBL_BRANCH_TYPE_END; branch_type++) {
/* We match based on lookup table addresses. We need to lock the table
* during the compare and hold the lock during the ref-count dec to
* prevent a race with it being moved to the dead list.
*/
ibl_table_t *sh_table_ptr = TEST(FRAG_TABLE_TRACE, table->table_flags) ?
&shared_pt->trace_ibt[branch_type] : &shared_pt->bb_ibt[branch_type];
TABLE_RWLOCK(sh_table_ptr, write, lock);
if (table->table == sh_table_ptr->table) {
live_table = sh_table_ptr;
break;
}
TABLE_RWLOCK(sh_table_ptr, write, unlock);
}
}
if (live_table != NULL) {
/* During shutdown, the ref-count can reach 0. The table is freed
* in the fragment_exit() path. */
ASSERT(live_table->ref_count >= 1);
live_table->ref_count--;
TABLE_RWLOCK(live_table, write, unlock);
}
else { /* Search the dead tables list. */
dead_fragment_table_t *current = dead_lists->dead_tables;
dead_fragment_table_t *prev = NULL;
ASSERT(dead_lists->dead_tables != NULL);
ASSERT(dead_lists->dead_tables_tail != NULL);
/* We expect to be removing from the head of the list but due to
* races could be removing from the middle, i.e., if a preceding
* entry is about to be removed by another thread but the
* dead_tables_lock hasn't been acquired yet by that thread.
*/
mutex_lock(&dead_tables_lock);
for (current = dead_lists->dead_tables; current != NULL;
prev = current, current = current->next) {
if (current->table_unaligned == table->table_unaligned) {
ASSERT_CURIOSITY(current->ref_count >= 1);
current->ref_count--;
if (current->ref_count == 0) {
LOG(GLOBAL, LOG_FRAGMENT, 2,
"dec_table_ref_count: table "PFX" cap %d at ref 0, freeing\n",
current->table_unaligned, current->capacity);
/* Unlink this table from the list. */
if (prev != NULL)
prev->next = current->next;
if (current == dead_lists->dead_tables) {
/* remove from the front */
ASSERT(prev == NULL);
dead_lists->dead_tables = current->next;
}
if (current == dead_lists->dead_tables_tail)
dead_lists->dead_tables_tail = prev;
hashtable_ibl_free_table(GLOBAL_DCONTEXT,
current->table_unaligned,
current->table_flags,
current->capacity);
heap_free(GLOBAL_DCONTEXT, current, sizeof(dead_fragment_table_t)
HEAPACCT(ACCT_IBLTABLE));
STATS_DEC(num_dead_shared_ibt_tables);
STATS_INC(num_dead_shared_ibt_tables_freed);
}
break;
}
}
mutex_unlock(&dead_tables_lock);
ASSERT(current != NULL);
}
}
/* Decrement the ref-count for every shared IBT table that the
* per_thread_t has a reference to. */
static void
dec_all_table_ref_counts(dcontext_t *dcontext, per_thread_t *pt)
{
/* We can also decrement ref-count for dead shared tables here. */
if (SHARED_IBT_TABLES_ENABLED()) {
ibl_branch_type_t branch_type;
for (branch_type = IBL_BRANCH_TYPE_START;
branch_type < IBL_BRANCH_TYPE_END; branch_type++) {
if (DYNAMO_OPTION(shared_trace_ibt_tables)) {
ASSERT(pt->trace_ibt[branch_type].table != NULL);
dec_table_ref_count(dcontext, &pt->trace_ibt[branch_type],
true/*check live*/);
}
if (DYNAMO_OPTION(shared_bb_ibt_tables)) {
ASSERT(pt->bb_ibt[branch_type].table != NULL);
dec_table_ref_count(dcontext, &pt->bb_ibt[branch_type],
true/*check live*/);
}
}
}
}
/* re-initializes non-persistent memory */
void
fragment_thread_reset_init(dcontext_t *dcontext)
{
per_thread_t *pt;
ibl_branch_type_t branch_type;
/* case 7966: don't initialize at all for hotp_only & thin_client */
if (RUNNING_WITHOUT_CODE_CACHE())
return;
pt = (per_thread_t *) dcontext->fragment_field;
/* Important to init w/ the current timestamp to avoid this thread dec-ing a ref
 * count it was never included in!
 * Assumption: we don't need a lock to read flushtime_global atomically.
 * When resetting, though, thread free & re-init is done before the global free,
 * so we have to explicitly set it to 0 for that case.
 */
pt->flushtime_last_update = (dynamo_resetting) ? 0 : flushtime_global;
/* set initial hashtable sizes */
hashtable_fragment_init(dcontext, &pt->bb, INIT_HTABLE_SIZE_BB,
INTERNAL_OPTION(private_bb_load),
(hash_function_t)INTERNAL_OPTION(alt_hash_func),
0, 0 _IF_DEBUG("bblock"));
/* init routine will work for future_fragment_t* same as for fragment_t* */
hashtable_fragment_init(dcontext, &pt->future, INIT_HTABLE_SIZE_FUTURE,
INTERNAL_OPTION(private_future_load),
(hash_function_t)INTERNAL_OPTION(alt_hash_func),
0 /* hash_mask_offset */, 0
_IF_DEBUG("future"));
/* The trace table is no longer used by IBL routines and therefore
 * doesn't need a lookup table; we can also use the alternative hash
 * functions and a higher load.
 */
if (PRIVATE_TRACES_ENABLED()) {
hashtable_fragment_init(dcontext, &pt->trace, INIT_HTABLE_SIZE_TRACE,
INTERNAL_OPTION(private_trace_load),
(hash_function_t)INTERNAL_OPTION(alt_hash_func),
0 /* hash_mask_offset */,
FRAG_TABLE_TRACE
_IF_DEBUG("trace"));
}
/* We'll now have more control over hashtables based on branch
* type. The most important of all is of course the return
* target table. These tables should be populated only when
* we know that the entry is a valid target, a trace is
* created, and it is indeed targeted by an IBL.
*/
/* These tables are targeted by both bb and trace routines */
for (branch_type = IBL_BRANCH_TYPE_START;
branch_type < IBL_BRANCH_TYPE_END; branch_type++) {
if (!DYNAMO_OPTION(disable_traces)
/* If no traces and no bb ibl targets we point ibl at
* an empty trace table */
|| !DYNAMO_OPTION(bb_ibl_targets)) {
if (!DYNAMO_OPTION(shared_trace_ibt_tables)) {
hashtable_ibl_myinit(dcontext, &pt->trace_ibt[branch_type],
DYNAMO_OPTION(private_trace_ibl_targets_init),
DYNAMO_OPTION(private_ibl_targets_load),
HASH_FUNCTION_NONE,
HASHTABLE_IBL_OFFSET(branch_type),
branch_type,
false, /* no lookup table */
(DYNAMO_OPTION(shared_traces) ?
FRAG_TABLE_TARGET_SHARED : 0) |
FRAG_TABLE_TRACE
_IF_DEBUG(ibl_trace_table_type_names[branch_type]));
#ifdef HASHTABLE_STATISTICS
if (INTERNAL_OPTION(hashtable_ibl_stats)) {
CHECK_UNPROT_STATS(pt->trace_ibt[branch_type]);
/* for compatibility using an entry in the per-branch type stats */
INIT_HASHTABLE_STATS(pt->trace_ibt[branch_type].
UNPROT_STAT(trace_ibl_stats[branch_type]));
} else {
pt->trace_ibt[branch_type].unprot_stats = NULL;
}
#endif /* HASHTABLE_STATISTICS */
}
else {
/* ensure table from last time (if we had a reset) not still there */
memset(&pt->trace_ibt[branch_type], 0, sizeof(pt->trace_ibt[branch_type]));
update_private_ptr_to_shared_ibt_table(dcontext, branch_type,
true, /* trace = yes */
false, /* no adjust old
* ref-count */
true /* lock */);
#ifdef HASHTABLE_STATISTICS
if (INTERNAL_OPTION(hashtable_ibl_stats)) {
ALLOC_UNPROT_STATS(dcontext, &pt->trace_ibt[branch_type]);
CHECK_UNPROT_STATS(pt->trace_ibt[branch_type]);
INIT_HASHTABLE_STATS(pt->trace_ibt[branch_type].
UNPROT_STAT(trace_ibl_stats[branch_type]));
} else {
pt->trace_ibt[branch_type].unprot_stats = NULL;
}
#endif
}
}
/* When targeting BBs, currently the source is assumed to be only a
* bb since traces going to a bb for the first time should mark it
* as a trace head. Therefore the tables are currently only
* targeted by bb IBL routines. It will be possible to later
* deal with trace heads and allow a trace to target a BB with
* the intent of modifying its THCI.
*
* (FIXME: having another table for THCI IBLs seems better than
* adding a counter (starting at -1) to all blocks and
* trapping when 0 for marking a trace head and again at 50
* for creating a trace. And that is all of course after proving
* that doing it in DR has significant impact.)
*
* Note that private bb2bb transitions are not captured when
* we run with -shared_bbs.
*/
/* These tables should be populated only when we know that the
* entry is a valid target, and it is indeed targeted by an
* IBL. They have to be per-type so that our security
* policies are properly checked.
*/
if (DYNAMO_OPTION(bb_ibl_targets)) {
if (!DYNAMO_OPTION(shared_bb_ibt_tables)) {
hashtable_ibl_myinit(dcontext, &pt->bb_ibt[branch_type],
DYNAMO_OPTION(private_bb_ibl_targets_init),
DYNAMO_OPTION(private_bb_ibl_targets_load),
HASH_FUNCTION_NONE,
HASHTABLE_IBL_OFFSET(branch_type),
branch_type,
false, /* no lookup table */
(DYNAMO_OPTION(shared_bbs) ?
FRAG_TABLE_TARGET_SHARED : 0)
_IF_DEBUG(ibl_bb_table_type_names[branch_type]));
/* mark as inclusive table for bb's - we in fact currently
* keep only frags that are not FRAG_IS_TRACE_HEAD */
#ifdef HASHTABLE_STATISTICS
if (INTERNAL_OPTION(hashtable_ibl_stats)) {
/* for compatibility using an entry in the per-branch type stats */
CHECK_UNPROT_STATS(pt->bb_ibt[branch_type]);
/* FIXME: we don't expect trace_ibl_stats yet */
INIT_HASHTABLE_STATS(pt->bb_ibt[branch_type].
UNPROT_STAT(bb_ibl_stats[branch_type]));
} else {
pt->bb_ibt[branch_type].unprot_stats = NULL;
}
#endif /* HASHTABLE_STATISTICS */
}
else {
/* ensure table from last time (if we had a reset) not still there */
memset(&pt->bb_ibt[branch_type], 0, sizeof(pt->bb_ibt[branch_type]));
update_private_ptr_to_shared_ibt_table(dcontext, branch_type,
false, /* trace = no */
false, /* no adjust old
* ref-count */
true /* lock */);
#ifdef HASHTABLE_STATISTICS
if (INTERNAL_OPTION(hashtable_ibl_stats)) {
ALLOC_UNPROT_STATS(dcontext, &pt->bb_ibt[branch_type]);
CHECK_UNPROT_STATS(pt->bb_ibt[branch_type]);
INIT_HASHTABLE_STATS(pt->bb_ibt[branch_type].
UNPROT_STAT(bb_ibl_stats[branch_type]));
} else {
pt->bb_ibt[branch_type].unprot_stats = NULL;
}
#endif
}
}
}
ASSERT(IBL_BRANCH_TYPE_END == 3);
update_generated_hashtable_access(dcontext);
}
void
fragment_thread_init(dcontext_t *dcontext)
{
/* we allocate per_thread_t in the global heap solely for self-protection,
* even when turned off, since even with a lot of threads this isn't a lot of
* pressure on the global heap
*/
per_thread_t *pt;
/* case 7966: don't initialize un-needed data for hotp_only & thin_client.
* FIXME: could set htable initial sizes to 0 for all configurations, instead.
* per_thread_t is pretty big, so we avoid it, though it costs us checks for
* hotp_only in the islinking-related routines.
*/
if (RUNNING_WITHOUT_CODE_CACHE())
return;
pt = (per_thread_t *) global_heap_alloc(sizeof(per_thread_t) HEAPACCT(ACCT_OTHER));
dcontext->fragment_field = (void *) pt;
fragment_thread_reset_init(dcontext);
#if defined(INTERNAL) || defined(CLIENT_INTERFACE)
if (TRACEDUMP_ENABLED() && PRIVATE_TRACES_ENABLED()) {
pt->tracefile = open_log_file("traces", NULL, 0);
ASSERT(pt->tracefile != INVALID_FILE);
init_trace_file(pt);
}
#endif
#if defined(CLIENT_INTERFACE) && defined(CLIENT_SIDELINE)
ASSIGN_INIT_LOCK_FREE(pt->fragment_delete_mutex, fragment_delete_mutex);
#endif
pt->could_be_linking = false;
pt->wait_for_unlink = false;
pt->about_to_exit = false;
pt->flush_queue_nonempty = false;
pt->waiting_for_unlink = create_event();
pt->finished_with_unlink = create_event();
ASSIGN_INIT_LOCK_FREE(pt->linking_lock, linking_lock);
pt->finished_all_unlink = create_event();
pt->soon_to_be_linking = false;
pt->at_syscall_at_flush = false;
#ifdef PROFILE_LINKCOUNT
pt->tracedump_num_below_threshold = 0;
pt->tracedump_count_below_threshold = (linkcount_type_t) 0;
#endif
}
static bool
check_flush_queue(dcontext_t *dcontext, fragment_t *was_I_flushed);
/* frees all non-persistent memory */
void
fragment_thread_reset_free(dcontext_t *dcontext)
{
per_thread_t *pt = (per_thread_t *) dcontext->fragment_field;
DEBUG_DECLARE(ibl_branch_type_t branch_type;)
/* case 7966: nothing was initialized for hotp_only & thin_client, so nothing to free */
if (RUNNING_WITHOUT_CODE_CACHE())
return;
/* Dec ref count on any shared tables that are pointed to. */
dec_all_table_ref_counts(dcontext, pt);
#ifdef DEBUG
/* for non-debug we take the fast exit path and don't free local heap */
SELF_PROTECT_CACHE(dcontext, NULL, WRITABLE);
/* we remove flushed fragments from the htable, and they can be
* flushed after enter_threadexit() due to os_thread_stack_exit(),
* so we need to check the flush queue here
*/
mutex_lock(&pt->linking_lock);
check_flush_queue(dcontext, NULL);
mutex_unlock(&pt->linking_lock);
/* For consistency we remove entries from the IBL target
 * tables before we remove them from the trace table.  However,
 * we cannot free any fragments yet, since all of them will still
 * be present in the trace table.
 */
for (branch_type = IBL_BRANCH_TYPE_START;
branch_type < IBL_BRANCH_TYPE_END; branch_type++) {
if (!DYNAMO_OPTION(disable_traces)
/* If no traces and no bb ibl targets we point ibl at
* an empty trace table */
|| !DYNAMO_OPTION(bb_ibl_targets)) {
if (!DYNAMO_OPTION(shared_trace_ibt_tables)) {
DOLOG(2, LOG_FRAGMENT, {
hashtable_ibl_dump_table(dcontext, &pt->trace_ibt[branch_type]);
});
DOLOG(1, LOG_FRAGMENT|LOG_STATS, {
hashtable_ibl_load_statistics(dcontext, &pt->trace_ibt[branch_type]);
});
hashtable_ibl_myfree(dcontext, &pt->trace_ibt[branch_type]);
} else {
# ifdef HASHTABLE_STATISTICS
if (INTERNAL_OPTION(hashtable_ibl_stats)) {
print_hashtable_stats(dcontext, "Total",
shared_pt->trace_ibt[branch_type].name,
"trace ibl ",
get_branch_type_name(branch_type),
&pt->trace_ibt[branch_type].
UNPROT_STAT(trace_ibl_stats[branch_type]));
DEALLOC_UNPROT_STATS(dcontext, &pt->trace_ibt[branch_type]);
}
# endif
memset(&pt->trace_ibt[branch_type], 0,
sizeof(pt->trace_ibt[branch_type]));
}
}
if (DYNAMO_OPTION(bb_ibl_targets)) {
if (!DYNAMO_OPTION(shared_bb_ibt_tables)) {
DOLOG(2, LOG_FRAGMENT, {
hashtable_ibl_dump_table(dcontext, &pt->bb_ibt[branch_type]);
});
DOLOG(1, LOG_FRAGMENT|LOG_STATS, {
hashtable_ibl_load_statistics(dcontext, &pt->bb_ibt[branch_type]);
});
hashtable_ibl_myfree(dcontext, &pt->bb_ibt[branch_type]);
} else {
# ifdef HASHTABLE_STATISTICS
if (INTERNAL_OPTION(hashtable_ibl_stats)) {
print_hashtable_stats(dcontext, "Total",
shared_pt->bb_ibt[branch_type].name,
"bb ibl ",
get_branch_type_name(branch_type),
&pt->bb_ibt[branch_type].
UNPROT_STAT(bb_ibl_stats[branch_type]));
DEALLOC_UNPROT_STATS(dcontext, &pt->bb_ibt[branch_type]);
}
# endif
memset(&pt->bb_ibt[branch_type], 0, sizeof(pt->bb_ibt[branch_type]));
}
}
}
/* case 7653: we can't free the main tables prior to freeing the contents
* of all of them, as link freeing involves looking up in the other tables.
*/
if (PRIVATE_TRACES_ENABLED()) {
DOLOG(1, LOG_FRAGMENT|LOG_STATS, {
hashtable_fragment_load_statistics(dcontext, &pt->trace);
});
hashtable_fragment_reset(dcontext, &pt->trace);
}
DOLOG(1, LOG_FRAGMENT|LOG_STATS, {
hashtable_fragment_load_statistics(dcontext, &pt->bb);
});
hashtable_fragment_reset(dcontext, &pt->bb);
DOLOG(1, LOG_FRAGMENT|LOG_STATS, {
hashtable_fragment_load_statistics(dcontext, &pt->future);
});
hashtable_fragment_reset(dcontext, &pt->future);
if (PRIVATE_TRACES_ENABLED())
hashtable_fragment_free(dcontext, &pt->trace);
hashtable_fragment_free(dcontext, &pt->bb);
hashtable_fragment_free(dcontext, &pt->future);
SELF_PROTECT_CACHE(dcontext, NULL, READONLY);
#else
/* Case 10807: Clients need to be informed of fragment deletions
* so we'll reset the relevant hash tables for CI release builds.
*/
# ifdef CLIENT_INTERFACE
if (PRIVATE_TRACES_ENABLED())
hashtable_fragment_reset(dcontext, &pt->trace);
hashtable_fragment_reset(dcontext, &pt->bb);
# endif
#endif /* !DEBUG */
}
/* atexit cleanup */
void
fragment_thread_exit(dcontext_t *dcontext)
{
per_thread_t *pt = (per_thread_t *) dcontext->fragment_field;
/* case 7966: nothing was initialized for hotp_only & thin_client, so nothing to free */
if (RUNNING_WITHOUT_CODE_CACHE())
return;
#if defined(INTERNAL) || defined(CLIENT_INTERFACE)
if (TRACEDUMP_ENABLED() && PRIVATE_TRACES_ENABLED()) {
/* write out all traces prior to deleting any, so links print nicely */
uint i;
fragment_t *f;
for (i = 0; i < pt->trace.capacity; i++) {
f = pt->trace.table[i];
if (!REAL_FRAGMENT(f))
continue;
if (SHOULD_OUTPUT_FRAGMENT(f->flags))
output_trace(dcontext, pt, f, -1);
}
exit_trace_file(pt);
}
#endif
fragment_thread_reset_free(dcontext);
/* events are global */
destroy_event(pt->waiting_for_unlink);
destroy_event(pt->finished_with_unlink);
destroy_event(pt->finished_all_unlink);
DELETE_LOCK(pt->linking_lock);
#if defined(CLIENT_INTERFACE) && defined(CLIENT_SIDELINE)
DELETE_LOCK(pt->fragment_delete_mutex);
#endif
global_heap_free(pt, sizeof(per_thread_t) HEAPACCT(ACCT_OTHER));
dcontext->fragment_field = NULL;
}
#ifdef UNIX
void
fragment_fork_init(dcontext_t *dcontext)
{
/* FIXME: what about global file? */
# if defined(INTERNAL) || defined(CLIENT_INTERFACE)
per_thread_t *pt = (per_thread_t *) dcontext->fragment_field;
if (TRACEDUMP_ENABLED() && PRIVATE_TRACES_ENABLED()) {
/* new log dir has already been created, so just open a new log file */
pt->tracefile = open_log_file("traces", NULL, 0);
ASSERT(pt->tracefile != INVALID_FILE);
init_trace_file(pt);
}
# endif
}
#endif
/* fragment_t heap layout looks like this:
*
* fragment_t/trace_t
* translation_info_t*, if necessary
* array composed of different sizes of linkstub_t subclasses:
* direct_linkstub_t
* cbr_fallthrough_linkstub_t
* indirect_linkstub_t
* post_linkstub_t, if necessary
*/
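/* Illustrative layout (exact sizes come from FRAGMENT_STRUCT_SIZE() and
 * linkstubs_heap_size()): a fine-grained bb with one direct and one indirect
 * exit is a single nonpersistent heap allocation of the form
 *   [ fragment_t | direct_linkstub_t | indirect_linkstub_t ]
 * with a trailing post_linkstub_t only when linkstub_frag_offs_at_end()
 * says the fragment offset must be stored explicitly (see fragment_create()).
 */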
static uint
fragment_heap_size(uint flags, int direct_exits, int indirect_exits)
{
uint total_sz;
ASSERT((direct_exits + indirect_exits > 0) || TEST(FRAG_COARSE_GRAIN, flags));
total_sz = FRAGMENT_STRUCT_SIZE(flags) +
linkstubs_heap_size(flags, direct_exits, indirect_exits);
/* we rely on a small heap size for our ushort offset at the end */
ASSERT(total_sz <= USHRT_MAX);
return total_sz;
}
/* Allocates memory for a fragment_t and linkstubs and initializes them, but
* does not do any fcache-related initialization.
*/
static fragment_t *
fragment_create_heap(dcontext_t *dcontext,
int direct_exits, int indirect_exits, uint flags)
{
dcontext_t *alloc_dc = FRAGMENT_ALLOC_DC(dcontext, flags);
uint heapsz = fragment_heap_size(flags, direct_exits, indirect_exits);
/* linkstubs are in an array immediately after the fragment_t/trace_t struct */
fragment_t *f = (fragment_t *)
nonpersistent_heap_alloc(alloc_dc, heapsz
HEAPACCT(TEST(FRAG_IS_TRACE, flags) ?
ACCT_TRACE : ACCT_FRAGMENT));
LOG(THREAD, LOG_FRAGMENT, 5,
"fragment heap size for flags 0x%08x, exits %d %d, is %d => "PFX"\n",
flags, direct_exits, indirect_exits, heapsz, f);
return f;
}
static void
fragment_init_heap(fragment_t *f, app_pc tag, int direct_exits, int indirect_exits,
uint flags)
{
ASSERT(f != NULL);
f->flags = flags; /* MUST set before calling fcache_add_fragment or
* FRAGMENT_EXIT_STUBS */
f->tag = tag;
/* Let fragment_create() fill in; other users are building fake fragments */
DODEBUG({ f->id = -1; });
f->next_vmarea = NULL; /* must be set by caller */
f->prev_vmarea = NULL; /* must be set by caller */
f->also.also_vmarea = NULL; /* must be set by caller */
linkstubs_init(FRAGMENT_EXIT_STUBS(f), direct_exits, indirect_exits, f);
/* initialize non-ibt entry to top of fragment (caller responsible for
* setting up prefix)
*/
f->prefix_size = 0;
#ifdef FRAGMENT_SIZES_STUDY
record_fragment_size(f->size, (flags & FRAG_IS_TRACE) != 0);
#endif
f->in_xlate.incoming_stubs = NULL;
#ifdef CUSTOM_TRACES_RET_REMOVAL
f->num_calls = 0;
f->num_rets = 0;
#endif
/* trace-only fields */
if (TEST(FRAG_IS_TRACE, flags)) {
trace_only_t *t = TRACE_FIELDS(f);
t->bbs = NULL;
/* real num_bbs won't be set until after the trace is emitted,
* but we need a non-zero value for linkstub_fragment()
*/
t->num_bbs = 1;
#ifdef PROFILE_RDTSC
t->count = 0UL;
t->total_time = (uint64) 0;
#endif
#ifdef SIDELINE_COUNT_STUDY
t->count_old_pre = (linkcount_type_t) 0;
t->count_old_post = (linkcount_type_t) 0;
#endif
}
}
/* Create a new fragment_t with empty prefix and return it.
* The fragment_t is allocated on the global or local heap, depending on the flags,
* unless FRAG_COARSE_GRAIN is set, in which case the fragment_t is a unique
* temporary struct that is NOT heap allocated and is only safe to use
* so long as the bb_building_lock is held!
*/
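/* Illustrative exit-count shapes (the authoritative callers live in the emit
 * path): a block ending in an indirect branch passes direct_exits=0,
 * indirect_exits=1, while a conditional-branch-ending block passes
 * direct_exits=2, indirect_exits=0; the coarse-grain asserts below spell out
 * exactly which shapes FRAG_COARSE_GRAIN fragments may use.
 */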
fragment_t *
fragment_create(dcontext_t *dcontext, app_pc tag, int body_size,
int direct_exits, int indirect_exits, int exits_size, uint flags)
{
fragment_t *f;
DEBUG_DECLARE(stats_int_t next_id;)
DOSTATS({
/* should watch this stat; if it gets too high we need to revisit
 * which fragments need the post-linkstub offset
 */
if (linkstub_frag_offs_at_end(flags, direct_exits, indirect_exits))
STATS_INC(num_fragment_post_linkstub);
});
/* ensure no races during a reset */
ASSERT(!dynamo_resetting);
if (TEST(FRAG_COARSE_GRAIN, flags)) {
ASSERT(DYNAMO_OPTION(coarse_units));
ASSERT_OWN_MUTEX(USE_BB_BUILDING_LOCK(), &bb_building_lock);
ASSERT(!TEST(FRAG_IS_TRACE, flags));
ASSERT(TEST(FRAG_SHARED, flags));
ASSERT(fragment_prefix_size(flags) == 0);
ASSERT((direct_exits == 0 && indirect_exits == 1) ||
(indirect_exits == 0 && (direct_exits == 1 || direct_exits == 2)));
/* FIXME: eliminate this temp fragment and linkstubs and
* have custom emit and link code that does not require such data
* structures? It would certainly be faster code.
* But would still want to record each exit's target in a convenient
* data structure, for later linking, unless we try to link in
* the same pass in which we emit indirect stubs.
* We could also use fragment_create() and free the resulting struct
* somewhere and switch to a wrapper at that point.
*/
memset(&coarse_emit_fragment, 0, sizeof(coarse_emit_fragment));
f = (fragment_t *) &coarse_emit_fragment;
/* We do not mark as FRAG_FAKE since this is pretty much a real
* fragment_t, and we do want to walk its linkstub_t structs, which
* are present.
*/
} else {
f = fragment_create_heap(dcontext, direct_exits, indirect_exits, flags);
}
fragment_init_heap(f, tag, direct_exits, indirect_exits, flags);
/* To make debugging easier we assign coarse-grain ids in the same namespace
* as fine-grain fragments, though we won't remember them at all.
*/
STATS_INC_ASSIGN(num_fragments, next_id);
IF_X64(ASSERT_TRUNCATE(f->id, int, next_id));
DOSTATS({ f->id = (int) next_id; });
DO_GLOBAL_STATS({
if (!TEST(FRAG_IS_TRACE, f->flags)) {
RSTATS_INC(num_bbs);
IF_X64(if (FRAG_IS_32(f->flags)) STATS_INC(num_32bit_bbs);)
}
});
DOSTATS({
/* avoid double-counting for adaptive working set */
if (!fragment_lookup_deleted(dcontext, tag) && !TEST(FRAG_COARSE_GRAIN, flags))
STATS_INC(num_unique_fragments);
});
/* FIXME: make fragment count a release-build stat so we can do this in
* release builds
*/
DOSTATS({
if (stats != NULL &&
(uint) GLOBAL_STAT(num_fragments) == INTERNAL_OPTION(reset_at_fragment_count)) {
schedule_reset(RESET_ALL);
}
});
/* size is a ushort
* our offsets are ushorts as well: they assume body_size is small enough, not size
*/
#ifdef CLIENT_INTERFACE
if (body_size + exits_size + fragment_prefix_size(flags) > MAX_FRAGMENT_SIZE) {
FATAL_USAGE_ERROR(INSTRUMENTATION_TOO_LARGE, 2,
get_application_name(), get_application_pid());
}
#endif
ASSERT(body_size + exits_size + fragment_prefix_size(flags) <= MAX_FRAGMENT_SIZE);
/* currently MAX_FRAGMENT_SIZE is USHRT_MAX, but future proofing */
ASSERT_TRUNCATE(f->size, ushort,
(body_size + exits_size + fragment_prefix_size(flags)));
f->size = (ushort) (body_size + exits_size + fragment_prefix_size(flags));
/* fcache_add will fill in start_pc, next_fcache,
* prev_fcache, and fcache_extra
*/
fcache_add_fragment(dcontext, f);
/* after fcache_add_fragment so we can call get_fragment_coarse_info */
DOSTATS({
if (TEST(FRAG_SHARED, flags)) {
STATS_INC(num_shared_fragments);
if (TEST(FRAG_IS_TRACE, flags))
STATS_INC(num_shared_traces);
else if (TEST(FRAG_COARSE_GRAIN, flags)) {
coarse_info_t *info = get_fragment_coarse_info(f);
if (get_executable_area_coarse_info(f->tag) != info)
STATS_INC(num_coarse_secondary);
STATS_INC(num_coarse_fragments);
} else
STATS_INC(num_shared_bbs);
} else {
STATS_INC(num_private_fragments);
if (TEST(FRAG_IS_TRACE, flags))
STATS_INC(num_private_traces);
else
STATS_INC(num_private_bbs);
}
});
/* wait until initialized fragment completely before dumping any stats */
DOLOG(1, LOG_FRAGMENT|LOG_VMAREAS, {
if (INTERNAL_OPTION(global_stats_interval) &&
(f->id % INTERNAL_OPTION(global_stats_interval) == 0)) {
LOG(GLOBAL, LOG_FRAGMENT, 1, "Created %d fragments\n", f->id);
dump_global_stats(false);
}
if (INTERNAL_OPTION(thread_stats_interval) &&
INTERNAL_OPTION(thread_stats)) {
/* FIXME: why do we need a new dcontext? */
dcontext_t *dcontext = get_thread_private_dcontext();
if (THREAD_STATS_ON(dcontext) &&
THREAD_STAT(dcontext, num_fragments) % INTERNAL_OPTION(thread_stats_interval) == 0) {
dump_thread_stats(dcontext, false);
}
}
});
#ifdef WINDOWS
DOLOG(1, LOG_FRAGMENT|LOG_VMAREAS, {
if (f->id % 50000 == 0) {
LOG(GLOBAL, LOG_VMAREAS, 1,
"50K fragment check point: here are the loaded modules:\n");
print_modules(GLOBAL, DUMP_NOT_XML);
LOG(GLOBAL, LOG_VMAREAS, 1,
"50K fragment check point: here are the executable areas:\n");
print_executable_areas(GLOBAL);
}
});
#endif
return f;
}
/* Creates a new fragment_t+linkstubs from the passed-in fragment and
* fills in linkstub_t and fragment_t fields, copying the fcache-related fields
* from the passed-in fragment (so be careful how the fields are used).
* Meant to be used to create a full fragment from a coarse-grain fragment.
* Caller is responsible for freeing via fragment_free() w/ the same dcontext
* passed in here.
*/
fragment_t *
fragment_recreate_with_linkstubs(dcontext_t *dcontext, fragment_t *f_src)
{
uint num_dir, num_indir;
uint size;
fragment_t *f_tgt;
instrlist_t *ilist;
linkstub_t *l;
cache_pc body_end_pc;
/* Not FAKE since has linkstubs, but still fake in a sense since no fcache
* slot -- need to mark that?
*/
uint flags = (f_src->flags & ~FRAG_FAKE);
ASSERT_CURIOSITY(TEST(FRAG_COARSE_GRAIN, f_src->flags)); /* only use so far */
/* FIXME case 9325: build from tag here? Need to exactly re-mangle + re-instrument.
* We use _exact to get any elided final jmp not counted in size
*/
ilist = decode_fragment_exact(dcontext, f_src, NULL, NULL, f_src->flags,
&num_dir, &num_indir);
f_tgt = fragment_create_heap(dcontext, num_dir, num_indir, flags);
fragment_init_heap(f_tgt, f_src->tag, num_dir, num_indir, flags);
f_tgt->start_pc = f_src->start_pc;
/* Can't call this until we have start_pc set */
body_end_pc = set_linkstub_fields(dcontext, f_tgt, ilist, num_dir, num_indir,
false/*do not emit*/);
/* Calculate total size */
IF_X64(ASSERT_TRUNCATE(size, uint, (body_end_pc - f_tgt->start_pc)));
size = (uint) (body_end_pc - f_tgt->start_pc);
for (l = FRAGMENT_EXIT_STUBS(f_tgt); l != NULL; l = LINKSTUB_NEXT_EXIT(l)) {
if (!EXIT_HAS_LOCAL_STUB(l->flags, f_tgt->flags))
continue; /* it's kept elsewhere */
size += linkstub_size(dcontext, f_tgt, l);
#ifdef CUSTOM_EXIT_STUBS
size += l->fixed_stub_offset;
#endif
}
ASSERT_TRUNCATE(f_tgt->size, ushort, size);
f_tgt->size = (ushort) size;
ASSERT(TEST(FRAG_FAKE, f_src->flags) || size == f_src->size);
ASSERT_TRUNCATE(f_tgt->prefix_size, byte, fragment_prefix_size(f_src->flags));
f_tgt->prefix_size = (byte) fragment_prefix_size(f_src->flags);
ASSERT(TEST(FRAG_FAKE, f_src->flags) || f_src->prefix_size == f_tgt->prefix_size);
f_tgt->fcache_extra = f_src->fcache_extra;
instrlist_clear_and_destroy(dcontext, ilist);
return f_tgt;
}
/* Frees the storage associated with f.
* Callers should use fragment_delete() instead of this routine, unless they
* obtained their fragment_t from fragment_recreate_with_linkstubs().
*/
void
fragment_free(dcontext_t *dcontext, fragment_t *f)
{
dcontext_t *alloc_dc = FRAGMENT_ALLOC_DC(dcontext, f->flags);
uint heapsz;
int direct_exits = 0;
int indirect_exits = 0;
linkstub_t *l = FRAGMENT_EXIT_STUBS(f);
for (; l != NULL; l = LINKSTUB_NEXT_EXIT(l)) {
if (LINKSTUB_DIRECT(l->flags))
direct_exits++;
else {
ASSERT(LINKSTUB_INDIRECT(l->flags));
indirect_exits++;
}
}
heapsz = fragment_heap_size(f->flags, direct_exits, indirect_exits);
STATS_INC(num_fragments_deleted);
if (HAS_STORED_TRANSLATION_INFO(f)) {
ASSERT(FRAGMENT_TRANSLATION_INFO(f) != NULL);
translation_info_free(dcontext, FRAGMENT_TRANSLATION_INFO(f));
} else
ASSERT(FRAGMENT_TRANSLATION_INFO(f) == NULL);
/* N.B.: monitor_remove_fragment() was called in fragment_delete,
* which is assumed to have been called prior to fragment_free
*/
linkstub_free_exitstubs(dcontext, f);
if ((f->flags & FRAG_IS_TRACE) != 0) {
trace_only_t *t = TRACE_FIELDS(f);
if (t->bbs != NULL) {
nonpersistent_heap_free(alloc_dc, t->bbs, t->num_bbs*sizeof(trace_bb_info_t)
HEAPACCT(ACCT_TRACE));
}
nonpersistent_heap_free(alloc_dc, f, heapsz HEAPACCT(ACCT_TRACE));
}
else {
nonpersistent_heap_free(alloc_dc, f, heapsz HEAPACCT(ACCT_FRAGMENT));
}
}
/* Returns the end of the fragment body + any local stubs (excluding selfmod copy) */
cache_pc
fragment_stubs_end_pc(fragment_t *f)
{
if (TEST(FRAG_SELFMOD_SANDBOXED, f->flags))
return FRAGMENT_SELFMOD_COPY_PC(f);
else
return f->start_pc + f->size;
}
/* Returns the end of the fragment body (excluding exit stubs and selfmod copy) */
cache_pc
fragment_body_end_pc(dcontext_t *dcontext, fragment_t *f)
{
linkstub_t *l;
for (l = FRAGMENT_EXIT_STUBS(f); l; l = LINKSTUB_NEXT_EXIT(l)) {
if (EXIT_HAS_LOCAL_STUB(l->flags, f->flags)) {
return EXIT_STUB_PC(dcontext, f, l);
}
}
/* must be no stubs after fragment body */
return fragment_stubs_end_pc(f);
}
#ifdef PROFILE_LINKCOUNT
linkcount_type_t
get_total_linkcount(fragment_t *f)
{
/* return total count of exit counters */
linkstub_t *l;
linkcount_type_t total = (linkcount_type_t) 0;
for (l = FRAGMENT_EXIT_STUBS(f); l != NULL; l = LINKSTUB_NEXT_EXIT(l)) {
total += l->count;
}
return total;
}
#endif
#if defined(CLIENT_INTERFACE) && defined(CLIENT_SIDELINE)
/* synchronization routines needed for sideline threads so that fragments
 * they are referencing don't get deleted out from under them */
void
fragment_get_fragment_delete_mutex(dcontext_t *dcontext)
{
if (dynamo_exited || dcontext == GLOBAL_DCONTEXT)
return;
mutex_lock(&(((per_thread_t *) dcontext->fragment_field)->fragment_delete_mutex));
}
void
fragment_release_fragment_delete_mutex(dcontext_t *dcontext)
{
if (dynamo_exited || dcontext == GLOBAL_DCONTEXT)
return;
mutex_unlock(&(((per_thread_t *) dcontext->fragment_field)->fragment_delete_mutex));
}
#endif
/* cleaner to have own flags since there are no negative versions
* of FRAG_SHARED and FRAG_IS_TRACE for distinguishing from "don't care"
*/
enum {
LOOKUP_TRACE = 0x001,
LOOKUP_BB = 0x002,
LOOKUP_PRIVATE = 0x004,
LOOKUP_SHARED = 0x008,
};
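/* E.g., the wrappers below combine these flags: fragment_lookup() passes
 * LOOKUP_TRACE|LOOKUP_BB|LOOKUP_PRIVATE|LOOKUP_SHARED to search everything,
 * while fragment_lookup_shared_bb() narrows the search to LOOKUP_BB|LOOKUP_SHARED.
 */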
/* A lookup constrained by bb/trace and/or shared/private */
static inline fragment_t *
fragment_lookup_type(dcontext_t *dcontext, app_pc tag, uint lookup_flags)
{
fragment_t *f;
LOG(THREAD, LOG_MONITOR, 6, "fragment_lookup_type "PFX" 0x%x\n",
tag, lookup_flags);
if (dcontext != GLOBAL_DCONTEXT && TEST(LOOKUP_PRIVATE, lookup_flags)) {
/* FIXME: add a hashtablex.h wrapper that checks #entries and
* grabs lock for us for all lookups?
*/
/* look at private tables */
per_thread_t *pt = (per_thread_t *) dcontext->fragment_field;
/* case 147: traces take precedence over bbs */
if (PRIVATE_TRACES_ENABLED() && TEST(LOOKUP_TRACE, lookup_flags)) {
/* now try trace table */
f = hashtable_fragment_lookup(dcontext, (ptr_uint_t)tag, &pt->trace);
if (f->tag != NULL) {
ASSERT(f->tag == tag);
DOLOG(2, LOG_FRAGMENT, {
if (DYNAMO_OPTION(shared_traces)) {
/* ensure private trace never shadows shared trace */
fragment_t *sf;
read_lock(&shared_trace->rwlock);
sf = hashtable_fragment_lookup(dcontext, (ptr_uint_t)tag,
shared_trace);
read_unlock(&shared_trace->rwlock);
ASSERT(sf->tag == NULL);
}
});
ASSERT(!TESTANY(FRAG_FAKE|FRAG_COARSE_GRAIN, f->flags));
return f;
}
}
if (TEST(LOOKUP_BB, lookup_flags) && pt->bb.entries > 0) {
/* basic block table last */
f = hashtable_fragment_lookup(dcontext, (ptr_uint_t)tag, &pt->bb);
if (f->tag != NULL) {
ASSERT(f->tag == tag);
DOLOG(2, LOG_FRAGMENT, {
if (DYNAMO_OPTION(shared_bbs)) {
/* ensure private bb never shadows shared bb, except for
* temp privates for trace building
*/
fragment_t *sf;
read_lock(&shared_bb->rwlock);
sf = hashtable_fragment_lookup(dcontext, (ptr_uint_t)tag,
shared_bb);
read_unlock(&shared_bb->rwlock);
ASSERT(sf->tag == NULL || TEST(FRAG_TEMP_PRIVATE, f->flags));
}
});
ASSERT(!TESTANY(FRAG_FAKE|FRAG_COARSE_GRAIN, f->flags));
return f;
}
}
}
if (TEST(LOOKUP_SHARED, lookup_flags)) {
if (DYNAMO_OPTION(shared_traces) && TEST(LOOKUP_TRACE, lookup_flags)) {
/* MUST look at shared trace table before shared bb table,
* since a shared trace can shadow a shared trace head
*/
read_lock(&shared_trace->rwlock);
f = hashtable_fragment_lookup(dcontext, (ptr_uint_t)tag, shared_trace);
read_unlock(&shared_trace->rwlock);
if (f->tag != NULL) {
ASSERT(f->tag == tag);
ASSERT(!TESTANY(FRAG_FAKE|FRAG_COARSE_GRAIN, f->flags));
return f;
}
}
if (DYNAMO_OPTION(shared_bbs) && TEST(LOOKUP_BB, lookup_flags)) {
/* MUST look at private trace table before shared bb table,
* since a private trace can shadow a shared trace head
*/
read_lock(&shared_bb->rwlock);
f = hashtable_fragment_lookup(dcontext, (ptr_uint_t)tag, shared_bb);
read_unlock(&shared_bb->rwlock);
if (f->tag != NULL) {
ASSERT(f->tag == tag);
ASSERT(!TESTANY(FRAG_FAKE|FRAG_COARSE_GRAIN, f->flags));
return f;
}
}
}
return NULL;
}
/* lookup a fragment tag */
fragment_t *
fragment_lookup(dcontext_t *dcontext, app_pc tag)
{
return fragment_lookup_type(dcontext, tag,
LOOKUP_TRACE|LOOKUP_BB|LOOKUP_PRIVATE|LOOKUP_SHARED);
}
/* lookup a fragment tag, but only look in trace tables
* N.B.: because of shadowing this may not return what fragment_lookup() returns!
*/
fragment_t *
fragment_lookup_trace(dcontext_t *dcontext, app_pc tag)
{
return fragment_lookup_type(dcontext, tag, LOOKUP_TRACE|LOOKUP_PRIVATE|LOOKUP_SHARED);
}
/* lookup a fragment tag, but only look in bb tables
* N.B.: because of shadowing this may not return what fragment_lookup() returns!
*/
fragment_t *
fragment_lookup_bb(dcontext_t *dcontext, app_pc tag)
{
return fragment_lookup_type(dcontext, tag, LOOKUP_BB|LOOKUP_PRIVATE|LOOKUP_SHARED);
}
/* lookup a fragment tag, but only look in shared bb table
* N.B.: because of shadowing this may not return what fragment_lookup() returns!
*/
fragment_t *
fragment_lookup_shared_bb(dcontext_t *dcontext, app_pc tag)
{
return fragment_lookup_type(dcontext, tag, LOOKUP_BB|LOOKUP_SHARED);
}
/* lookup a fragment tag, but only look in tables that are the same shared-ness
* as flags.
* N.B.: because of shadowing this may not return what fragment_lookup() returns!
*/
fragment_t *
fragment_lookup_same_sharing(dcontext_t *dcontext, app_pc tag, uint flags)
{
return fragment_lookup_type(dcontext, tag, LOOKUP_TRACE|LOOKUP_BB|
(TEST(FRAG_SHARED, flags) ?
LOOKUP_SHARED : LOOKUP_PRIVATE));
}
#ifdef DEBUG /* currently only used for debugging */
static fragment_t *
hashtable_pclookup(dcontext_t *dcontext, fragment_table_t *table, cache_pc pc)
{
uint i;
fragment_t *f;
ASSERT_TABLE_SYNCHRONIZED(table, READWRITE); /* lookup requires read (or write) lock */
for (i = 0; i < table->capacity; i++) {
f = table->table[i];
if (!REAL_FRAGMENT(f))
continue;
if (pc >= f->start_pc && pc < (f->start_pc + f->size)) {
return f;
}
}
return NULL;
}
/* lookup a fragment pc in the fcache by walking all hashtables.
* we have more efficient methods (fcache_fragment_pclookup) so this is only
* used for debugging.
*/
fragment_t *
fragment_pclookup_by_htable(dcontext_t *dcontext, cache_pc pc, fragment_t *wrapper)
{
/* if every fragment is guaranteed to end in 1+ stubs (which
* is not true for DYNAMO_OPTION(separate_private_stubs)) we can
* simply decode forward until we hit the stub and recover
* the linkstub_t* from there -- much more efficient than walking
* all the hashtables, plus nicely handles invisible & removed frags!
* FIXME: measure perf hit of pclookup, implement this decode strategy.
* also we can miss invisible or removed fragments (case 122) so we
* may want this regardless of performance -- see also FIXME below.
*/
fragment_t *f;
per_thread_t *pt = NULL;
if (dcontext != GLOBAL_DCONTEXT) {
pt = (per_thread_t *) dcontext->fragment_field;
/* look at private traces first */
if (PRIVATE_TRACES_ENABLED()) {
f = hashtable_pclookup(dcontext, &pt->trace, pc);
if (f != NULL)
return f;
}
}
if (DYNAMO_OPTION(shared_traces)) {
/* then shared traces */
read_lock(&shared_trace->rwlock);
f = hashtable_pclookup(dcontext, shared_trace, pc);
read_unlock(&shared_trace->rwlock);
if (f != NULL)
return f;
}
if (DYNAMO_OPTION(shared_bbs)) {
/* then shared basic blocks */
read_lock(&shared_bb->rwlock);
f = hashtable_pclookup(dcontext, shared_bb, pc);
read_unlock(&shared_bb->rwlock);
if (f != NULL)
return f;
}
if (dcontext != GLOBAL_DCONTEXT) {
/* now private basic blocks */
f = hashtable_pclookup(dcontext, &pt->bb, pc);
if (f != NULL)
return f;
}
if (DYNAMO_OPTION(coarse_units)) {
coarse_info_t *info = get_executable_area_coarse_info(pc);
while (info != NULL) { /* loop over primary and secondary unit */
cache_pc body;
app_pc tag = fragment_coarse_pclookup(dcontext, info, pc, &body);
if (tag != NULL) {
ASSERT(wrapper != NULL);
fragment_coarse_wrapper(wrapper, tag, body);
return wrapper;
}
ASSERT(info->frozen || info->non_frozen == NULL);
info = info->non_frozen;
ASSERT(info == NULL || !info->frozen);
}
}
/* FIXME: shared fragment may have been removed from hashtable but
* still be in cache, and e.g. handle_modified_code still needs to know about it --
* should walk deletion vector
*/
return NULL;
}
#endif /* DEBUG */
/* lookup a fragment pc in the fcache */
fragment_t *
fragment_pclookup(dcontext_t *dcontext, cache_pc pc, fragment_t *wrapper)
{
/* Rather than walk every single hashtable, including the invisible table,
* and the pending-deletion list (case 3567), we find the fcache unit
* and walk it.
* An even more efficient alternative would be to decode backward, but
* that's not doable in general.
*
* If every fragment is guaranteed to end in 1+ stubs (which
* is not true for DYNAMO_OPTION(separate_{private,shared}_stubs)) we can
* simply decode forward until we hit the stub and recover
* the linkstub_t* from there.
* Or we can decode until we hit a jmp and if it's to a linked fragment_t,
* search its incoming list.
* Stub decoding is complicated by CLIENT_INTERFACE custom stubs and by
* PROFILE_LINKCOUNT stub variations.
*/
return fcache_fragment_pclookup(dcontext, pc, wrapper);
}
/* Performs a pclookup and if the result is a coarse-grain fragment, allocates
* a new fragment_t+linkstubs.
* Returns in alloc whether the returned fragment_t was allocated and needs to be
* freed by the caller via fragment_free().
* If no result is found, alloc is set to false.
* FIXME: use FRAG_RECREATED flag to indicate allocated instead?
*/
fragment_t *
fragment_pclookup_with_linkstubs(dcontext_t *dcontext, cache_pc pc,
/*OUT*/bool *alloc)
{
fragment_t wrapper;
fragment_t *f = fragment_pclookup(dcontext, pc, &wrapper);
ASSERT(alloc != NULL);
if (f != NULL && TEST(FRAG_COARSE_GRAIN, f->flags)) {
ASSERT(f == &wrapper);
f = fragment_recreate_with_linkstubs(dcontext, f);
*alloc = true;
} else
*alloc = false;
return f;
}
/* add f to the ftable */
void
fragment_add(dcontext_t *dcontext, fragment_t *f)
{
per_thread_t *pt = (per_thread_t *) dcontext->fragment_field;
fragment_table_t *table = GET_FTABLE(pt, f->flags);
bool resized;
/* no future frags! */
ASSERT(!TEST(FRAG_IS_FUTURE, f->flags));
DOCHECK(1, {
fragment_t *existing = fragment_lookup(dcontext, f->tag);
ASSERT(existing == NULL ||
IF_CUSTOM_TRACES(/* we create and persist shadowed trace heads */
(TEST(FRAG_IS_TRACE_HEAD, f->flags) ||
TEST(FRAG_IS_TRACE_HEAD, existing->flags)) ||)
/* private trace or temp can shadow shared bb */
(TESTANY(FRAG_IS_TRACE | FRAG_TEMP_PRIVATE, f->flags) &&
TEST(FRAG_SHARED, f->flags) != TEST(FRAG_SHARED, existing->flags)) ||
/* shared trace can shadow shared trace head, even with
* -remove_shared_trace_heads */
(TESTALL(FRAG_IS_TRACE | FRAG_SHARED, f->flags) &&
!TEST(FRAG_IS_TRACE, existing->flags) &&
TESTALL(FRAG_SHARED | FRAG_IS_TRACE_HEAD, existing->flags)));
});
/* We'd like the shared fragment table synch to be independent of the
* bb building synch (which may become more fine-grained in the future),
* so an add needs to hold the write lock to prevent conflicts with
* other adds.
* We may be able to have a scheme where study() and remove() are writers
* but add() is a reader -- but that's confusing and prone to errors in
* the future.
* We assume that synchronizing addition of the same tag is done through
* other means -- we cannot grab this while performing the lookup
* w/o making our read locks check to see if we're the writer,
* which is a perf hit. Only the actual hashtable add is a "write".
*/
TABLE_RWLOCK(table, write, lock);
resized = fragment_add_to_hashtable(dcontext, f, table);
TABLE_RWLOCK(table, write, unlock);
/* After resizing a table that is targeted by inlined IBL heads,
 * the current fragment will need to be repatched; but we don't have
 * to update the stubs when using per-type trace tables, since the
 * trace table itself is not targeted and so resizing it doesn't matter.
 */
#ifdef SHARING_STUDY
if (INTERNAL_OPTION(fragment_sharing_study)) {
if (TEST(FRAG_IS_TRACE, f->flags))
add_shared_block(shared_traces, &shared_traces_lock, f);
else
add_shared_block(shared_blocks, &shared_blocks_lock, f);
}
#endif
}
/* Many options, use macros in fragment.h for readability
* If output:
* dumps f to trace file
* If remove:
* removes f from ftable
* If unlink:
* if f is linked, unlinks f
* removes f from incoming link tables
* If fcache:
* deletes f from fcache unit
*/
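/* E.g., actions == 0 performs every stage below (output, monitor removal,
 * unlink, htable/vmarea/fcache removal, heap free), while a caller that has
 * already handled a stage passes the corresponding FRAGDEL_NO_* flag --
 * an illustrative combination is FRAGDEL_NO_OUTPUT|FRAGDEL_NO_MONITOR;
 * see the FRAGDEL_* macros in fragment.h for the combinations real call
 * sites use.
 */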
void
fragment_delete(dcontext_t *dcontext, fragment_t *f, uint actions)
{
#if defined(CLIENT_INTERFACE) && defined(CLIENT_SIDELINE)
bool acquired_shared_vm_lock = false;
bool acquired_fragdel_lock = false;
#endif
LOG(THREAD, LOG_FRAGMENT, 3,
"fragment_delete: *"PFX" F%d("PFX")."PFX" %s 0x%x\n",
f, f->id, f->tag, f->start_pc,
TEST(FRAG_IS_TRACE, f->flags) ? "trace" : "bb", actions);
DOLOG(1, LOG_FRAGMENT, {
if ((f->flags & FRAG_CANNOT_DELETE) != 0) {
LOG(THREAD, LOG_FRAGMENT, 2,
"ERROR: trying to delete undeletable F%d("PFX") 0x%x\n",
f->id, f->tag, actions);
}
});
ASSERT((f->flags & FRAG_CANNOT_DELETE) == 0);
ASSERT((f->flags & FRAG_IS_FUTURE) == 0);
/* ensure the actual free of a shared fragment is done only
* after a multi-stage flush or a reset
*/
ASSERT(!TEST(FRAG_SHARED, f->flags) || TEST(FRAG_WAS_DELETED, f->flags) ||
dynamo_exited || dynamo_resetting || is_self_allsynch_flushing());
#if defined(CLIENT_INTERFACE) && defined(CLIENT_SIDELINE)
/* need to protect ability to reference frag fields and fcache space */
/* all other options are mostly notification */
if (monitor_delete_would_abort_trace(dcontext, f) && DYNAMO_OPTION(shared_traces)) {
/* must acquire shared_vm_areas lock before fragment_delete_mutex (PR 596371) */
acquired_shared_vm_lock = true;
acquire_recursive_lock(&change_linking_lock);
acquire_vm_areas_lock(dcontext, FRAG_SHARED);
}
/* XXX: I added the test for FRAG_WAS_DELETED for i#759: does sideline
* look at fragments after that is set? If so need to resolve rank order
* w/ shared_cache_lock.
*/
if (!TEST(FRAG_WAS_DELETED, f->flags) &&
(!TEST(FRAGDEL_NO_HEAP, actions) || !TEST(FRAGDEL_NO_FCACHE, actions))) {
acquired_fragdel_lock = true;
fragment_get_fragment_delete_mutex(dcontext);
}
#endif
#if defined(INTERNAL) || defined(CLIENT_INTERFACE)
if (!TEST(FRAGDEL_NO_OUTPUT, actions)) {
if (TEST(FRAGDEL_NEED_CHLINK_LOCK, actions) && TEST(FRAG_SHARED, f->flags))
acquire_recursive_lock(&change_linking_lock);
else {
ASSERT(!TEST(FRAG_SHARED, f->flags) ||
self_owns_recursive_lock(&change_linking_lock));
}
fragment_output(dcontext, f);
if (TEST(FRAGDEL_NEED_CHLINK_LOCK, actions) && TEST(FRAG_SHARED, f->flags))
release_recursive_lock(&change_linking_lock);
}
#endif
if (!TEST(FRAGDEL_NO_MONITOR, actions))
monitor_remove_fragment(dcontext, f);
if (!TEST(FRAGDEL_NO_UNLINK, actions)) {
if (TEST(FRAGDEL_NEED_CHLINK_LOCK, actions) && TEST(FRAG_SHARED, f->flags))
acquire_recursive_lock(&change_linking_lock);
else {
ASSERT(!TEST(FRAG_SHARED, f->flags) ||
self_owns_recursive_lock(&change_linking_lock));
}
if ((f->flags & FRAG_LINKED_INCOMING) != 0)
unlink_fragment_incoming(dcontext, f);
if ((f->flags & FRAG_LINKED_OUTGOING) != 0)
unlink_fragment_outgoing(dcontext, f);
incoming_remove_fragment(dcontext, f);
if (TEST(FRAGDEL_NEED_CHLINK_LOCK, actions) && TEST(FRAG_SHARED, f->flags))
release_recursive_lock(&change_linking_lock);
}
if (!TEST(FRAGDEL_NO_HTABLE, actions))
fragment_remove(dcontext, f);
if (!TEST(FRAGDEL_NO_VMAREA, actions))
vm_area_remove_fragment(dcontext, f);
if (!TEST(FRAGDEL_NO_FCACHE, actions)) {
fcache_remove_fragment(dcontext, f);
}
#ifdef SIDELINE
if (dynamo_options.sideline)
sideline_fragment_delete(f);
#endif
#ifdef CLIENT_INTERFACE
if (dr_fragment_deleted_hook_exists() &&
(!TEST(FRAGDEL_NO_HEAP, actions) || !TEST(FRAGDEL_NO_FCACHE, actions)))
instrument_fragment_deleted(dcontext, f->tag, f->flags);
#endif
#ifdef UNIX
if (INTERNAL_OPTION(profile_pcs))
pcprofile_fragment_deleted(dcontext, f);
#endif
if (!TEST(FRAGDEL_NO_HEAP, actions)) {
fragment_free(dcontext, f);
}
#if defined(CLIENT_INTERFACE) && defined(CLIENT_SIDELINE)
if (acquired_fragdel_lock)
fragment_release_fragment_delete_mutex(dcontext);
if (acquired_shared_vm_lock) {
release_vm_areas_lock(dcontext, FRAG_SHARED);
release_recursive_lock(&change_linking_lock);
}
#endif
}
/* Record translation info. Typically used for pending-delete fragments
* whose original app code cannot be trusted as it has been modified (case
* 3559).
* Caller is required to take care of synch (typically this is called
* during a flush or during fragment emit)
*/
void
fragment_record_translation_info(dcontext_t *dcontext, fragment_t *f, instrlist_t *ilist)
{
ASSERT(!NEED_SHARED_LOCK(f->flags) ||
!USE_BB_BUILDING_LOCK() ||
OWN_MUTEX(&bb_building_lock) ||
OWN_MUTEX(&trace_building_lock) ||
is_self_flushing());
/* We require that either the FRAG_WAS_DELETED flag is set, to
* indicate there is allocated memory in the live field that needs
* to be freed, or that the FRAG_HAS_TRANSLATION_INFO field is
* set, indicating that there is a special appended field pointing
* to the translation info.
*/
if (TEST(FRAG_HAS_TRANSLATION_INFO, f->flags)) {
ASSERT(!TEST(FRAG_WAS_DELETED, f->flags));
*(FRAGMENT_TRANSLATION_INFO_ADDR(f)) =
record_translation_info(dcontext, f, ilist);
ASSERT(FRAGMENT_TRANSLATION_INFO(f) != NULL);
STATS_INC(num_fragment_translation_stored);
} else if (TEST(FRAG_WAS_DELETED, f->flags)) {
ASSERT(f->in_xlate.incoming_stubs == NULL);
if (INTERNAL_OPTION(safe_translate_flushed)) {
f->in_xlate.translation_info = record_translation_info(dcontext, f, ilist);
ASSERT(f->in_xlate.translation_info != NULL);
ASSERT(FRAGMENT_TRANSLATION_INFO(f) == f->in_xlate.translation_info);
STATS_INC(num_fragment_translation_stored);
#ifdef INTERNAL
DODEBUG({
if (INTERNAL_OPTION(stress_recreate_pc)) {
/* verify recreation */
stress_test_recreate(dcontext, f, NULL);
}
});
#endif
} else
f->in_xlate.translation_info = NULL;
} else
ASSERT_NOT_REACHED();
}
/* Removes the shared fragment f from all lookup tables in a safe
* manner that does not require a full flush synch.
* This routine can be called without synchronizing with other threads.
*/
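/* In rough terms (see the body): f is unlinked, removed from the DR-only
 * lookup tables, marked FRAG_WAS_DELETED, and handed to
 * add_to_lazy_deletion_list() so its memory is reclaimed later, all under the
 * bb/trace building locks plus the change_linking_lock rather than a flush synch.
 */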
void
fragment_remove_shared_no_flush(dcontext_t *dcontext, fragment_t *f)
{
DEBUG_DECLARE(bool shared_ibt_table_used = !TEST(FRAG_IS_TRACE, f->flags) ?
DYNAMO_OPTION(shared_bb_ibt_tables) :
DYNAMO_OPTION(shared_trace_ibt_tables);)
ASSERT_NOT_IMPLEMENTED(!TEST(FRAG_COARSE_GRAIN, f->flags));
/* Strategy: ensure no races in updating table or links by grabbing the high-level
* locks that are used to synchronize additions to the table itself.
* Then, simply remove directly from DR-only tables, and safely from ib tables.
* FIXME: There are still risks that the fragment's link state may change
*/
LOG(THREAD, LOG_FRAGMENT, 3, "fragment_remove_shared_no_flush: F%d\n", f->id);
ASSERT(TEST(FRAG_SHARED, f->flags));
if (TEST(FRAG_IS_TRACE, f->flags)) {
mutex_lock(&trace_building_lock);
}
/* grab bb building lock even for traces to further prevent link changes */
mutex_lock(&bb_building_lock);
if (TEST(FRAG_WAS_DELETED, f->flags)) {
/* since caller can't grab locks, we can have a race where someone
* else deletes first -- in that case nothing to do
*/
STATS_INC(shared_delete_noflush_race);
mutex_unlock(&bb_building_lock);
if (TEST(FRAG_IS_TRACE, f->flags))
mutex_unlock(&trace_building_lock);
return;
}
/* FIXME: try to share code w/ fragment_unlink_for_deletion() */
/* Make link changes atomic. We also want vm_area_remove_fragment and
* marking as deleted to be atomic so we grab vm_areas lock up front.
*/
acquire_recursive_lock(&change_linking_lock);
acquire_vm_areas_lock(dcontext, f->flags);
/* FIXME: share all this code w/ vm_area_unlink_fragments()
* The work there is just different enough to make that hard, though.
*/
if (TEST(FRAG_LINKED_OUTGOING, f->flags))
unlink_fragment_outgoing(GLOBAL_DCONTEXT, f);
if (TEST(FRAG_LINKED_INCOMING, f->flags))
unlink_fragment_incoming(GLOBAL_DCONTEXT, f);
incoming_remove_fragment(GLOBAL_DCONTEXT, f);
/* remove from ib lookup tables in a safe manner. this removes the
* frag only from this thread's tables OR from shared tables.
*/
fragment_prepare_for_removal(GLOBAL_DCONTEXT, f);
/* fragment_remove ignores the ibl tables for shared fragments */
fragment_remove(GLOBAL_DCONTEXT, f);
/* FIXME: we don't currently remove from thread-private ibl tables as that
* requires walking all of the threads. */
ASSERT_NOT_IMPLEMENTED(!IS_IBL_TARGET(f->flags) || shared_ibt_table_used);
vm_area_remove_fragment(dcontext, f);
/* case 8419: make marking as deleted atomic w/ fragment_t.also_vmarea field
* invalidation, so that users of vm_area_add_to_list() can rely on this
* flag to determine validity
*/
f->flags |= FRAG_WAS_DELETED;
release_vm_areas_lock(dcontext, f->flags);
release_recursive_lock(&change_linking_lock);
/* if a flush occurs, this fragment will be ignored -- so we must store
* translation info now, just in case
*/
if (!TEST(FRAG_HAS_TRANSLATION_INFO, f->flags))
fragment_record_translation_info(dcontext, f, NULL);
/* try to catch any potential races */
ASSERT(!TEST(FRAG_LINKED_OUTGOING, f->flags));
ASSERT(!TEST(FRAG_LINKED_INCOMING, f->flags));
mutex_unlock(&bb_building_lock);
if (TEST(FRAG_IS_TRACE, f->flags)) {
mutex_unlock(&trace_building_lock);
}
/* no locks can be held when calling this, but f is already unreachable,
* so can do this outside of locks
*/
add_to_lazy_deletion_list(dcontext, f);
}
/* Prepares a fragment for delayed deletion by unlinking it.
* Caller is responsible for calling vm_area_remove_fragment().
* Caller must hold the change_linking_lock if f is shared.
*/
void
fragment_unlink_for_deletion(dcontext_t *dcontext, fragment_t *f)
{
ASSERT(!TEST(FRAG_SHARED, f->flags) ||
self_owns_recursive_lock(&change_linking_lock));
/* this is not an error since fcache unit flushing puts lazily-deleted
* fragments onto its list to ensure they are in the same pending
* delete entry as the normal fragments -- so this routine becomes
* a nop for them
*/
if (TEST(FRAG_WAS_DELETED, f->flags)) {
LOG(THREAD, LOG_FRAGMENT|LOG_VMAREAS, 5,
"NOT unlinking F%d("PFX") for deletion\n", f->id, f->start_pc);
STATS_INC(deleted_frags_re_deleted);
return;
}
LOG(THREAD, LOG_FRAGMENT|LOG_VMAREAS, 5,
"unlinking F%d("PFX") for deletion\n", f->id, f->start_pc);
#if defined(INTERNAL) || defined(CLIENT_INTERFACE)
/* we output now to avoid problems reading component blocks
* of traces after source modules are unloaded
*/
fragment_output(dcontext, f);
#endif
if (TEST(FRAG_LINKED_OUTGOING, f->flags))
unlink_fragment_outgoing(dcontext, f);
if (TEST(FRAG_LINKED_INCOMING, f->flags))
unlink_fragment_incoming(dcontext, f);
/* We need to remove our outgoing exits from others' incoming lists and
 * redirect others' outgoing exits to a future fragment.  The former must
 * be done before we remove f from the hashtable, and the latter must be
 * done now to avoid other fragments jumping into stale code, so we do
 * both here and not when we do the real fragment_delete().
 * We don't need to do this for private fragments, but we do anyway
 * so that we can use the fragment_t.incoming_stubs field as a union.
 */
incoming_remove_fragment(dcontext, f);
if (TEST(FRAG_SHARED, f->flags)) {
/* we shouldn't need to worry about someone else changing the
* link status, since nobody else is allowed to be in DR now,
* and afterward they all must invalidate any ptrs they hold
* to flushed fragments, and flushed fragments are not
* reachable via hashtable or incoming lists!
*/
/* ASSUMPTION: monitor_remove_fragment does NOT need to
* be called for all threads, since private trace head
* ctrs are cleared lazily (only relevant here for
* -shared_traces) and invalidating last_{exit,fragment}
* is done by the trace overlap and abort in the main
* flush loop.
*/
}
/* need to remove from htable
* we used to only do fragment_prepare_for_removal() (xref case 1808)
* for private fragments, but for case 3559 we want to free up the
* incoming field at unlink time, and we must do all 3 of unlink,
* vmarea, and htable freeing at once.
*/
fragment_remove(dcontext, f);
/* let recreate_fragment_ilist() know that this fragment is
* pending deletion and might no longer match the app's state.
* for shared fragments, also lets people know f is not in a normal
* vmarea anymore (though actually moving f is up to the caller).
* additionally the flag indicates that translation info was allocated
* for this fragment.
*/
f->flags |= FRAG_WAS_DELETED;
/* the original app code cannot be used to recreate state, so we must
* store translation info now
*/
if (!TEST(FRAG_HAS_TRANSLATION_INFO, f->flags))
fragment_record_translation_info(dcontext, f, NULL);
STATS_INC(fragments_unlinked_for_deletion);
}
/* When shared IBT tables are used, update thread-private state
* to reflect the current parameter values -- hash mask, table address --
* for the shared ftable.
*/
static bool
update_private_ibt_table_ptrs(dcontext_t *dcontext, ibl_table_t *ftable
_IF_DEBUG(fragment_entry_t **orig_table))
{
bool table_change = false;
if (TEST(FRAG_TABLE_SHARED, ftable->table_flags)) {
per_thread_t *pt = (per_thread_t *) dcontext->fragment_field;
if (TEST(FRAG_TABLE_TRACE, ftable->table_flags) &&
ftable->table != pt->trace_ibt[ftable->branch_type].table) {
DODEBUG({
if (orig_table != NULL)
*orig_table =
pt->trace_ibt[ftable->branch_type].table;
});
table_change = true;
}
else if (DYNAMO_OPTION(bb_ibl_targets) &&
!TEST(FRAG_TABLE_TRACE, ftable->table_flags) &&
ftable->table != pt->bb_ibt[ftable->branch_type].table) {
DODEBUG({
if (orig_table != NULL)
*orig_table =
pt->bb_ibt[ftable->branch_type].table;
});
table_change = true;
}
if (table_change) {
update_private_ptr_to_shared_ibt_table(dcontext, ftable->branch_type,
TEST(FRAG_TABLE_TRACE,
ftable->table_flags),
true, /* adjust old
* ref-count */
true /* lock */);
DODEBUG({
if (orig_table != NULL)
ASSERT(ftable->table != *orig_table);
});
}
#ifdef DEBUG
else if (orig_table != NULL)
*orig_table = NULL;
#endif
}
return table_change;
}
/* Update the thread-private ptrs for the dcontext to point to the
* currently "live" shared IBT table for branch_type.
* When adjust_ref_count==true, adjust the ref-count for the old table
* that the dcontext currently points to.
* When lock_table==true, lock the shared table prior to manipulating
* it. If this is false, the caller must have locked the table already.
* NOTE: If adjust_ref_count=true, lock_table should be true also and
* the caller should NOT hold the table lock, since the underlying
* routines that manipulate the ref count lock the table.
*/
static inline void
update_private_ptr_to_shared_ibt_table(dcontext_t *dcontext,
ibl_branch_type_t branch_type, bool trace,
bool adjust_old_ref_count, bool lock_table)
{
per_thread_t *pt = (per_thread_t *) dcontext->fragment_field;
ibl_table_t *sh_table_ptr = trace ? &shared_pt->trace_ibt[branch_type] :
&shared_pt->bb_ibt[branch_type];
ibl_table_t *pvt_table_ptr = trace ? &pt->trace_ibt[branch_type] :
&pt->bb_ibt[branch_type];
/* Point to the new table. The shared table must be locked prior to
* accessing any of its fields. */
if (lock_table)
TABLE_RWLOCK(sh_table_ptr, write, lock);
ASSERT_OWN_WRITE_LOCK(true, &sh_table_ptr->rwlock);
/* We can get here multiple times due to callers being racy */
if (pvt_table_ptr->table == sh_table_ptr->table) {
SYSLOG_INTERNAL_WARNING_ONCE("racy private ptr to shared table update");
if (lock_table)
TABLE_RWLOCK(sh_table_ptr, write, unlock);
return;
}
/* Decrement the ref-count for any old table that is pointed to. */
if (adjust_old_ref_count) {
dec_table_ref_count(dcontext, pvt_table_ptr, false/*can't be live*/);
}
/* We must hold at least the read lock when writing, else we could grab
* an inconsistent mask/lookuptable pair if another thread is in the middle
* of resizing the table (case 10405).
*/
/* Only data for one set of tables is stored in TLS -- for the trace
* tables in the default config OR the BB tables in shared BBs
* only mode.
*/
if ((trace || SHARED_BB_ONLY_IB_TARGETS()) &&
DYNAMO_OPTION(ibl_table_in_tls))
update_lookuptable_tls(dcontext, sh_table_ptr);
ASSERT(pvt_table_ptr->table != sh_table_ptr->table);
pvt_table_ptr->table = sh_table_ptr->table;
pvt_table_ptr->hash_mask = sh_table_ptr->hash_mask;
/* We copy the unaligned value over also because it's used for matching
* in the dead table list. */
pvt_table_ptr->table_unaligned = sh_table_ptr->table_unaligned;
pvt_table_ptr->table_flags = sh_table_ptr->table_flags;
sh_table_ptr->ref_count++;
ASSERT(sh_table_ptr->ref_count > 0);
DODEBUG({
LOG(THREAD, LOG_FRAGMENT|LOG_STATS, 2,
"update_table_ptrs %s-%s table: addr "PFX", mask "PIFX"\n",
trace ? "trace" : "BB", sh_table_ptr->name,
sh_table_ptr->table, sh_table_ptr->hash_mask);
if ((trace || SHARED_BB_ONLY_IB_TARGETS()) &&
DYNAMO_OPTION(ibl_table_in_tls)) {
local_state_extended_t *state =
(local_state_extended_t *) dcontext->local_state;
LOG(THREAD, LOG_FRAGMENT|LOG_STATS, 2,
"TLS state %s-%s table: addr "PFX", mask "PIFX"\n",
trace ? "trace" : "BB", sh_table_ptr->name,
state->table_space.table[branch_type].lookuptable,
state->table_space.table[branch_type].hash_mask);
}
});
#ifdef HASHTABLE_STATISTICS
if (INTERNAL_OPTION(hashtable_ibl_entry_stats)) {
pvt_table_ptr->entry_stats_to_lookup_table =
sh_table_ptr->entry_stats_to_lookup_table;
}
else
pvt_table_ptr->entry_stats_to_lookup_table = 0;
#endif
if (lock_table)
TABLE_RWLOCK(sh_table_ptr, write, unlock);
/* We don't need the lock for this, and holding it will have rank order
* issues with disassembling in debug builds */
if (PRIVATE_TRACES_ENABLED() || DYNAMO_OPTION(bb_ibl_targets))
update_generated_hashtable_access(dcontext);
STATS_INC(num_shared_ibt_table_ptr_resets);
}
/* When shared IBT tables are used, update thread-private state
* to reflect the current parameter values -- hash mask, table address --
* for all tables.
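 * Returns true if any thread-private table pointer was updated.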
*/
static bool
update_all_private_ibt_table_ptrs(dcontext_t *dcontext, per_thread_t *pt)
{
bool rc = false;
if (SHARED_IBT_TABLES_ENABLED()) {
ibl_branch_type_t branch_type;
for (branch_type = IBL_BRANCH_TYPE_START;
branch_type < IBL_BRANCH_TYPE_END; branch_type++) {
if (DYNAMO_OPTION(shared_trace_ibt_tables)) {
if (update_private_ibt_table_ptrs(dcontext,
&shared_pt->trace_ibt[branch_type]
_IF_DEBUG(NULL)))
rc = true;
}
if (DYNAMO_OPTION(shared_bb_ibt_tables)) {
if (update_private_ibt_table_ptrs(dcontext,
&shared_pt->bb_ibt[branch_type]
_IF_DEBUG(NULL)))
rc = true;
}
}
}
return rc;
}
/* Prepares for removal of f from ftable (does not delete f) by pointing the
* fragment's lookup table entry to an entry point that leads to a cache exit.
* This routine is needed for safe removal of a fragment by a thread while
* another thread may be about to jump to it via an IBL. The lookuptable is
* left in a slightly inconsistent state but one that is accepted by the
* consistency check. See the note on hashtable_fragment_check_consistency
* in the routine.
*
 * Returns true if the fragment was found & its entry was prepared for removal.
*/
static bool
fragment_prepare_for_removal_from_table(dcontext_t *dcontext, fragment_t *f,
ibl_table_t *ftable)
{
uint hindex;
fragment_entry_t fe = FRAGENTRY_FROM_FRAGMENT(f);
fragment_entry_t *pg;
/* We need the write lock since the start_pc is modified (though we
* technically may be ok in all scenarios there) and to avoid problems
* with parallel prepares (shouldn't count on the bb building lock).
* Grab the lock after all private ptrs are updated since that
* operation might grab the same lock, if this remove is from a
* shared IBT table.
*/
/* FIXME: why do we need to update here? */
update_private_ibt_table_ptrs(dcontext, ftable _IF_DEBUG(NULL));
TABLE_RWLOCK(ftable, write, lock);
pg = hashtable_ibl_lookup_for_removal(fe, ftable, &hindex);
if (pg != NULL) {
/* Note all IBL routines that could be looking up an entry
* in this table have to exit with equivalent register
* state. It is possible to enter a private bb IBL
* lookup, shared bb IBL lookup or trace bb IBL lookup and
* if a delete race is hit then they would all go to the
* pending_delete_pc that we'll now supply. They HAVE to
* be all equivalent independent of the source fragment for this to work.
*
* On the other hand we can provide different start_pc
* values if we have different tables. We currently don't
* take advantage of this but we'll leave the power in place.
*/
/* FIXME: [perf] we could memoize this value in the table itself */
cache_pc pending_delete_pc =
get_target_delete_entry_pc(dcontext, ftable);
ASSERT(IBL_ENTRIES_ARE_EQUAL(*pg, fe));
ASSERT(pending_delete_pc != NULL);
LOG(THREAD, LOG_FRAGMENT, 3,
"fragment_prepare: remove F%d("PFX") from %s[%u] (table addr "PFX"), "
"set to "PFX"\n",
f->id, f->tag, ftable->name, hindex, ftable->table,
pending_delete_pc);
/* start_pc_fragment will not match start_pc for the table
* consistency checks. However, the hashtable_fragment_check_consistency
* routine verifies that either start_pc/start_pc_fragment match OR that
* the start_pc_fragment is set to the correct target_delete
* entry point.
*
* We change the tag to FAKE_TAG, which preserves linear probing.
* In a thread-shared table, this ensures that the same tag will never
* be present in more than one entry in a table (1 real entry &
* 1+ target_delete entries).
* This isn't needed in a thread-private table but doesn't hurt.
*/
ftable->table[hindex].start_pc_fragment = pending_delete_pc;
ftable->table[hindex].tag_fragment = FAKE_TAG;
/* FIXME In a shared table, this means that the entry cannot
* be overwritten for a fragment with the same tag. */
ftable->unlinked_entries++;
ftable->entries--;
TABLE_RWLOCK(ftable, write, unlock);
ASSERT(!TEST(FRAG_CANNOT_DELETE, f->flags));
return true;
}
TABLE_RWLOCK(ftable, write, unlock);
return false;
}
/* Prepares fragment f for removal from all IBL routine targeted tables.
* Does not actually remove the entry from the table
* as that can only be done through proper cross-thread synchronization.
*
 * Returns true if the fragment was found & prepared for removal in at least
 * one table.
*/
bool
fragment_prepare_for_removal(dcontext_t *dcontext, fragment_t *f)
{
per_thread_t *pt;
bool prepared = false;
ibl_branch_type_t branch_type;
if (!IS_IBL_TARGET(f->flags)) {
/* nothing to do */
return false;
}
ASSERT(TEST(FRAG_SHARED, f->flags) || dcontext != GLOBAL_DCONTEXT);
/* We need a real per_thread_t & context below so make sure we have one. */
if (dcontext == GLOBAL_DCONTEXT) {
dcontext = get_thread_private_dcontext();
ASSERT(dcontext != NULL);
}
pt = GET_PT(dcontext);
/* FIXME: as an optimization we could test if IS_IBL_TARGET() is
* set before looking it up
*/
for (branch_type = IBL_BRANCH_TYPE_START;
branch_type < IBL_BRANCH_TYPE_END; branch_type++) {
per_thread_t *local_pt = pt;
/* We put traces into the trace tables and BBs into the BB tables
* and sometimes put traces into BB tables also. We never put
* BBs into a trace table.
*/
if (TEST(FRAG_IS_TRACE, f->flags)) {
if (DYNAMO_OPTION(shared_trace_ibt_tables))
local_pt = shared_pt;
if (fragment_prepare_for_removal_from_table(dcontext, f,
&local_pt->
trace_ibt[branch_type]))
prepared = true;
}
if (DYNAMO_OPTION(bb_ibl_targets) &&
(!TEST(FRAG_IS_TRACE, f->flags) ||
DYNAMO_OPTION(bb_ibt_table_includes_traces))) {
if (DYNAMO_OPTION(shared_bb_ibt_tables))
local_pt = shared_pt;
if (fragment_prepare_for_removal_from_table(dcontext, f,
&local_pt->
bb_ibt[branch_type])) {
#ifdef DEBUG
ibl_table_t *ibl_table = GET_IBT_TABLE(pt, f->flags, branch_type);
fragment_entry_t current;
TABLE_RWLOCK(ibl_table, read, lock);
current = hashtable_ibl_lookup(dcontext, (ptr_uint_t)f->tag, ibl_table);
ASSERT(IBL_ENTRY_IS_EMPTY(current));
TABLE_RWLOCK(ibl_table, read, unlock);
#endif
prepared = true;
}
}
}
return prepared;
}
#ifdef DEBUG
/* FIXME: hashtable_fragment_reset() needs to walk the tables to get these
 * stats, but then we'd need to subtract 1 from all smaller counts --
 * e.g., if an entry is found in 3 tables we first add (1,-1,0), then
 * when we find it again we add (0,1,-1), and the third time we add
 * (0,0,1).  In total the entry is accounted for as (1,0,0) without
 * disturbing anything else.
 */
static inline void
fragment_ibl_stat_account(uint flags, uint ibls_targeted)
{
if (TEST(FRAG_IS_TRACE, flags)) {
switch (ibls_targeted) {
case 0: break; /* doesn't have to be a target of any IBL routine */
case 1: STATS_INC(num_traces_in_1_ibl_tables); break;
case 2: STATS_INC(num_traces_in_2_ibl_tables); break;
case 3: STATS_INC(num_traces_in_3_ibl_tables); break;
default: ASSERT_NOT_REACHED();
}
} else {
switch (ibls_targeted) {
case 0: break; /* doesn't have to be a target of any IBL routine */
case 1: STATS_INC(num_bbs_in_1_ibl_tables); break;
case 2: STATS_INC(num_bbs_in_2_ibl_tables); break;
case 3: STATS_INC(num_bbs_in_3_ibl_tables); break;
default: ASSERT_NOT_REACHED();
}
}
}
#endif
/* Removes f from any IBT tables it is in.
* If f is in a shared table, only removes if from_shared is true, in
* which case dcontext must be GLOBAL_DCONTEXT and we must have
* dynamo_all_threads_synched (case 10137).
*/
void
fragment_remove_from_ibt_tables(dcontext_t *dcontext, fragment_t *f,
bool from_shared)
{
bool shared_ibt_table =
(!TEST(FRAG_IS_TRACE, f->flags) && DYNAMO_OPTION(shared_bb_ibt_tables)) ||
(TEST(FRAG_IS_TRACE, f->flags) && DYNAMO_OPTION(shared_trace_ibt_tables));
fragment_entry_t fe = FRAGENTRY_FROM_FRAGMENT(f);
ASSERT(!from_shared || !shared_ibt_table || !IS_IBL_TARGET(f->flags) ||
(dcontext == GLOBAL_DCONTEXT && dynamo_all_threads_synched));
if (((!shared_ibt_table && dcontext != GLOBAL_DCONTEXT) ||
(from_shared && dcontext == GLOBAL_DCONTEXT && dynamo_all_threads_synched)) &&
IS_IBL_TARGET(f->flags)) {
        /* trace_t tables should all be private, and any deletions should
         * follow the strict two-step deletion process, so we don't need to be
         * holding nested locks when removing cached entries from the per-type
         * IBL target tables.
         */
/* FIXME: the stats on ibls_targeted are not quite correct - we need to
* gather these independently */
DEBUG_DECLARE(uint ibls_targeted = 0;)
ibl_branch_type_t branch_type;
per_thread_t *pt = GET_PT(dcontext);
ASSERT(TEST(FRAG_IS_TRACE, f->flags) || DYNAMO_OPTION(bb_ibl_targets));
for (branch_type = IBL_BRANCH_TYPE_START;
branch_type < IBL_BRANCH_TYPE_END; branch_type++) {
/* assuming a single tag can't be both a trace and bb */
ibl_table_t *ibtable = GET_IBT_TABLE(pt, f->flags, branch_type);
ASSERT(!TEST(FRAG_TABLE_SHARED, ibtable->table_flags) ||
dynamo_all_threads_synched);
TABLE_RWLOCK(ibtable, write, lock); /* satisfy asserts, even if allsynch */
if (hashtable_ibl_remove(fe, ibtable)) {
LOG(THREAD, LOG_FRAGMENT, 2,
" removed F%d("PFX") from IBT table %s\n",
f->id, f->tag,
TEST(FRAG_TABLE_TRACE, ibtable->table_flags) ?
ibl_trace_table_type_names[branch_type] :
ibl_bb_table_type_names[branch_type]);
DOSTATS({ibls_targeted++;});
}
TABLE_RWLOCK(ibtable, write, unlock);
}
DOSTATS({fragment_ibl_stat_account(f->flags, ibls_targeted);});
}
}
/* Removes ibl entries whose tags are in [start,end) */
static uint
fragment_remove_ibl_entries_in_region(dcontext_t *dcontext, app_pc start, app_pc end,
uint frag_flags)
{
uint total_removed = 0;
per_thread_t *pt = GET_PT(dcontext);
ibl_branch_type_t branch_type;
ASSERT(pt != NULL);
ASSERT(TEST(FRAG_IS_TRACE, frag_flags) || DYNAMO_OPTION(bb_ibl_targets));
ASSERT(dcontext == get_thread_private_dcontext() || dynamo_all_threads_synched);
for (branch_type = IBL_BRANCH_TYPE_START;
branch_type < IBL_BRANCH_TYPE_END; branch_type++) {
ibl_table_t *ibtable = GET_IBT_TABLE(pt, frag_flags, branch_type);
uint removed = 0;
TABLE_RWLOCK(ibtable, write, lock);
if (ibtable->entries > 0) {
removed = hashtable_ibl_range_remove(dcontext, ibtable,
(ptr_uint_t)start, (ptr_uint_t)end, NULL);
/* Ensure a full remove gets everything */
ASSERT(start != UNIVERSAL_REGION_BASE || end != UNIVERSAL_REGION_END ||
(ibtable->entries == 0 &&
is_region_memset_to_char((app_pc)ibtable->table,
(ibtable->capacity-1)*sizeof(fragment_entry_t),
0)));
}
LOG(THREAD, LOG_FRAGMENT, 2,
" removed %d entries (%d left) in "PFX"-"PFX" from IBT table %s\n",
removed, ibtable->entries, start, end,
TEST(FRAG_TABLE_TRACE, ibtable->table_flags) ?
ibl_trace_table_type_names[branch_type] :
ibl_bb_table_type_names[branch_type]);
TABLE_RWLOCK(ibtable, write, unlock);
total_removed += removed;
}
return total_removed;
}
/* Removes shared (and incidentally private; if no shared targets can exist,
 * this may remove nothing) ibl entries whose tags are in [start,end) from all
 * tables associated w/ dcontext.  If dcontext==GLOBAL_DCONTEXT, uses the
 * shared tables, if they exist; else, uses the private tables, if any.
 */
uint
fragment_remove_all_ibl_in_region(dcontext_t *dcontext, app_pc start, app_pc end)
{
uint removed = 0;
if (DYNAMO_OPTION(bb_ibl_targets) &&
((dcontext == GLOBAL_DCONTEXT && DYNAMO_OPTION(shared_bb_ibt_tables)) ||
(dcontext != GLOBAL_DCONTEXT && !DYNAMO_OPTION(shared_bb_ibt_tables)))) {
removed +=
fragment_remove_ibl_entries_in_region(dcontext, start, end, 0/*bb table*/);
}
if (DYNAMO_OPTION(shared_traces) &&
((dcontext == GLOBAL_DCONTEXT && DYNAMO_OPTION(shared_trace_ibt_tables)) ||
(dcontext != GLOBAL_DCONTEXT && !DYNAMO_OPTION(shared_trace_ibt_tables)))) {
removed +=
fragment_remove_ibl_entries_in_region(dcontext, start, end, FRAG_IS_TRACE);
}
return removed;
}
/* Removes f from any hashtables -- BB, trace, or future -- and IBT tables
* it is in, except for shared IBT tables. */
void
fragment_remove(dcontext_t *dcontext, fragment_t *f)
{
per_thread_t *pt = GET_PT(dcontext);
fragment_table_t *table = GET_FTABLE(pt, f->flags);
ASSERT(TEST(FRAG_SHARED, f->flags) || dcontext != GLOBAL_DCONTEXT);
/* For consistency we remove entries from the IBT
* tables before we remove them from the trace table.
*/
fragment_remove_from_ibt_tables(dcontext, f, false/*leave in shared*/);
/* We need the write lock since deleting shifts elements around (though we
* technically may be ok in all scenarios there) and to avoid problems with
* multiple removes at once (shouldn't count on the bb building lock)
*/
TABLE_RWLOCK(table, write, lock);
if (hashtable_fragment_remove(f, table)) {
LOG(THREAD, LOG_FRAGMENT, 4,
"fragment_remove: removed F%d("PFX") from fcache lookup table\n",
f->id, f->tag);
TABLE_RWLOCK(table, write, unlock);
return;
}
TABLE_RWLOCK(table, write, unlock);
/* ok to not find a trace head used to start a trace -- fine to have deleted
* the trace head
*/
ASSERT(cur_trace_tag(dcontext) == f->tag
/* PR 299808: we have invisible temp trace bbs */
IF_CLIENT_INTERFACE(|| TEST(FRAG_TEMP_PRIVATE, f->flags)));
}
/* Remove f from ftable, replacing it in the hashtable with new_f,
* which has an identical tag.
* f's next field is left intact so this can be done while owner is in fcache
* f is NOT deleted in any other way!
* To delete later, caller must call fragment_delete w/ remove=false
*/
void
fragment_replace(dcontext_t *dcontext, fragment_t *f, fragment_t *new_f)
{
per_thread_t *pt = GET_PT(dcontext);
fragment_table_t *table = GET_FTABLE(pt, f->flags);
TABLE_RWLOCK(table, write, lock);
if (hashtable_fragment_replace(f, new_f, table)) {
fragment_entry_t fe = FRAGENTRY_FROM_FRAGMENT(f);
fragment_entry_t new_fe = FRAGENTRY_FROM_FRAGMENT(new_f);
LOG(THREAD, LOG_FRAGMENT, 4,
"removed F%d from fcache lookup table (replaced with F%d) "PFX"->~"PFX","PFX"\n",
f->id, new_f->id, f->tag, f->start_pc, new_f->start_pc);
/* Need to replace all entries from the IBL tables that may have this entry */
if (IS_IBL_TARGET(f->flags)) {
ibl_branch_type_t branch_type;
for (branch_type = IBL_BRANCH_TYPE_START;
branch_type < IBL_BRANCH_TYPE_END; branch_type++) {
ibl_table_t *ibtable = GET_IBT_TABLE(pt, f->flags, branch_type);
/* currently we don't have shared ib target tables,
* otherwise a write lock would come in the picture here
*/
ASSERT(!TEST(FRAG_TABLE_SHARED, ibtable->table_flags));
hashtable_ibl_replace(fe, new_fe, ibtable);
}
}
} else
ASSERT_NOT_REACHED();
TABLE_RWLOCK(table, write, unlock);
/* tell monitor f has disappeared, but do not delete from incoming table
* or from fcache, also do not dump to trace file
*/
monitor_remove_fragment(dcontext, f);
}
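/* Shift all cache pcs stored for f by "shift" bytes: f's start pc, any cached
 * entries in IBL target tables, and its linkstubs, and re-relativize ctis in f
 * that target fixed locations outside the shifted cache region [start, end).
 */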
void
fragment_shift_fcache_pointers(dcontext_t *dcontext, fragment_t *f, ssize_t shift,
cache_pc start, cache_pc end, size_t old_size)
{
per_thread_t *pt = GET_PT(dcontext);
IF_X64(ASSERT_NOT_IMPLEMENTED(false)); /* must re-relativize when copying! */
/* need to shift all stored cache_pcs.
* do not need to shift relative pcs pointing to other fragments -- they're
* all getting shifted too!
* just need to re-pc-relativize jmps to fixed locations, namely
* cti's in exit stubs, and call instructions inside fragments.
*/
LOG(THREAD, LOG_FRAGMENT, 2, "fragment_shift_fcache_pointers: F%d + "SSZFMT"\n",
f->id, shift);
ASSERT(!TEST(FRAG_IS_FUTURE, f->flags)); /* only in-cache frags */
f->start_pc += shift;
    /* Should shift cached lookup entries in all IBL target tables.  Order
     * doesn't matter here: either way we'll be inconsistent, since we can't
     * do this within the cache.
     */
if (IS_IBL_TARGET(f->flags)) {
ibl_branch_type_t branch_type;
for (branch_type = IBL_BRANCH_TYPE_START;
branch_type < IBL_BRANCH_TYPE_END; branch_type++) {
/* Of course, we need to shift only pointers into the cache that is getting shifted! */
ibl_table_t *ibtable = GET_IBT_TABLE(pt, f->flags, branch_type);
fragment_entry_t fe = FRAGENTRY_FROM_FRAGMENT(f);
fragment_entry_t *pg;
uint hindex;
TABLE_RWLOCK(ibtable, read, lock);
pg = hashtable_ibl_lookup_for_removal(fe, ibtable, &hindex);
if (pg != NULL)
pg->start_pc_fragment += shift;
TABLE_RWLOCK(ibtable, read, unlock);
            LOG(THREAD, LOG_FRAGMENT, 2,
                "fragment_shift_fcache_pointers: %s ibt %s shifted by "SSZFMT"\n",
                TEST(FRAG_IS_TRACE, f->flags) ? "trace" : "BB", ibtable->name, shift);
}
}
linkstubs_shift(dcontext, f, shift);
DOLOG(6, LOG_FRAGMENT, { /* print after start_pc updated so get actual code */
LOG(THREAD, LOG_FRAGMENT, 6, "before shifting F%d ("PFX")\n", f->id, f->tag);
disassemble_fragment(dcontext, f, stats->loglevel < 3);
});
#ifdef X86
if (TEST(FRAG_SELFMOD_SANDBOXED, f->flags)) {
/* just re-finalize to update */
finalize_selfmod_sandbox(dcontext, f);
}
#endif
/* inter-cache links must be redone, but all fragment entry pcs must be
* fixed up first, so that's done separately
*/
/* re-do pc-relative targets outside of cache */
shift_ctis_in_fragment(dcontext, f, shift, start, end, old_size);
#ifdef CHECK_RETURNS_SSE2
finalize_return_check(dcontext, f);
#endif
DOLOG(6, LOG_FRAGMENT, {
LOG(THREAD, LOG_FRAGMENT, 6, "after shifting F%d ("PFX")\n", f->id, f->tag);
disassemble_fragment(dcontext, f, stats->loglevel < 3);
});
}
/* this routine only copies data structures like bbs and statistics
*/
void
fragment_copy_data_fields(dcontext_t *dcontext, fragment_t *f_src, fragment_t *f_dst)
{
if ((f_src->flags & FRAG_IS_TRACE) != 0) {
trace_only_t *t_src = TRACE_FIELDS(f_src);
trace_only_t *t_dst = TRACE_FIELDS(f_dst);
ASSERT((f_dst->flags & FRAG_IS_TRACE) != 0);
if (t_src->bbs != NULL) {
t_dst->bbs =
nonpersistent_heap_alloc(dcontext, t_src->num_bbs*sizeof(trace_bb_info_t)
HEAPACCT(ACCT_TRACE));
memcpy(t_dst->bbs, t_src->bbs, t_src->num_bbs*sizeof(trace_bb_info_t));
t_dst->num_bbs = t_src->num_bbs;
}
#ifdef PROFILE_RDTSC
t_dst->count = t_src->count;
t_dst->total_time = t_src->total_time;
#endif
#if defined (PROFILE_LINKCOUNT) && defined(SIDELINE_COUNT_STUDY)
t_dst->count_old_pre = t_src->count_old_pre;
t_dst->count_old_post = t_src->count_old_post;
#endif
}
}
#if defined(DEBUG) && defined(INTERNAL)
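/* Debug/internal-build-only diagnostic: logs every live entry in ftable,
 * flagging entries that have been redirected to the target_delete entry point.
 */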
static void
dump_lookup_table(dcontext_t *dcontext, ibl_table_t *ftable)
{
uint i;
cache_pc target_delete = get_target_delete_entry_pc(dcontext, ftable);
ASSERT(target_delete != NULL);
ASSERT(ftable->table != NULL);
LOG(THREAD, LOG_FRAGMENT, 1,
"%6s %10s %10s -- %s\n", "i", "tag", "target", ftable->name);
/* need read lock to traverse the table */
TABLE_RWLOCK(ftable, read, lock);
for (i = 0; i < ftable->capacity; i++) {
if (ftable->table[i].tag_fragment != 0) {
if (ftable->table[i].start_pc_fragment == target_delete) {
LOG(THREAD, LOG_FRAGMENT, 1,
"%6x "PFX" target_delete\n",
i, ftable->table[i].tag_fragment);
ASSERT(ftable->table[i].tag_fragment == FAKE_TAG);
}
else {
LOG(THREAD, LOG_FRAGMENT, 1,
"%6x "PFX" "PFX"\n",
i, ftable->table[i].tag_fragment,
ftable->table[i].start_pc_fragment);
}
}
DOCHECK(1, { hashtable_ibl_check_consistency(dcontext, ftable, i); });
}
TABLE_RWLOCK(ftable, read, unlock);
}
#endif
#ifdef DEBUG
/* Used only for debugging purposes: checks whether an IBL table entry wrapped
 * around the end of the table (found before its preferred index), which the
 * IBL routine can miss (a leak) when not using INTERNAL_OPTION(ibl_sentinel_check).
 */
static bool
is_fragment_index_wraparound(dcontext_t *dcontext, ibl_table_t *ftable, fragment_t *f)
{
uint hindex = HASH_FUNC((ptr_uint_t)f->tag, ftable);
uint found_at_hindex;
fragment_entry_t fe = FRAGENTRY_FROM_FRAGMENT(f);
fragment_entry_t *pg = hashtable_ibl_lookup_for_removal(fe, ftable, &found_at_hindex);
ASSERT(pg != NULL);
ASSERT(IBL_ENTRIES_ARE_EQUAL(*pg, fe));
LOG(THREAD, LOG_FRAGMENT, 3,
"is_fragment_index_wraparound F%d, tag "PFX", found_at_hindex 0x%x, preferred 0x%x\n",
f->id, f->tag, found_at_hindex, hindex);
return (found_at_hindex < hindex); /* wraparound */
}
#endif /* DEBUG */
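/* Adds f to ibl_table.  Under the table's write lock we re-check that f has
 * not been marked as a trace head in the meantime (trace heads are never IBT
 * targets), and for a shared table we re-check that no other thread has
 * already added an entry for the same tag.
 */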
static void
fragment_add_ibl_target_helper(dcontext_t *dcontext, fragment_t *f,
ibl_table_t *ibl_table)
{
fragment_entry_t current;
fragment_entry_t fe = FRAGENTRY_FROM_FRAGMENT(f);
/* Never add a BB to a trace table. */
ASSERT(!(!TEST(FRAG_IS_TRACE, f->flags) &&
TEST(FRAG_TABLE_TRACE, ibl_table->table_flags)));
/* adding is a write operation */
TABLE_RWLOCK(ibl_table, write, lock);
/* This is the last time the table lock is grabbed before adding the frag so
* check here to account for the race in the time between the
* FRAG_IS_TRACE_HEAD check in add_ibl_target() and now. We never add trace
* heads to an IBT target table.
*/
if (TEST(FRAG_IS_TRACE_HEAD, f->flags)) {
TABLE_RWLOCK(ibl_table, write, unlock);
STATS_INC(num_th_bb_ibt_add_race);
return;
}
/* For shared tables, check again in case another thread snuck in
* before the preceding lock and added the target. */
if (TEST(FRAG_TABLE_SHARED, ibl_table->table_flags)) {
current = hashtable_ibl_lookup(dcontext, (ptr_uint_t)f->tag, ibl_table);
if (IBL_ENTRY_IS_EMPTY(current))
hashtable_ibl_add(dcontext, fe, ibl_table);
/* We don't ever expect to find a like-tagged fragment. A BB
* can be unlinked due to eviction or when it's marked as a trace
* head. Eviction (for example, due to cache consistency)
* sets start_pc_fragment to FAKE_TAG, so there can't be
* a tag match; &unlinked_fragment is returned, and this
* applies to traces also. For trace head marking, FAKE_TAG
* is also set so &unlinked_fragment is returned.
*
* If we didn't set FAKE_TAG for an unlinked entry, then it
* could be clobbered with a new fragment w/the same tag.
* In a shared table, unlinked entries cannot be clobbered
* except by fragments w/the same tags, so this could help
* limit the length of collision chains.
*/
}
else {
hashtable_ibl_add(dcontext, fe, ibl_table);
}
TABLE_RWLOCK(ibl_table, write, unlock);
DOSTATS({
if (!TEST(FRAG_IS_TRACE, f->flags))
STATS_INC(num_bbs_ibl_targets);
/* We assume that traces can be added to trace and BB IBT tables but
* not just to BB tables. We count only traces added to trace tables
* so that we don't double increment.
*/
else if (TEST(FRAG_IS_TRACE, f->flags) &&
TEST(FRAG_TABLE_TRACE, ibl_table->table_flags))
STATS_INC(num_traces_ibl_targets);
});
    /* We log the current exit to help estimate indirect branch fan-out
     * (i.e., function fan-in for returns).  Note that other IBL hits to
     * the same target will not have exits associated with them.
     */
LOG(THREAD, LOG_FRAGMENT, 2,
"fragment_add_ibl_target added F%d("PFX"), branch %d, to %s, on exit from "PFX"\n",
f->id, f->tag,
ibl_table->branch_type, ibl_table->name,
LINKSTUB_FAKE(dcontext->last_exit) ? 0 :
EXIT_CTI_PC(dcontext->last_fragment, dcontext->last_exit)
);
DOLOG(4, LOG_FRAGMENT, {
dump_lookuptable_tls(dcontext);
hashtable_ibl_dump_table(dcontext, ibl_table);
dump_lookup_table(dcontext, ibl_table);
});
DODEBUG({
if (TEST(FRAG_SHARED, f->flags) && !TEST(FRAG_IS_TRACE, f->flags))
LOG(THREAD, LOG_FRAGMENT, 2,
"add_ibl_target: shared BB F%d("PFX") added\n", f->id,
f->tag);
});
}
/* IBL targeted fragments per branch type */
fragment_t *
fragment_add_ibl_target(dcontext_t *dcontext, app_pc tag,
ibl_branch_type_t branch_type)
{
per_thread_t *pt = (per_thread_t *) dcontext->fragment_field;
fragment_t *f = NULL;
fragment_t wrapper;
if (SHARED_BB_ONLY_IB_TARGETS()) {
f = fragment_lookup_bb(dcontext, tag);
if (f == NULL) {
f = fragment_coarse_lookup_wrapper(dcontext, tag, &wrapper);
if (f != NULL) {
#if defined(RETURN_AFTER_CALL) || defined(RCT_IND_BRANCH)
if (TEST(COARSE_FILL_IBL_MASK(branch_type),
DYNAMO_OPTION(coarse_fill_ibl))) {
/* On-demand per-type ibl filling from the persisted RAC/RCT
* table. We limit to the first thread to ask for it by
* clearing the coarse_info_t pending_table fields.
*/
/* FIXME: combine w/ the coarse lookup to do this once only */
coarse_info_t *coarse = get_fragment_coarse_info(f);
ASSERT(coarse != NULL);
if (coarse->persisted &&
exists_coarse_ibl_pending_table(dcontext, coarse, branch_type)) {
bool in_persisted_ibl = false;
mutex_lock(&coarse->lock);
if (exists_coarse_ibl_pending_table(dcontext,
coarse, branch_type)) {
ibl_table_t *ibl_table =
GET_IBT_TABLE(pt, f->flags, branch_type);
coarse_persisted_fill_ibl(dcontext, coarse, branch_type);
TABLE_RWLOCK(ibl_table, read, lock);
if (!IBL_ENTRY_IS_EMPTY(hashtable_ibl_lookup(dcontext,
(ptr_uint_t)tag, ibl_table)))
in_persisted_ibl = true;
TABLE_RWLOCK(ibl_table, read, unlock);
if (in_persisted_ibl) {
mutex_unlock(&coarse->lock);
return f;
}
}
mutex_unlock(&coarse->lock);
}
}
#endif /* defined(RETURN_AFTER_CALL) || defined(RCT_IND_BRANCH) */
}
}
} else {
f = fragment_lookup_trace(dcontext, tag);
if (f == NULL && DYNAMO_OPTION(bb_ibl_targets)) {
/* Populate with bb's that are not trace heads */
f = fragment_lookup_bb(dcontext, tag);
            /* We don't add trace heads, nor a BB that a trace is targeting.  In
             * the latter case, the BB will shortly be marked as a trace head and
             * removed from the IBT table, so we don't needlessly add it.
             */
if (f != NULL &&
(TEST(FRAG_IS_TRACE_HEAD, f->flags) ||
TEST(FRAG_IS_TRACE, dcontext->last_fragment->flags))) {
/* FIXME: should change the logic if trace headness becomes a private property */
f = NULL; /* ignore fragment */
STATS_INC(num_ib_th_target); /* counted in num_ibt_cold_misses */
}
}
}
LOG(THREAD, LOG_FRAGMENT, 3,
"fragment_add_ibl_target tag "PFX", branch %d, F%d %s\n",
tag, branch_type, f != NULL ? f->id : 0,
(f != NULL && TEST(FRAG_IS_TRACE, f->flags)) ? "existing trace" : "");
/* a valid IBT fragment exists */
if (f != NULL) {
ibl_table_t *ibl_table = GET_IBT_TABLE(pt, f->flags, branch_type);
DEBUG_DECLARE(fragment_entry_t *orig_lookuptable = NULL;)
fragment_entry_t current;
/* Make sure this thread's local ptrs & state is current in case a
* shared table resize occurred while it was in the cache. We update
* only during an IBL miss, since that's the first time that
* accessing the old table inflicted a cost (a context switch).
*/
/* NOTE We could be more aggressive and update the private ptrs for
* all tables, not just the one being added to, by calling
* update_all_private_ibt_table_ptrs(). We could be even more
* aggressive by updating all ptrs on every cache exit in
* enter_couldbelinking() but that could also prove to be more
* expensive by invoking the update logic when an IBL miss didn't
* occur. However, more frequent updates could lead to old tables
* being freed earlier. We can revisit this if we see old tables
* piling up and not being freed in a timely manner.
*/
update_private_ibt_table_ptrs(dcontext, ibl_table
_IF_DEBUG(&orig_lookuptable));
/* We can't place a private fragment into a thread-shared table.
* Nothing prevents a sandboxed or ignore syscalls frag from being
* the target of an IB. This is covered by case 5836.
*
* We don't need to re-check in the add_ibl_target_helper because
* the shared/private property applies to all IBT tables -- either
* all are shared or none are.
*/
if (TEST(FRAG_TABLE_SHARED, ibl_table->table_flags) &&
!TEST(FRAG_SHARED, f->flags)) {
STATS_INC(num_ibt_shared_private_conflict);
return f;
}
ASSERT(TEST(FRAG_IS_TRACE, f->flags) ==
TEST(FRAG_TABLE_TRACE, ibl_table->table_flags));
/* We can't assert that an IBL target isn't a trace head due to a race
* between trace head marking and adding to a table. See the comments
* in fragment_add_to_hashtable().
*/
TABLE_RWLOCK(ibl_table, read, lock);
current = hashtable_ibl_lookup(dcontext, (ptr_uint_t)tag, ibl_table);
TABLE_RWLOCK(ibl_table, read, unlock);
/* Now that we set the fragment_t* for any unlinked entry to
* &unlinked_fragment -- regardless of why it was unlinked -- and also
* set the lookup table tag to FAKE_TAG, we should never find a fragment
* with the same tag and should never have an unlinked marker returned
* here.
*/
ASSERT(!IBL_ENTRY_IS_INVALID(current));
if (IBL_ENTRY_IS_EMPTY(current)) {
DOLOG(4, LOG_FRAGMENT, {
dump_lookuptable_tls(dcontext);
hashtable_ibl_dump_table(dcontext, ibl_table);
dump_lookup_table(dcontext, ibl_table);
});
fragment_add_ibl_target_helper(dcontext, f, ibl_table);
/* When using BB2BB IBL w/trace building, we add trace targets
* to the BB table. (We always add a trace target to the trace
* table.) We fool the helper routine into using the BB
* table by passing in a non-trace value for the flags argument.
*/
if (TEST(FRAG_IS_TRACE, f->flags) && DYNAMO_OPTION(bb_ibl_targets) &&
DYNAMO_OPTION(bb_ibt_table_includes_traces)) {
ibl_table_t *ibl_table_too =
GET_IBT_TABLE(pt, f->flags & ~FRAG_IS_TRACE, branch_type);
ASSERT(ibl_table_too != NULL);
ASSERT(!TEST(FRAG_TABLE_TRACE, ibl_table_too->table_flags));
/* Make sure this thread's local ptrs & state is up to
* date in case a resize occurred while it was in the cache. */
update_private_ibt_table_ptrs(dcontext, ibl_table_too
_IF_DEBUG(NULL));
fragment_add_ibl_target_helper(dcontext, f, ibl_table_too);
}
}
else {
DEBUG_DECLARE(const char *reason;)
#ifdef DEBUG
if (is_building_trace(dcontext)) {
reason = "trace building";
STATS_INC(num_ibt_exit_trace_building);
} else if (TEST(FRAG_WAS_DELETED, dcontext->last_fragment->flags)) {
reason = "src unlinked (frag deleted)";
STATS_INC(num_ibt_exit_src_unlinked_frag_deleted);
} else if (!TEST(LINK_LINKED, dcontext->last_exit->flags) &&
TESTALL(FRAG_SHARED | FRAG_IS_TRACE_HEAD,
dcontext->last_fragment->flags) &&
fragment_lookup_type(dcontext,
dcontext->last_fragment->tag,
LOOKUP_TRACE|LOOKUP_SHARED) != NULL) {
/* Another thread unlinked src as part of replacing it with
* a new trace while this thread was in there (see case 5634
* for details) */
reason = "src unlinked (shadowed)";
STATS_INC(num_ibt_exit_src_unlinked_shadowed);
} else if (!INTERNAL_OPTION(ibl_sentinel_check) &&
is_fragment_index_wraparound(dcontext, ibl_table, f)) {
reason = "sentinel";
STATS_INC(num_ibt_leaks_likely_sentinel);
} else if (TEST(FRAG_SELFMOD_SANDBOXED, dcontext->last_fragment->flags)) {
reason = "src sandboxed";
STATS_INC(num_ibt_exit_src_sandboxed);
} else if (TEST(FRAG_TABLE_SHARED, ibl_table->table_flags) &&
orig_lookuptable != ibl_table->table) {
/* A table resize could cause a miss when the target is
* in the new table. */
reason = "shared IBT table resize";
STATS_INC(num_ibt_exit_shared_table_resize);
} else if (DYNAMO_OPTION(bb_ibl_targets) &&
IS_SHARED_SYSCALLS_LINKSTUB(dcontext->last_exit) &&
!DYNAMO_OPTION(disable_traces) &&
!TEST(FRAG_IS_TRACE, f->flags)) {
reason = "shared syscall exit cannot target BBs";
STATS_INC(num_ibt_exit_src_trace_shared_syscall);
} else if (DYNAMO_OPTION(bb_ibl_targets) &&
TEST(FRAG_IS_TRACE, f->flags) &&
!DYNAMO_OPTION(bb_ibt_table_includes_traces)) {
reason = "BBs do not target traces";
STATS_INC(num_ibt_exit_src_trace_shared_syscall);
} else if (!INTERNAL_OPTION(link_ibl)) {
reason = "-no_link_ibl prevents ibl";
STATS_INC(num_ibt_exit_nolink);
} else {
reason = "BAD leak?";
DOLOG(3, LOG_FRAGMENT, {
hashtable_ibl_dump_table(dcontext, ibl_table);
hashtable_ibl_study(dcontext, ibl_table, 0/*table consistent*/);
});
STATS_INC(num_ibt_exit_unknown);
ASSERT_CURIOSITY_ONCE(false && "fragment_add_ibl_target unknown reason");
}
/* nothing to do, just sanity checking */
LOG(THREAD, LOG_FRAGMENT, 2,
"fragment_add_ibl_target tag "PFX", F%d already added - %s\n",
tag, f->id, reason);
#endif
}
} else {
STATS_INC(num_ibt_cold_misses);
}
#ifdef HASHTABLE_STATISTICS
if (INTERNAL_OPTION(stay_on_trace_stats)) {
/* best effort: adjust for 32bit counter overflow occasionally
* we'll get a hashtable leak only when not INTERNAL_OPTION(ibl_sentinel_check)
*/
check_stay_on_trace_stats_overflow(dcontext, branch_type);
}
#endif /* HASHTABLE_STATISTICS */
DOLOG(4, LOG_FRAGMENT, {
dump_lookuptable_tls(dcontext);
});
return f;
}
/**********************************************************************/
/* FUTURE FRAGMENTS */
/* create a new future fragment and return it (not yet added to any table)
 */
static future_fragment_t *
fragment_create_future(dcontext_t *dcontext, app_pc tag, uint flags)
{
dcontext_t *alloc_dc = FRAGMENT_ALLOC_DC(dcontext, flags);
future_fragment_t *fut = (future_fragment_t*)
nonpersistent_heap_alloc(alloc_dc, sizeof(future_fragment_t)
HEAPACCT(ACCT_FRAG_FUTURE));
ASSERT(!NEED_SHARED_LOCK(flags) || self_owns_recursive_lock(&change_linking_lock));
LOG(THREAD, LOG_FRAGMENT, 4, "Created future fragment "PFX" w/ flags 0x%08x\n",
tag, flags|FRAG_FAKE|FRAG_IS_FUTURE);
STATS_INC(num_future_fragments);
DOSTATS({
if (TEST(FRAG_SHARED, flags))
STATS_INC(num_shared_future_fragments);
});
fut->tag = tag;
fut->flags = flags | FRAG_FAKE | FRAG_IS_FUTURE;
fut->incoming_stubs = NULL;
return fut;
}
static void
fragment_free_future(dcontext_t *dcontext, future_fragment_t *fut)
{
dcontext_t *alloc_dc = FRAGMENT_ALLOC_DC(dcontext, fut->flags);
LOG(THREAD, LOG_FRAGMENT, 4, "Freeing future fragment "PFX"\n", fut->tag);
ASSERT(fut->incoming_stubs == NULL);
nonpersistent_heap_free(alloc_dc, fut, sizeof(future_fragment_t)
HEAPACCT(ACCT_FRAG_FUTURE));
}
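/* Creates a future fragment for tag and adds it to the appropriate future
 * table.  For shared futures the caller must hold the change_linking_lock.
 */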
future_fragment_t *
fragment_create_and_add_future(dcontext_t *dcontext, app_pc tag, uint flags)
{
per_thread_t *pt = GET_PT(dcontext);
future_fragment_t *fut = fragment_create_future(dcontext, tag, flags);
fragment_table_t *futtable = GET_FTABLE(pt, fut->flags);
ASSERT(!NEED_SHARED_LOCK(flags) || self_owns_recursive_lock(&change_linking_lock));
/* adding to the table is a write operation */
TABLE_RWLOCK(futtable, write, lock);
fragment_add_to_hashtable(dcontext, (fragment_t *)fut, futtable);
TABLE_RWLOCK(futtable, write, unlock);
return fut;
}
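/* Removes fut from its future table and frees it.  For shared futures the
 * caller must hold the change_linking_lock.
 */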
void
fragment_delete_future(dcontext_t *dcontext, future_fragment_t *fut)
{
per_thread_t *pt = GET_PT(dcontext);
fragment_table_t *futtable = GET_FTABLE(pt, fut->flags);
ASSERT(!NEED_SHARED_LOCK(fut->flags) ||
self_owns_recursive_lock(&change_linking_lock));
/* removing from the table is a write operation */
TABLE_RWLOCK(futtable, write, lock);
hashtable_fragment_remove((fragment_t *)fut, futtable);
TABLE_RWLOCK(futtable, write, unlock);
fragment_free_future(dcontext, fut);
}
/* We do not want to remove futures from a flushed region if they have
* incoming links (i#609).
*/
static bool
fragment_delete_future_filter(fragment_t *f)
{
future_fragment_t *fut = (future_fragment_t *) f;
ASSERT(TEST(FRAG_IS_FUTURE, f->flags));
return (fut->incoming_stubs == NULL);
}
static uint
fragment_delete_futures_in_region(dcontext_t *dcontext, app_pc start, app_pc end)
{
per_thread_t *pt = GET_PT(dcontext);
uint flags = FRAG_IS_FUTURE | (dcontext == GLOBAL_DCONTEXT ? FRAG_SHARED : 0);
fragment_table_t *futtable = GET_FTABLE(pt, flags);
uint removed;
/* Higher-level lock needed since we do lookup+add w/o holding table lock between */
ASSERT(!NEED_SHARED_LOCK(flags) || self_owns_recursive_lock(&change_linking_lock));
TABLE_RWLOCK(futtable, write, lock);
removed = hashtable_fragment_range_remove(dcontext, futtable,
(ptr_uint_t)start, (ptr_uint_t)end,
fragment_delete_future_filter);
TABLE_RWLOCK(futtable, write, unlock);
return removed;
}
future_fragment_t *
fragment_lookup_future(dcontext_t *dcontext, app_pc tag)
{
    /* The default is to look up in the shared table, since a private table
     * only sometimes exists, and callers often only care about trace heads,
     * which always use the shared table.
     */
uint flags = SHARED_FRAGMENTS_ENABLED() ? FRAG_SHARED : 0;
per_thread_t *pt = GET_PT(dcontext);
fragment_table_t *futtable = GET_FTABLE(pt, FRAG_IS_FUTURE | flags);
fragment_t *f;
TABLE_RWLOCK(futtable, read, lock);
f = hashtable_fragment_lookup(dcontext, (ptr_uint_t)tag, futtable);
TABLE_RWLOCK(futtable, read, unlock);
if (f != &null_fragment)
return (future_fragment_t *) f;
return NULL;
}
future_fragment_t *
fragment_lookup_private_future(dcontext_t *dcontext, app_pc tag)
{
per_thread_t *pt = (per_thread_t *) dcontext->fragment_field;
fragment_table_t *futtable = GET_FTABLE(pt, FRAG_IS_FUTURE);
fragment_t *f = hashtable_fragment_lookup(dcontext, (ptr_uint_t)tag, futtable);
if (f != &null_fragment)
return (future_fragment_t *) f;
return NULL;
}
/* END FUTURE FRAGMENTS
**********************************************************************/
#if defined(RETURN_AFTER_CALL) || defined(RCT_IND_BRANCH)
/* FIXME: move to rct.c when we move the whole app_pc table there */
#define STATS_RCT_ADD(which, stat, val) DOSTATS({ \
if ((which) == RCT_RAC) \
STATS_ADD(rac_##stat, val); \
else \
STATS_ADD(rct_##stat, val); \
})
static inline bool
rct_is_global_table(rct_module_table_t *permod)
{
return (permod == &rac_non_module_table ||
IF_UNIX_ELSE(permod == &rct_global_table, false));
}
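/* Returns the RAC/RCT table for the module containing tag.  For a non-module
 * address this falls back to the global rac_non_module_table for RCT_RAC
 * lookups and returns NULL for RCT_RCT lookups.
 */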
static inline rct_module_table_t *
rct_get_table(app_pc tag, rct_type_t which)
{
rct_module_table_t *permod = os_module_get_rct_htable(tag, which);
if (permod == NULL) { /* not a module */
if (which == RCT_RAC)
permod = &rac_non_module_table;
}
return permod;
}
/* returns NULL if not found */
static app_pc
rct_table_lookup_internal(dcontext_t *dcontext, app_pc tag,
rct_module_table_t *permod)
{
app_pc actag = NULL;
ASSERT(os_get_module_info_locked());
if (permod != NULL) {
/* Check persisted table first as it's likely to be larger and
* it needs no read lock
*/
if (permod->persisted_table != NULL) {
actag = hashtable_app_pc_rlookup(dcontext, (ptr_uint_t)tag,
permod->persisted_table);
}
if (actag == NULL && permod->live_table != NULL) {
actag = hashtable_app_pc_rlookup(dcontext, (ptr_uint_t)tag,
permod->live_table);
}
}
return actag;
}
/* returns NULL if not found */
static app_pc
rct_table_lookup(dcontext_t *dcontext, app_pc tag, rct_type_t which)
{
app_pc actag = NULL;
rct_module_table_t *permod;
ASSERT(which >= 0 && which < RCT_NUM_TYPES);
os_get_module_info_lock();
permod = rct_get_table(tag, which);
actag = rct_table_lookup_internal(dcontext, tag, permod);
os_get_module_info_unlock();
return actag;
}
/* Caller must hold the higher-level lock.
* Returns whether added a new entry or not.
*/
static bool
rct_table_add(dcontext_t *dcontext, app_pc tag, rct_type_t which)
{
rct_module_table_t *permod;
/* we use a higher-level lock to synchronize the lookup + add
* combination with other simultaneous adds as well as with removals
*/
/* FIXME We could use just the table lock for the lookup+add. This is cleaner
* than using another lock during the entire routine and acquiring & releasing
* the table lock in read mode for the lookup and then acquiring & releasing
* it again in write mode for the add. Also, any writes to the table outside
* of this routine would be blocked (as is desired). The down side is that
* reads would be blocked during the entire operation.
* The #ifdef DEBUG lookup would need to be moved to after the table lock
* is released to avoid a rank order violation (all table locks have the
* same rank). That's not problematic since it's only stat code.
*/
/* If we no longer hold this high-level lock for adds+removes we need
* to hold the new add/remove lock across persist_size->persist
*/
ASSERT_OWN_MUTEX(true, (which == RCT_RAC ? &after_call_lock : &rct_module_lock));
os_get_module_info_lock();
permod = rct_get_table(tag, which);
    /* Xref case 9717: on a partial image mapping we may try to add locations
     * (specifically the entry point) that are outside of any module.  Technically
     * this is also possible on a full mapping since we've seen entry points
     * redirected (and there's nothing requiring that they be re-directed to another
     * dll or, if at dr init, that we've already processed that target module,
     * xref case 10693). */
ASSERT_CURIOSITY(permod != NULL || EXEMPT_TEST("win32.partial_map.exe"));
if (permod == NULL || rct_table_lookup_internal(dcontext, tag, permod) != NULL) {
os_get_module_info_unlock();
return false;
}
if (permod->live_table == NULL) {
/* lazily initialized */
if (rct_is_global_table(permod))
SELF_UNPROTECT_DATASEC(DATASEC_RARELY_PROT);
permod->live_table = HEAP_TYPE_ALLOC(GLOBAL_DCONTEXT, app_pc_table_t,
ACCT_AFTER_CALL, PROTECTED);
if (rct_is_global_table(permod)) {
/* For global tables we would have to move to heap, or
* else unprot every time, to maintain min and max: but
* the min-max optimization isn't going to help global
* tables so we just don't bother.
*/
permod->live_min = NULL;
permod->live_max = (app_pc) POINTER_MAX;
SELF_PROTECT_DATASEC(DATASEC_RARELY_PROT);
}
hashtable_app_pc_init(GLOBAL_DCONTEXT, permod->live_table,
which == RCT_RAC ? INIT_HTABLE_SIZE_AFTER_CALL :
INIT_HTABLE_SIZE_RCT_IBT,
which == RCT_RAC ? DYNAMO_OPTION(shared_after_call_load) :
DYNAMO_OPTION(global_rct_ind_br_load),
(hash_function_t)INTERNAL_OPTION(alt_hash_func),
0 /* hash_mask_offset */,
(SHARED_FRAGMENTS_ENABLED() ?
HASHTABLE_ENTRY_SHARED : 0)
| HASHTABLE_SHARED | HASHTABLE_PERSISTENT
/* I'm seeing a number of high-ave-collision
* cases on both rac and rct; there's no easy
* way to estimate final size, so going to
* relax a little as not perf-critical */
| HASHTABLE_RELAX_CLUSTER_CHECKS
_IF_DEBUG(which == RCT_RAC ? "after_call_targets" :
"rct_ind_targets"));
STATS_RCT_ADD(which, live_tables, 1);
}
ASSERT(permod->live_table != NULL);
/* adding is a write operation */
TABLE_RWLOCK(permod->live_table, write, lock);
hashtable_app_pc_add(dcontext, tag, permod->live_table);
TABLE_RWLOCK(permod->live_table, write, unlock);
/* case 7628: used for persistence optimization: but watch overhead */
if (!rct_is_global_table(permod)) {
/* See comments above */
if (permod->live_min == NULL || tag < permod->live_min)
permod->live_min = tag;
if (tag > permod->live_max)
permod->live_max = tag;
}
os_get_module_info_unlock();
STATS_RCT_ADD(which, live_entries, 1);
DOSTATS({
if (permod == &rac_non_module_table)
STATS_INC(rac_non_module_entries);
});
return true;
}
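/* Removes a single tag from the live table for its module (or from the
 * non-module table for RAC).  Caller must hold the corresponding higher-level
 * lock (after_call_lock or rct_module_lock).
 */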
static void
rct_table_flush_entry(dcontext_t *dcontext, app_pc tag, rct_type_t which)
{
rct_module_table_t *permod;
/* need higher level lock to properly synchronize with lookup+add */
ASSERT_OWN_MUTEX(true, (which == RCT_RAC ? &after_call_lock : &rct_module_lock));
os_get_module_info_lock();
permod = rct_get_table(tag, which);
ASSERT(permod != NULL);
/* We should have removed any persist info before calling this routine */
ASSERT(permod->persisted_table == NULL);
ASSERT(permod->live_table != NULL);
if (permod->live_table != NULL) {
/* removing is a write operation */
TABLE_RWLOCK(permod->live_table, write, lock);
hashtable_app_pc_remove(tag, permod->live_table);
TABLE_RWLOCK(permod->live_table, write, unlock);
}
os_get_module_info_unlock();
}
/* Invalidates all after call or indirect branch targets from given
* range [text_start,text_end) which must be either completely
* contained in a single module or not touch any modules.
* Assuming any existing fragments that were added to IBL tables will
* be flushed independently: this routine only flushes the policy
* information.
* Note this needs to be called on app_memory_deallocation() for RAC
* (potentially from DGC), and rct_process_module_mmap() for other RCT
* entries which should be only in modules.
*
* Returns entries flushed.
*/
static uint
rct_table_invalidate_range(dcontext_t *dcontext, rct_type_t which,
app_pc text_start, app_pc text_end)
{
uint entries_removed = 0;
rct_module_table_t *permod;
/* need higher level lock to properly synchronize with lookup+add */
ASSERT_OWN_MUTEX(true, (which == RCT_RAC ? &after_call_lock : &rct_module_lock));
ASSERT(text_start < text_end);
if (DYNAMO_OPTION(rct_sticky)) {
/* case 5329 - leaving for bug-compatibility with previous releases */
/* trade-off is spurious RCT violations vs memory leak */
return 0;
}
/* We only support removing from within a single module or not touching
* any modules */
ASSERT(get_module_base(text_start) == get_module_base(text_end));
os_get_module_info_lock();
permod = rct_get_table(text_start, which);
ASSERT(permod != NULL);
/* We should have removed any persist info before calling this routine */
ASSERT(permod->persisted_table == NULL);
ASSERT(permod->live_table != NULL);
if (permod != NULL && permod->live_table != NULL) {
TABLE_RWLOCK(permod->live_table, write, lock);
entries_removed =
hashtable_app_pc_range_remove(dcontext, permod->live_table,
(ptr_uint_t)text_start, (ptr_uint_t)text_end,
NULL);
DOCHECK(1, {
uint second_pass =
hashtable_app_pc_range_remove(dcontext, permod->live_table,
(ptr_uint_t)text_start,
(ptr_uint_t)text_end, NULL);
ASSERT(second_pass == 0 && "nothing should be missed");
/* simplest sanity check that hashtable_app_pc_range_remove() works */
});
TABLE_RWLOCK(permod->live_table, write, unlock);
}
os_get_module_info_unlock();
return entries_removed;
}
static void
rct_table_free_internal(dcontext_t *dcontext, app_pc_table_t *table)
{
hashtable_app_pc_free(dcontext, table);
ASSERT(TEST(HASHTABLE_PERSISTENT, table->table_flags));
HEAP_TYPE_FREE(dcontext, table, app_pc_table_t, ACCT_AFTER_CALL, PROTECTED);
}
void
rct_table_free(dcontext_t *dcontext, app_pc_table_t *table, bool free_data)
{
DODEBUG({
DOLOG(1, LOG_FRAGMENT|LOG_STATS, {
hashtable_app_pc_load_statistics(dcontext, table);
});
hashtable_app_pc_study(dcontext, table, 0/*table consistent*/);
});
if (!free_data) {
/* We don't need the free_data param anymore */
ASSERT(table->table_unaligned == NULL); /* don't try to free, part of mmap */
}
rct_table_free_internal(GLOBAL_DCONTEXT, table);
}
app_pc_table_t *
rct_table_copy(dcontext_t *dcontext, app_pc_table_t *src)
{
if (src == NULL)
return NULL;
else
return hashtable_app_pc_copy(GLOBAL_DCONTEXT, src);
}
app_pc_table_t *
rct_table_merge(dcontext_t *dcontext, app_pc_table_t *src1, app_pc_table_t *src2)
{
if (src1 == NULL) {
if (src2 == NULL)
return NULL;
return hashtable_app_pc_copy(GLOBAL_DCONTEXT, src2);
} else if (src2 == NULL)
return hashtable_app_pc_copy(GLOBAL_DCONTEXT, src1);
else
return hashtable_app_pc_merge(GLOBAL_DCONTEXT, src1, src2);
}
/* Up to caller to synchronize access to table. */
uint
rct_table_persist_size(dcontext_t *dcontext, app_pc_table_t *table)
{
/* Don't persist zero-entry tables */
if (table == NULL || table->entries == 0)
return 0;
else
return hashtable_app_pc_persist_size(dcontext, table);
}
/* Up to caller to synchronize access to table.
* Returns true iff all writes succeeded.
*/
bool
rct_table_persist(dcontext_t *dcontext, app_pc_table_t *table, file_t fd)
{
bool success = true;
ASSERT(fd != INVALID_FILE);
ASSERT(table != NULL); /* caller shouldn't call us o/w */
if (table != NULL)
success = hashtable_app_pc_persist(dcontext, table, fd);
return success;
}
app_pc_table_t *
rct_table_resurrect(dcontext_t *dcontext, byte *mapped_table, rct_type_t which)
{
return hashtable_app_pc_resurrect(GLOBAL_DCONTEXT, mapped_table
_IF_DEBUG(which == RCT_RAC ? "after_call_targets" :
"rct_ind_targets"));
}
void
rct_module_table_free(dcontext_t *dcontext, rct_module_table_t *permod, app_pc modpc)
{
ASSERT(os_get_module_info_locked());
if (permod->live_table != NULL) {
rct_table_free(GLOBAL_DCONTEXT, permod->live_table, true);
permod->live_table = NULL;
}
if (permod->persisted_table != NULL) {
/* persisted table: table data is from disk, but header is on heap */
rct_table_free(GLOBAL_DCONTEXT, permod->persisted_table, false);
permod->persisted_table = NULL;
/* coarse_info_t has a duplicated pointer to the persisted table,
* but it should always be flushed before we get here
*/
ASSERT(get_executable_area_coarse_info(modpc) == NULL);
}
}
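/* Invalidates the persisted RAC/RCT tables for the module containing modpc.
 * Unless the module is being unloaded (or we are exiting), persisted entries
 * are first merged into the live table so that they outlast the pcache.
 */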
void
rct_module_table_persisted_invalidate(dcontext_t *dcontext, app_pc modpc)
{
rct_module_table_t *permod;
uint i;
os_get_module_info_lock();
for (i = 0; i < RCT_NUM_TYPES; i++) {
permod = rct_get_table(modpc, i);
ASSERT(permod != NULL);
if (permod != NULL && permod->persisted_table != NULL) {
            /* If the persisted table contains entries beyond what we will discover
             * when we re-build its cache we must transfer those to the live table
             * now.  At first I was only keeping entire-module RCT entries, but we
             * must keep all RAC and RCT entries since we may not see the triggering
             * code before we hit the check point (we may reset at a ret so we won't
             * see the call before the ret check; same for Borland SEH).  The only
             * time we can drop them is on a module unload.
             */
/* Optimization: don't transfer if about to unload. This assumes
* there will be no persistence of a later coarse unit in a different
* region of the same module, which case 9651 primary_for_module
* currently ensures! (We already have read lock; ok to grab again.)
*/
if (!os_module_get_flag(modpc, MODULE_BEING_UNLOADED) && !dynamo_exited) {
/* FIXME case 10362: we could leave the file mapped in and
* use the persisted RCT table independently of the cache
*/
/* Merging will remove any dups, though today we never re-load
* pcaches so there shouldn't be any
*/
app_pc_table_t *merged =
/* all modinfo RCT tables are on global heap */
rct_table_merge(GLOBAL_DCONTEXT, permod->live_table,
permod->persisted_table);
if (permod->live_table != NULL)
rct_table_free(GLOBAL_DCONTEXT, permod->live_table, true);
permod->live_table = merged;
LOG(THREAD, LOG_FRAGMENT, 2,
"rct_module_table_persisted_invalidate "PFX": not unload, so "
"moving persisted %d entries to live table\n",
modpc, permod->persisted_table->entries);
# ifdef WINDOWS
/* We leave the MODULE_RCT_LOADED flag */
# endif
STATS_INC(rct_persisted_outlast_cache);
}
/* we rely on coarse_unit_reset_free() freeing the persisted table struct */
permod->persisted_table = NULL;
}
}
os_get_module_info_unlock();
}
/* Produces a new hashtable that contains all entries in the live and persisted
* tables for the module containing modpc that are within [limit_start, limit_end)
*/
app_pc_table_t *
rct_module_table_copy(dcontext_t *dcontext, app_pc modpc, rct_type_t which,
app_pc limit_start, app_pc limit_end)
{
app_pc_table_t *merged = NULL;
rct_module_table_t *permod;
mutex_t *lock = (which == RCT_RAC) ? &after_call_lock : &rct_module_lock;
mutex_lock(lock);
if (which == RCT_RAC) {
ASSERT(DYNAMO_OPTION(ret_after_call));
if (!DYNAMO_OPTION(ret_after_call))
return NULL;
} else {
ASSERT(TEST(OPTION_ENABLED, DYNAMO_OPTION(rct_ind_call)) ||
TEST(OPTION_ENABLED, DYNAMO_OPTION(rct_ind_jump)));
if (!TEST(OPTION_ENABLED, DYNAMO_OPTION(rct_ind_call)) &&
!TEST(OPTION_ENABLED, DYNAMO_OPTION(rct_ind_jump)))
return NULL;
}
os_get_module_info_lock();
permod = rct_get_table(modpc, which);
ASSERT(permod != NULL);
if (permod != NULL) {
/* FIXME: we could pass the limit range down to hashtable_app_pc_{copy,merge}
* for more efficiency and to avoid over-sizing the table, but this
* should be rare w/ -persist_rct_entire and single-+x-section modules
*/
merged = rct_table_merge(dcontext, permod->live_table, permod->persisted_table);
if (merged != NULL) {
DEBUG_DECLARE(uint removed = 0;)
TABLE_RWLOCK(merged, write, lock);
if (limit_start > permod->live_min) {
DEBUG_DECLARE(removed +=)
hashtable_app_pc_range_remove(dcontext, merged,
(ptr_uint_t)permod->live_min,
(ptr_uint_t)limit_start, NULL);
}
if (limit_end <= permod->live_max) {
DEBUG_DECLARE(removed +=)
hashtable_app_pc_range_remove(dcontext, merged,
(ptr_uint_t)limit_end,
(ptr_uint_t)permod->live_max+1, NULL);
}
TABLE_RWLOCK(merged, write, unlock);
STATS_RCT_ADD(which, module_persist_out_of_range, removed);
}
}
os_get_module_info_unlock();
mutex_unlock(lock);
return merged;
}
/* We return the persisted table so we can keep a pointer to it in the
* loaded coarse_info_t, but we must be careful to do a coordinated
* free of the duplicated pointer.
*/
bool
rct_module_table_set(dcontext_t *dcontext, app_pc modpc, app_pc_table_t *table,
rct_type_t which)
{
rct_module_table_t *permod;
bool used = false;
mutex_t *lock = (which == RCT_RAC) ? &after_call_lock : &rct_module_lock;
mutex_lock(lock);
os_get_module_info_lock();
permod = rct_get_table(modpc, which);
ASSERT(permod != NULL);
ASSERT(permod->persisted_table == NULL); /* can't resurrect twice */
ASSERT(table != NULL);
/* Case 9834: avoid double-add from earlier entire-module resurrect */
ASSERT(which == RCT_RAC || !os_module_get_flag(modpc, MODULE_RCT_LOADED));
    /* FIXME case 8648: we're loosening security by allowing ret to target any
     * after-call executed in any prior run of this app, or by whatever app
     * is the pcache producer, instead of just this run.
     */
if (permod != NULL && permod->persisted_table == NULL) {
used = true;
/* There could be dups in persisted table that are already in
* live table (particularly from unloading and re-loading a pcache,
* though that never happens today). Everything should work fine,
* and if we do unload the pcache prior to module unload the merge
* into the live entries will remove the dups.
*/
permod->persisted_table = table;
ASSERT(permod->persisted_table->entries > 0);
/* FIXME: for case 9639 if we had the ibl table set up (say, for shared
* ibl tables) and stored the cache pc (though I guess coarse htable is
* set up) we could fill the ibl table here as well (but then we'd
* have to abort for hotp conflicts earlier in coarse_unit_load()).
* Instead we delay and do it in rac_persisted_fill_ibl().
*/
LOG(THREAD, LOG_FRAGMENT, 2,
"rct_module_table_resurrect: added %d %s entries\n",
permod->persisted_table->entries, which == RCT_RAC ? "RAC" : "RCT");
STATS_RCT_ADD(which, persisted_tables, 1);
STATS_RCT_ADD(which, persisted_entries, permod->persisted_table->entries);
}
os_get_module_info_unlock();
mutex_unlock(lock);
return used;
}
bool
rct_module_persisted_table_exists(dcontext_t *dcontext, app_pc modpc,
rct_type_t which)
{
bool exists = false;
rct_module_table_t *permod;
os_get_module_info_lock();
permod = rct_get_table(modpc, which);
exists = (permod != NULL && permod->persisted_table != NULL);
os_get_module_info_unlock();
return exists;
}
uint
rct_module_live_entries(dcontext_t *dcontext, app_pc modpc, rct_type_t which)
{
uint entries = 0;
rct_module_table_t *permod;
os_get_module_info_lock();
permod = rct_get_table(modpc, which);
if (permod != NULL && permod->live_table != NULL)
entries = permod->live_table->entries;
os_get_module_info_unlock();
return entries;
}
static void
coarse_persisted_fill_ibl_helper(dcontext_t *dcontext, ibl_table_t *ibl_table,
coarse_info_t *info, app_pc_table_t *ptable,
ibl_branch_type_t branch_type)
{
uint i;
fragment_t wrapper;
app_pc tag;
cache_pc body_pc;
DEBUG_DECLARE(uint added = 0;)
ASSERT(ptable != NULL);
if (ptable == NULL)
return;
ASSERT(os_get_module_info_locked());
/* Make sure this thread's local ptrs & state are up to
* date in case a resize occurred while it was in the cache. */
update_private_ibt_table_ptrs(dcontext, ibl_table _IF_DEBUG(NULL));
/* Avoid hash collision asserts while adding by sizing up front;
* FIXME: we may over-size for INDJMP table
*/
TABLE_RWLOCK(ibl_table, write, lock);
hashtable_ibl_check_size(dcontext, ibl_table, 0, ptable->entries);
TABLE_RWLOCK(ibl_table, write, unlock);
/* FIXME: we should hold ptable's read lock but it's lower ranked
* than the fragment table's lock, so we rely on os module lock
*/
for (i = 0; i < ptable->capacity; i++) {
tag = ptable->table[i];
if (APP_PC_ENTRY_IS_REAL(tag)) {
/* FIXME: should we persist the cache pcs to save time here? That won't
* be ideal if we ever use the mmapped table directly (for per-module
* tables: case 9672). We could support both tag-only and tag-cache
* pairs by having the 1st entry be a flags word.
*/
fragment_coarse_lookup_in_unit(dcontext, info, tag, NULL, &body_pc);
/* may not be present, given no checks in rct_entries_in_region() */
if (body_pc != NULL &&
                /* We can have the same entry in both the RAC and RCT tables, and we
                 * use both tables to fill the INDJMP table, so skip tags that are
                 * already present to avoid adding them twice
                 */
                (branch_type != IBL_INDJMP ||
                 IBL_ENTRY_IS_EMPTY(hashtable_ibl_lookup(dcontext, (ptr_uint_t)tag,
                                                         ibl_table)))) {
fragment_coarse_wrapper(&wrapper, tag, body_pc);
fragment_add_ibl_target_helper(dcontext, &wrapper, ibl_table);
DOSTATS({ added++; });
}
}
}
LOG(THREAD, LOG_FRAGMENT, 2,
"coarse_persisted_fill_ibl %s: added %d of %d entries\n",
get_branch_type_name(branch_type), added, ptable->entries);
STATS_ADD(perscache_ibl_prefill, added);
}
/* Case 9639: fill ibl table from persisted RAC/RCT table entries */
static void
coarse_persisted_fill_ibl(dcontext_t *dcontext, coarse_info_t *info,
ibl_branch_type_t branch_type)
{
per_thread_t *pt = GET_PT(dcontext);
ibl_table_t *ibl_table;
rct_module_table_t *permod;
/* Caller must hold info lock */
ASSERT_OWN_MUTEX(true, &info->lock);
ASSERT(exists_coarse_ibl_pending_table(dcontext, info, branch_type));
ASSERT(TEST(COARSE_FILL_IBL_MASK(branch_type),
DYNAMO_OPTION(coarse_fill_ibl)));
if (!exists_coarse_ibl_pending_table(dcontext, info, branch_type))
return;
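    /* Mapping used below (for reference): the persisted RCT_RAC table feeds the
     * IBL_RETURN and IBL_INDJMP ibl tables, while the persisted RCT_RCT table
     * feeds IBL_INDCALL and IBL_INDJMP.  IBL_INDJMP thus draws from both
     * tables, which is why the helper above skips tags already present for
     * that branch type.
     */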
os_get_module_info_lock();
ibl_table = GET_IBT_TABLE(pt, FRAG_SHARED|FRAG_COARSE_GRAIN, branch_type);
if (branch_type == IBL_RETURN || branch_type == IBL_INDJMP) {
permod = rct_get_table(info->base_pc, RCT_RAC);
ASSERT(permod != NULL && permod->persisted_table != NULL);
if (permod != NULL && permod->persisted_table != NULL) {
LOG(THREAD, LOG_FRAGMENT, 2,
"coarse_persisted_fill_ibl %s: adding RAC %d entries\n",
get_branch_type_name(branch_type), permod->persisted_table->entries);
coarse_persisted_fill_ibl_helper(dcontext, ibl_table, info,
permod->persisted_table, branch_type);
}
}
if (branch_type == IBL_INDCALL || branch_type == IBL_INDJMP) {
permod = rct_get_table(info->base_pc, RCT_RCT);
ASSERT(permod != NULL && permod->persisted_table != NULL);
if (permod != NULL && permod->persisted_table != NULL) {
LOG(THREAD, LOG_FRAGMENT, 2,
"coarse_persisted_fill_ibl %s: adding RCT %d entries\n",
get_branch_type_name(branch_type), permod->persisted_table->entries);
coarse_persisted_fill_ibl_helper(dcontext, ibl_table, info,
permod->persisted_table, branch_type);
}
}
os_get_module_info_unlock();
/* We only fill for the 1st thread (if using per-thread ibl
* tables). We'd need per-thread flags to do otherwise, and the
* goal is only to help startup performance.
* FIXME case 9639: later threads may do startup work in various apps,
* and we may want a better solution here.
*/
info->ibl_pending_used |= COARSE_FILL_IBL_MASK(branch_type);
}
#endif /* defined(RETURN_AFTER_CALL) || defined(RCT_IND_BRANCH) */
#ifdef RETURN_AFTER_CALL
/* returns NULL if not found */
app_pc
fragment_after_call_lookup(dcontext_t *dcontext, app_pc tag)
{
return rct_table_lookup(dcontext, tag, RCT_RAC);
}
void
fragment_add_after_call(dcontext_t *dcontext, app_pc tag)
{
mutex_lock(&after_call_lock);
if (!rct_table_add(dcontext, tag, RCT_RAC))
STATS_INC(num_existing_after_call);
else
STATS_INC(num_future_after_call);
mutex_unlock(&after_call_lock);
}
/* flushing a fragment invalidates the after call entry */
void
fragment_flush_after_call(dcontext_t *dcontext, app_pc tag)
{
mutex_lock(&after_call_lock);
rct_table_flush_entry(dcontext, tag, RCT_RAC);
mutex_unlock(&after_call_lock);
STATS_INC(num_future_after_call_removed);
STATS_DEC(num_future_after_call);
}
/* see comments in rct_table_invalidate_range() */
uint
invalidate_after_call_target_range(dcontext_t *dcontext,
app_pc text_start, app_pc text_end)
{
uint entries_removed;
mutex_lock(&after_call_lock);
entries_removed =
rct_table_invalidate_range(dcontext, RCT_RAC, text_start, text_end);
mutex_unlock(&after_call_lock);
STATS_ADD(num_future_after_call_removed, entries_removed);
STATS_SUB(num_future_after_call, entries_removed);
LOG(THREAD, LOG_FRAGMENT, 2,
"invalidate_rct_target_range "PFX"-"PFX": removed %d entries\n",
text_start, text_end, entries_removed);
return entries_removed;
}
#endif /* RETURN_AFTER_CALL */
/***********************************************************************/
#ifdef RCT_IND_BRANCH
/*
* RCT indirect branch policy bookkeeping. Mostly a set of wrappers
* around the basic hashtable functionality.
*
* FIXME: all of these routines should be moved to rct.c after we move
* the hashtable primitives to fragment.h as static inline's
*/
/* returns NULL if not found */
app_pc
rct_ind_branch_target_lookup(dcontext_t *dcontext, app_pc tag)
{
return rct_table_lookup(dcontext, tag, RCT_RCT);
}
/* returns true if a new entry for target was added,
* or false if target was already known
*/
/* Note - entries are expected to be within MEM_IMAGE */
bool
rct_add_valid_ind_branch_target(dcontext_t *dcontext, app_pc tag)
{
ASSERT_OWN_MUTEX(true, &rct_module_lock);
DOLOG(2, LOG_FRAGMENT, {
/* FIXME: would be nice to add a heavy weight check that we're
* really only a PE IMAGE via is_in_code_section()
*/
});
if (!rct_table_add(dcontext, tag, RCT_RCT))
return false;
else {
STATS_INC(rct_ind_branch_entries);
return true; /* new entry */
}
}
/* invalidate an indirect branch target and free any associated memory */
/* FIXME: note that this is currently not used and
* invalidate_ind_branch_target_range() will be the likely method to
* use for most cases when a whole module range is invalidated.
*/
void
rct_flush_ind_branch_target_entry(dcontext_t *dcontext, app_pc tag)
{
ASSERT_OWN_MUTEX(true, &rct_module_lock); /* synch with adding */
rct_table_flush_entry(dcontext, tag, RCT_RCT);
STATS_DEC(rct_ind_branch_entries);
STATS_INC(rct_ind_branch_entries_removed);
}
/* see comments in rct_table_invalidate_range() */
uint
invalidate_ind_branch_target_range(dcontext_t *dcontext,
app_pc text_start, app_pc text_end)
{
uint entries_removed;
ASSERT_OWN_MUTEX(true, &rct_module_lock); /* synch with adding */
entries_removed =
rct_table_invalidate_range(dcontext, RCT_RCT, text_start, text_end);
STATS_ADD(rct_ind_branch_entries_removed, entries_removed);
STATS_SUB(rct_ind_branch_entries, entries_removed);
return entries_removed;
}
#endif /* RCT_IND_BRANCH */
/****************************************************************************/
/* CACHE CONSISTENCY */
/* Handle exits from the cache from our self-modifying code sandboxing
* instrumentation.
*/
void
fragment_self_write(dcontext_t *dcontext)
{
ASSERT(!is_self_couldbelinking());
/* need to delete just this fragment, then start interpreting
* at the instr after the self-write instruction
*/
dcontext->next_tag = EXIT_TARGET_TAG(dcontext, dcontext->last_fragment,
dcontext->last_exit);
LOG(THREAD, LOG_ALL, 2, "Sandboxing exit from fragment "PFX" @"PFX"\n",
dcontext->last_fragment->tag,
EXIT_CTI_PC(dcontext->last_fragment, dcontext->last_exit));
LOG(THREAD, LOG_ALL, 2, "\tset next_tag to "PFX"\n", dcontext->next_tag);
/* We come in here both for actual selfmod and for exec count thresholds,
* to avoid needing separate LINK_ flags.
*/
if (DYNAMO_OPTION(sandbox2ro_threshold) > 0) {
if (vm_area_selfmod_check_clear_exec_count(dcontext, dcontext->last_fragment)) {
/* vm_area_* deleted this fragment by flushing so nothing more to do */
return;
}
}
LOG(THREAD, LOG_ALL, 1, "WARNING: fragment "PFX" @"PFX" overwrote its own code\n",
dcontext->last_fragment->tag,
EXIT_CTI_PC(dcontext->last_fragment, dcontext->last_exit));
STATS_INC(num_self_writes);
if (TEST(FRAG_WAS_DELETED, dcontext->last_fragment->flags)) {
/* Case 8177: case 3559 unionized fragment_t.in_xlate, so we cannot delete a
* fragment that has already been unlinked in the first stage of a flush.
* The flush queue check, which comes after this (b/c we want to be
* nolinking), will delete.
*/
ASSERT(((per_thread_t *) dcontext->fragment_field)->flush_queue_nonempty);
STATS_INC(num_self_writes_after_flushes);
} else {
#ifdef PROGRAM_SHEPHERDING
/* we can't call fragment_delete if vm_area_t deletes it by flushing */
if (!vm_area_fragment_self_write(dcontext, dcontext->last_fragment->tag))
#endif
{
fragment_delete(dcontext, dcontext->last_fragment, FRAGDEL_ALL);
STATS_INC(num_fragments_deleted_selfmod);
}
}
}
/* If coarse_grain is true, only the min_pc and max_pc of f are used;
 * otherwise a full walk of the original code is done to see whether
 * any piece of the fragment really does overlap.
 * If bb_tag != NULL, also returns (in *bb_tag) the tag of the bb that
 * actually overlaps (i.e., the component bb of a trace that does the
 * overlapping).
 */
bool
fragment_overlaps(dcontext_t *dcontext, fragment_t *f,
byte *region_start, byte *region_end, bool coarse_grain,
overlap_info_t *info_res, app_pc *bb_tag)
{
overlap_info_t info;
info.overlap = false;
if ((f->flags & FRAG_IS_TRACE) != 0) {
uint i;
trace_only_t *t = TRACE_FIELDS(f);
/* look through all blocks making up the trace */
ASSERT(t->bbs != NULL);
/* trace should have at least one bb */
ASSERT(t->num_bbs > 0);
for (i=0; i<t->num_bbs; i++) {
if (app_bb_overlaps(dcontext, t->bbs[i].tag, f->flags,
region_start, region_end, &info)) {
if (bb_tag != NULL)
*bb_tag = t->bbs[i].tag;
break;
}
}
} else {
app_bb_overlaps(dcontext, f->tag, f->flags,
region_start, region_end, &info);
if (info.overlap && bb_tag != NULL)
*bb_tag = f->tag;
}
if (info_res != NULL)
*info_res = info;
return info.overlap;
}
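/* A minimal usage sketch of fragment_overlaps() (illustrative only, not
 * compiled; the variable names are hypothetical):
 */
#if 0
    overlap_info_t info;
    app_pc bb_tag = NULL;
    if (fragment_overlaps(dcontext, f, region_start, region_end,
                          false /*walk the original code, not just bounds*/,
                          &info, &bb_tag)) {
        /* bb_tag now holds the overlapping component bb's tag (or f->tag for
         * a non-trace fragment), and info has the overlap details.
         */
    }
#endif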
#ifdef DEBUG
void
study_all_hashtables(dcontext_t *dcontext)
{
per_thread_t *pt = (per_thread_t *) dcontext->fragment_field;
ibl_branch_type_t branch_type;
for (branch_type = IBL_BRANCH_TYPE_START;
branch_type < IBL_BRANCH_TYPE_END; branch_type++) {
if (!DYNAMO_OPTION(disable_traces)) {
per_thread_t *ibl_pt = pt;
if (DYNAMO_OPTION(shared_trace_ibt_tables))
ibl_pt = shared_pt;
hashtable_ibl_study(dcontext, &ibl_pt->trace_ibt[branch_type],
0/*table consistent*/);
}
if (DYNAMO_OPTION(bb_ibl_targets)) {
per_thread_t *ibl_pt = pt;
if (DYNAMO_OPTION(shared_bb_ibt_tables))
ibl_pt = shared_pt;
hashtable_ibl_study(dcontext, &ibl_pt->bb_ibt[branch_type],
0/*table consistent*/);
}
}
if (PRIVATE_TRACES_ENABLED())
hashtable_fragment_study(dcontext, &pt->trace, 0/*table consistent*/);
hashtable_fragment_study(dcontext, &pt->bb, 0/*table consistent*/);
hashtable_fragment_study(dcontext, &pt->future, 0/*table consistent*/);
if (DYNAMO_OPTION(shared_bbs))
hashtable_fragment_study(dcontext, shared_bb, 0/*table consistent*/);
if (DYNAMO_OPTION(shared_traces))
hashtable_fragment_study(dcontext, shared_trace, 0/*table consistent*/);
if (SHARED_FRAGMENTS_ENABLED())
hashtable_fragment_study(dcontext, shared_future, 0/*table consistent*/);
# ifdef RETURN_AFTER_CALL
if (dynamo_options.ret_after_call && rac_non_module_table.live_table != NULL) {
hashtable_app_pc_study(dcontext, rac_non_module_table.live_table,
0/*table consistent*/);
}
# endif
# if defined(RCT_IND_BRANCH) && defined(UNIX)
if ((TEST(OPTION_ENABLED, DYNAMO_OPTION(rct_ind_call)) ||
TEST(OPTION_ENABLED, DYNAMO_OPTION(rct_ind_jump))) &&
rct_global_table.live_table != NULL) {
hashtable_app_pc_study(dcontext, rct_global_table.live_table,
0/*table consistent*/);
}
# endif /* RCT_IND_BRANCH */
# if defined(WIN32) && (defined(RETURN_AFTER_CALL) || defined(RCT_IND_BRANCH))
{
module_iterator_t *mi = module_iterator_start();
uint i;
rct_module_table_t *permod;
while (module_iterator_hasnext(mi)) {
module_area_t *data = module_iterator_next(mi);
for (i = 0; i < RCT_NUM_TYPES; i++) {
permod = os_module_get_rct_htable(data->start, i);
ASSERT(permod != NULL);
if (permod->persisted_table != NULL) {
LOG(THREAD, LOG_FRAGMENT, 2,
"%s persisted hashtable for %s "PFX"-"PFX"\n",
i == RCT_RAC ? "RAC" : "RCT",
GET_MODULE_NAME(&data->names), data->start, data->end);
hashtable_app_pc_study(dcontext, permod->persisted_table,
0/*table consistent*/);
}
if (permod->live_table != NULL) {
LOG(THREAD, LOG_FRAGMENT, 2,
"%s live hashtable for %s "PFX"-"PFX"\n",
i == RCT_RAC ? "RAC" : "RCT",
GET_MODULE_NAME(&data->names), data->start, data->end);
hashtable_app_pc_study(dcontext, permod->live_table,
0/*table consistent*/);
}
}
}
module_iterator_stop(mi);
}
# endif /* defined(RETURN_AFTER_CALL) || defined(RCT_IND_BRANCH) */
}
#endif /* DEBUG */
/****************************************************************************
* FLUSHING
*
* Two-stage freeing of fragments via immediate unlinking followed by lazy
* deletion.
*/
uint
get_flushtime_last_update(dcontext_t *dcontext)
{
per_thread_t *pt = (per_thread_t *) dcontext->fragment_field;
return pt->flushtime_last_update;
}
void
set_flushtime_last_update(dcontext_t *dcontext, uint val)
{
per_thread_t *pt = (per_thread_t *) dcontext->fragment_field;
pt->flushtime_last_update = val;
}
void
set_at_syscall(dcontext_t *dcontext, bool val)
{
ASSERT(dcontext != GLOBAL_DCONTEXT);
dcontext->upcontext_ptr->at_syscall = val;
}
bool
get_at_syscall(dcontext_t *dcontext)
{
ASSERT(dcontext != GLOBAL_DCONTEXT);
return dcontext->upcontext_ptr->at_syscall;
}
/* Assumes caller takes care of synchronization.
* Returns false iff was_I_flushed ends up being deleted right now from
* a private cache OR was_I_flushed has been flushed from a shared cache
* and is pending final deletion.
*/
static bool
check_flush_queue(dcontext_t *dcontext, fragment_t *was_I_flushed)
{
per_thread_t *pt = (per_thread_t *) dcontext->fragment_field;
bool not_flushed = true;
ASSERT_OWN_MUTEX(true, &pt->linking_lock);
/* first check private queue and act on pending deletions */
if (pt->flush_queue_nonempty) {
bool local_prot = local_heap_protected(dcontext);
if (local_prot)
SELF_PROTECT_LOCAL(dcontext, WRITABLE);
        /* remove local vm areas on the thread's to-delete queue and all frags in their lists */
not_flushed = not_flushed &&
vm_area_flush_fragments(dcontext, was_I_flushed);
pt->flush_queue_nonempty = false;
LOG(THREAD, LOG_FRAGMENT, 2, "Hashtable state after flushing the queue:\n");
DOLOG(2, LOG_FRAGMENT, {
study_all_hashtables(dcontext);
});
if (local_prot)
SELF_PROTECT_LOCAL(dcontext, READONLY);
}
/* now check shared queue to dec ref counts */
if (DYNAMO_OPTION(shared_deletion) &&
/* No lock needed: any racy incs to global are in safe direction, and our inc
* is atomic so we shouldn't see any partial-word-updated values here. This
* check is our shared deletion algorithm's only perf hit when there's no
* actual shared flushing.
*/
pt->flushtime_last_update < flushtime_global) {
/* dec ref count on any pending shared areas */
not_flushed = not_flushed &&
vm_area_check_shared_pending(dcontext, was_I_flushed);
/* Remove unlinked markers if called for.
* FIXME If a thread's flushtime is updated due to shared syscall sync,
* its tables won't be rehashed here -- the thread's flushtime will be
* equal to the global flushtime so the 'if' isn't entered. We have
* multiple options as to other points for rehashing -- a table add, a
* table delete, any entry into DR. For now, we've chosen a table add
* (see check_table_size()).
*/
if (SHARED_IB_TARGETS() &&
(INTERNAL_OPTION(rehash_unlinked_threshold) < 100 ||
INTERNAL_OPTION(rehash_unlinked_always))) {
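            /* Worked example (illustrative): with -rehash_unlinked_threshold 25,
             * a table with 30 linked and 10 unlinked entries gives
             * 100*10/(10+30) = 25, which does not exceed the threshold; an 11th
             * unlinked entry (100*11/41 = 26) triggers the rehash below.
             */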
ibl_branch_type_t branch_type;
for (branch_type = IBL_BRANCH_TYPE_START;
branch_type < IBL_BRANCH_TYPE_END; branch_type++) {
ibl_table_t *table = &pt->bb_ibt[branch_type];
if (table->unlinked_entries > 0 &&
(INTERNAL_OPTION(rehash_unlinked_threshold) <
(100 * table->unlinked_entries /
(table->unlinked_entries + table->entries)) ||
INTERNAL_OPTION(rehash_unlinked_always))) {
STATS_INC(num_ibt_table_rehashes);
LOG(THREAD, LOG_FRAGMENT, 1,
"Rehash table %s: linked %u, unlinked %u\n",
table->name, table->entries, table->unlinked_entries);
hashtable_ibl_unlinked_remove(dcontext, table);
}
}
}
}
/* FIXME This is the ideal location for inserting refcounting logic
* for freeing a resized shared IBT table, as is done for shared
* deletion above.
*/
return not_flushed;
}
/* Note that an all-threads-synch flush does NOT set the self-flushing flag,
* so use is_self_allsynch_flushing() instead.
*/
bool
is_self_flushing()
{
/* race condition w/ flusher being updated -- but since only testing vs self,
* if flusher update is atomic, should be safe
*/
return (get_thread_private_dcontext() == flusher);
}
bool
is_self_allsynch_flushing()
{
/* race condition w/ allsynch_flusher being updated -- but since only testing
* vs self, if flusher update is atomic, should be safe
*/
return (allsynch_flusher != NULL &&
get_thread_private_dcontext() == allsynch_flusher);
}
/* N.B.: only accurate if called on self (else a race condition) */
bool
is_self_couldbelinking()
{
dcontext_t *dcontext = get_thread_private_dcontext();
/* if no dcontext yet then can't be couldbelinking */
return (dcontext != NULL &&
!RUNNING_WITHOUT_CODE_CACHE() /*case 7966: has no pt*/ &&
is_couldbelinking(dcontext));
}
/* N.B.: can only call if target thread is self, suspended, or waiting for flush */
bool
is_couldbelinking(dcontext_t *dcontext)
{
per_thread_t *pt = (per_thread_t *) dcontext->fragment_field;
    /* The lock is only needed when self is writing or another thread is reading,
     * but for a different thread we require that it be suspended or waiting for
     * a flush, so not holding the lock is safe here. */
/* FIXME : add an assert that the thread that owns the dcontext is either
* the caller, at the flush sync wait, or is suspended by thread_synch
* routines */
return (!RUNNING_WITHOUT_CODE_CACHE() /*case 7966: has no pt*/ &&
pt != NULL/*PR 536058: no pt*/ &&
pt->could_be_linking);
}
static void
wait_for_flusher_nolinking(dcontext_t *dcontext)
{
/* FIXME: can have livelock w/ these types of synch loops,
* any way to work into deadlock-avoidance?
*/
per_thread_t *pt = (per_thread_t *) dcontext->fragment_field;
ASSERT(!pt->could_be_linking);
while (pt->wait_for_unlink) {
LOG(THREAD, LOG_DISPATCH|LOG_THREADS, 2,
"Thread "TIDFMT" waiting for flush (flusher is %d @flushtime %d)\n",
/* safe to deref flusher since flusher is waiting for our signal */
dcontext->owning_thread, flusher->owning_thread, flushtime_global);
mutex_unlock(&pt->linking_lock);
STATS_INC(num_wait_flush);
wait_for_event(pt->finished_all_unlink);
LOG(THREAD, LOG_DISPATCH|LOG_THREADS, 2,
"Thread "TIDFMT" resuming after flush\n", dcontext->owning_thread);
mutex_lock(&pt->linking_lock);
}
}
static void
wait_for_flusher_linking(dcontext_t *dcontext)
{
/* FIXME: can have livelock w/ these types of synch loops,
* any way to work into deadlock-avoidance?
*/
per_thread_t *pt = (per_thread_t *) dcontext->fragment_field;
ASSERT(pt->could_be_linking);
while (pt->wait_for_unlink) {
LOG(THREAD, LOG_DISPATCH|LOG_THREADS, 2,
"Thread "TIDFMT" waiting for flush (flusher is %d @flushtime %d)\n",
/* safe to deref flusher since flusher is waiting for our signal */
dcontext->owning_thread, flusher->owning_thread, flushtime_global);
mutex_unlock(&pt->linking_lock);
signal_event(pt->waiting_for_unlink);
STATS_INC(num_wait_flush);
wait_for_event(pt->finished_with_unlink);
LOG(THREAD, LOG_DISPATCH|LOG_THREADS, 2,
"Thread "TIDFMT" resuming after flush\n", dcontext->owning_thread);
mutex_lock(&pt->linking_lock);
}
}
#ifdef DEBUG
static void
check_safe_for_flush_synch(dcontext_t *dcontext)
{
/* We cannot hold any locks at synch points that wait for flushers, as we
* could prevent forward progress of a couldbelinking thread that the
* flusher will wait for.
*/
/* FIXME: will fail w/ -single_thread_in_DR, along w/ the other similar
* asserts for flushing and cache entering
*/
# ifdef DEADLOCK_AVOIDANCE
ASSERT(thread_owns_no_locks(dcontext) ||
/* if thread exits while a trace is in progress (case 8055) we're
* holding thread_initexit_lock, which prevents any flusher from
* proceeding and hitting a deadlock point, so we should be safe
*/
thread_owns_one_lock(dcontext, &thread_initexit_lock) ||
/* it is safe to be the all-thread-syncher and hit a flush synch
* point, as no flusher can be active (all threads should be suspended
* except this thread)
*/
thread_owns_two_locks(dcontext, &thread_initexit_lock,
&all_threads_synch_lock));
# endif /* DEADLOCK_AVOIDANCE */
}
#endif /* DEBUG */
#ifdef CLIENT_INTERFACE
static void
process_client_flush_requests(dcontext_t *dcontext, dcontext_t *alloc_dcontext,
client_flush_req_t *req, bool flush)
{
client_flush_req_t *iter = req;
while (iter != NULL) {
client_flush_req_t *next = iter->next;
if (flush) {
/* Note that we don't free futures from potentially linked-to region b/c we
* don't have lazy linking (xref case 2236) */
            /* FIXME - if there's more than one of these it would be nice to batch
             * them, especially for the synch-all ones. */
if (iter->flush_callback != NULL) {
/* FIXME - for implementation simplicity we do a synch-all flush so
* that we can inform the client right away, it might be nice to use
* the more performant regular flush when possible. */
flush_fragments_from_region(dcontext, iter->start, iter->size,
true/*force synchall*/);
(*iter->flush_callback)(iter->flush_id);
} else {
/* do a regular flush */
flush_fragments_from_region(dcontext, iter->start, iter->size,
false/*don't force synchall*/);
}
}
HEAP_TYPE_FREE(alloc_dcontext, iter, client_flush_req_t, ACCT_CLIENT,
UNPROTECTED);
iter = next;
}
}
#endif
/* Returns false iff was_I_flushed ends up being deleted
* if cache_transition is true, assumes entering the cache now.
*/
bool
enter_nolinking(dcontext_t *dcontext, fragment_t *was_I_flushed, bool cache_transition)
{
per_thread_t *pt = (per_thread_t *) dcontext->fragment_field;
bool not_flushed = true;
/*case 7966: has no pt, no flushing either */
if (RUNNING_WITHOUT_CODE_CACHE())
return true;
DOCHECK(1, { check_safe_for_flush_synch(dcontext); });
/* FIXME: once we have this working correctly, come up with scheme
* that avoids synch in common case
*/
mutex_lock(&pt->linking_lock);
ASSERT(pt->could_be_linking);
wait_for_flusher_linking(dcontext);
not_flushed = not_flushed && check_flush_queue(dcontext, was_I_flushed);
pt->could_be_linking = false;
mutex_unlock(&pt->linking_lock);
if (!cache_transition)
return not_flushed;
/* now we act on pending actions that can only be done while nolinking
* FIXME: optimization if add more triggers here: use a single
* master trigger as a first test to avoid testing all
* conditionals every time
*/
if (reset_pending != 0) {
mutex_lock(&reset_pending_lock);
if (reset_pending != 0) {
uint target = reset_pending;
reset_pending = 0;
/* fcache_reset_all_caches_proactively() will unlock */
fcache_reset_all_caches_proactively(target);
LOG(THREAD, LOG_DISPATCH, 2,
"Just reset all caches, next_tag is "PFX"\n", dcontext->next_tag);
/* fragment is gone for sure, so return false */
return false;
}
mutex_unlock(&reset_pending_lock);
}
    /* FIXME: perf opt: make a global flag we can check w/o making a call,
     * or at least inline the call
     */
if (fcache_is_flush_pending(dcontext)) {
not_flushed = not_flushed &&
fcache_flush_pending_units(dcontext, was_I_flushed);
}
#ifdef UNIX
/* i#61/PR 211530: nudges on Linux do not use separate threads */
while (dcontext->nudge_pending != NULL) {
/* handle_nudge may not return, so we can't call it w/ inconsistent state */
pending_nudge_t local = *dcontext->nudge_pending;
heap_free(dcontext, dcontext->nudge_pending, sizeof(local) HEAPACCT(ACCT_OTHER));
dcontext->nudge_pending = local.next;
if (dcontext->interrupted_for_nudge != NULL) {
fragment_t *f = dcontext->interrupted_for_nudge;
LOG(THREAD, LOG_ASYNCH, 3, "\tre-linking outgoing for interrupted F%d\n",
f->id);
SHARED_FLAGS_RECURSIVE_LOCK(f->flags, acquire, change_linking_lock);
link_fragment_outgoing(dcontext, f, false);
SHARED_FLAGS_RECURSIVE_LOCK(f->flags, release, change_linking_lock);
if (TEST(FRAG_HAS_SYSCALL, f->flags)) {
mangle_syscall_code(dcontext, f, EXIT_CTI_PC(f, dcontext->last_exit),
true/*skip exit cti*/);
}
dcontext->interrupted_for_nudge = NULL;
}
handle_nudge(dcontext, &local.arg);
/* we may have done a reset, so do not enter cache now */
return false;
}
#endif
#ifdef CLIENT_INTERFACE
/* Handle flush requests queued via dr_flush_fragments()/dr_delay_flush_region() */
/* thread private list */
process_client_flush_requests(dcontext, dcontext, dcontext->client_data->flush_list,
true/*flush*/);
dcontext->client_data->flush_list = NULL;
/* global list */
if (client_flush_requests != NULL) { /* avoid acquiring lock every cxt switch */
client_flush_req_t *req;
mutex_lock(&client_flush_request_lock);
req = client_flush_requests;
client_flush_requests = NULL;
mutex_unlock(&client_flush_request_lock);
/* NOTE - we must release the lock before doing the flush. */
process_client_flush_requests(dcontext, GLOBAL_DCONTEXT, req, true/*flush*/);
/* FIXME - this is an ugly, yet effective, hack. The problem is there is no
* good way to tell currently if we flushed was_I_flushed. Since it could be
* gone by now we pretend that it was flushed if we did any flushing at all.
* Dispatch should refind the fragment if it wasn't flushed. */
if (req != NULL)
not_flushed = false;
}
#endif
return not_flushed;
}
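/* Illustrative sketch (not compiled) of how a caller is expected to treat the
 * return value documented above; the actual call sites are elsewhere in the
 * core (e.g., dispatch):
 */
#if 0
    if (!enter_nolinking(dcontext, dcontext->last_fragment,
                         true /*about to enter the cache*/)) {
        /* dcontext->last_fragment was flushed or is pending deletion, so its
         * fragment_t must not be used for linking; re-lookup the target by
         * tag instead.
         */
    }
#endif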
/* Returns false iff was_I_flushed ends up being deleted */
bool
enter_couldbelinking(dcontext_t *dcontext, fragment_t *was_I_flushed, bool cache_transition)
{
per_thread_t *pt = (per_thread_t *) dcontext->fragment_field;
bool not_flushed;
/*case 7966: has no pt, no flushing either */
if (RUNNING_WITHOUT_CODE_CACHE())
return true;
DOCHECK(1, { check_safe_for_flush_synch(dcontext); });
mutex_lock(&pt->linking_lock);
ASSERT(!pt->could_be_linking);
/* ensure not still marked at_syscall */
ASSERT(!DYNAMO_OPTION(syscalls_synch_flush) || !get_at_syscall(dcontext));
/* for thread-shared flush and thread-private flush+execareas atomicity,
* to avoid non-properly-nested locks (could have flusher hold
* pt->linking_lock for the entire flush) we need an additional
* synch point here for shared flushing to synch w/ all threads.
* I suppose switching from non-nested locks to this loop isn't
* nec. helping deadlock avoidance -- can still hang -- but this
* should be better performance-wise (only a few writes and a
* conditional in the common case here, no extra locks)
*/
pt->soon_to_be_linking = true;
wait_for_flusher_nolinking(dcontext);
pt->soon_to_be_linking = false;
pt->could_be_linking = true;
not_flushed = check_flush_queue(dcontext, was_I_flushed);
mutex_unlock(&pt->linking_lock);
return not_flushed;
}
/* NOTE - this routine may be called more than once for the same exiting thread,
 * xref case 8047. Any once-only reference counting, cleanup, etc. should be done in
 * fragment_thread_exit(). This routine is just a stripped-down version of
 * enter_nolinking() to keep an exiting thread from deadlocking with flushing. */
void
enter_threadexit(dcontext_t *dcontext)
{
per_thread_t *pt = (per_thread_t *) dcontext->fragment_field;
/*case 7966: has no pt, no flushing either */
if (RUNNING_WITHOUT_CODE_CACHE() || pt == NULL/*PR 536058: no pt*/)
return;
mutex_lock(&pt->linking_lock);
/* must dec ref count on shared regions before we die */
check_flush_queue(dcontext, NULL);
pt->could_be_linking = false;
if (pt->wait_for_unlink) {
/* make sure don't get into deadlock w/ flusher */
pt->about_to_exit = true; /* let flusher know can ignore us */
signal_event(pt->waiting_for_unlink); /* wake flusher up */
}
mutex_unlock(&pt->linking_lock);
}
/* caller must hold shared_cache_flush_lock */
void
increment_global_flushtime()
{
ASSERT_OWN_MUTEX(true, &shared_cache_flush_lock);
/* reset will turn flushtime_global back to 0, so we schedule one
* when we're approaching overflow
*/
if (flushtime_global == UINT_MAX/2) {
ASSERT_NOT_TESTED(); /* FIXME: add -stress_flushtime_global_max */
SYSLOG_INTERNAL_WARNING("flushtime_global approaching UINT_MAX, resetting");
schedule_reset(RESET_ALL);
}
ASSERT(flushtime_global < UINT_MAX);
/* compiler should 4-byte-align so no cache line crossing
     * (asserted in fragment_init())
*/
flushtime_global++;
LOG(GLOBAL, LOG_VMAREAS, 2, "new flush timestamp: %u\n",
flushtime_global);
}
/* The master flusher routines are split into 3:
* stage1) flush_fragments_synch_unlink_priv
* stage2) flush_fragments_unlink_shared
* stage3) flush_fragments_end_synch
* which MUST be used together. They are split to allow the caller to
* perform custom operations at certain synchronization points in the
* middle of flushing without using callback routines. The stages and
* points are:
*
* stage1: head synch, priv flushee unlink
* here caller can produce a custom list of fragments to flush while
* all threads are fully synched
* stage2: shared flushee unlink
*
* between stage2 and stage3, a region flush performs additional actions:
* region flush: exec area lock
* here caller can do custom exec area manipulations
* region flush: exec area unlock
*
* stage3: tail synch
*
* When doing a region flush with no custom region removals (the default is
* removing [base,base+size)), use the routine flush_fragments_and_remove_region().
* When doing a region flush with custom removals, use
* flush_fragments_in_region_start() and flush_fragments_in_region_finish().
* When doing a flush with no region removals at all use flush_fragments_from_region().
*
* The thread requesting a flush must be !couldbelinking and must not
* be holding any locks that are ever grabbed while a thread is
* couldbelinking (pretty much all locks), as it must wait for other
* threads who are couldbelinking. The thread_initexit_lock is held
* from stage 1 to stage 3 (if there are fragments to flush), and
* region flushing also holds the executable_areas lock between stage
* 2 and stage 3. After stage 1 no thread is couldbelinking() until
* the synchronization release in stage 3.
*
* The general delayed-deletion flushing strategy involves first
* freezing the threads via the thread_initexit_lock.
* It then walks the thread list and marks all vm areas that overlap
* base...base+size as to-be-deleted, along with unlinking all fragments in those
* vm areas and unlinking shared_syscall for that thread.
* Fragments in the target area are not actually deleted until their owning thread
* checks its pending deletion queue, which it does prior to entering the fcache.
* Also flushes shared fragments, in a similar manner, with deletion delayed
* until all threads are out of the shared cache.
*/
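/* A minimal sketch (illustrative only, not compiled) of the 3-stage sequence
 * described above, for a plain region flush with no custom removals or exec
 * area manipulation; real callers should use the wrapper routines named
 * above (flush_fragments_from_region(), etc.):
 */
#if 0
static void
example_flush_region_sketch(dcontext_t *dcontext, app_pc base, size_t size)
{
    /* Stage 1: synch with all threads and unlink private fragments.
     * A false return means no executable area overlap, so no synch was done
     * and stages 2 and 3 are skipped.
     */
    if (!flush_fragments_synch_unlink_priv(dcontext, base, size,
                                           false /*don't own initexit lock*/,
                                           true /*exec area invalid*/,
                                           false /*don't force synchall*/
                                           _IF_DGCDIAG(NULL)))
        return;
    /* Stage 2: unlink shared fragments in [base, base+size) (list == NULL). */
    flush_fragments_unlink_shared(dcontext, base, size, NULL _IF_DGCDIAG(NULL));
    /* A region flush would acquire the exec areas lock and remove or adjust
     * the region here, between stages 2 and 3.
     */
    /* Stage 3: release the synched threads. */
    flush_fragments_end_synch(dcontext, false /*don't keep initexit lock*/);
}
#endif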
/* Variables shared between the 3 flush stage routines
* flush_fragments_synch_unlink_priv(),
* flush_fragments_unlink_shared(), and flush_fragments_end_synch.
* As there can only be one flush at a time, we only need one copy,
* protected by the thread_initexit_lock.
*/
/* Not persistent across code cache execution, so unprotected */
DECLARE_NEVERPROT_VAR(static thread_record_t **flush_threads, NULL);
DECLARE_NEVERPROT_VAR(static int flush_num_threads, 0);
DECLARE_NEVERPROT_VAR(static int pending_delete_threads, 0);
DECLARE_NEVERPROT_VAR(static int shared_flushed, 0);
DECLARE_NEVERPROT_VAR(static bool flush_synchall, false);
#ifdef DEBUG
DECLARE_NEVERPROT_VAR(static int num_flushed, 0);
DECLARE_NEVERPROT_VAR(static int flush_last_stage, 0);
#endif
static void
flush_fragments_free_futures(app_pc base, size_t size)
{
int i;
dcontext_t *tgt_dcontext;
ASSERT_OWN_MUTEX(true, &thread_initexit_lock);
ASSERT((allsynch_flusher == NULL && flusher == get_thread_private_dcontext()) ||
(flusher == NULL && allsynch_flusher == get_thread_private_dcontext()));
ASSERT(flush_num_threads > 0);
ASSERT(flush_threads != NULL);
if (DYNAMO_OPTION(free_unmapped_futures) && !RUNNING_WITHOUT_CODE_CACHE()) {
/* We need to free the futures after all fragments have been unlinked,
* as unlinking will create new futures
*/
acquire_recursive_lock(&change_linking_lock);
for (i=0; i<flush_num_threads; i++) {
tgt_dcontext = flush_threads[i]->dcontext;
if (tgt_dcontext != NULL) {
fragment_delete_futures_in_region(tgt_dcontext, base, base + size);
thcounter_range_remove(tgt_dcontext, base, base + size);
}
}
if (SHARED_FRAGMENTS_ENABLED())
fragment_delete_futures_in_region(GLOBAL_DCONTEXT, base, base + size);
release_recursive_lock(&change_linking_lock);
} /* else we leak them */
}
/* This routine begins a flush that requires full thread synch: currently,
* it is used for flushing coarse-grain units and for dr_flush_region()
*/
static void
flush_fragments_synchall_start(dcontext_t *ignored, app_pc base, size_t size,
bool exec_invalid)
{
dcontext_t *my_dcontext = get_thread_private_dcontext();
app_pc exec_start = NULL, exec_end = NULL;
bool all_synched = true;
int i;
const thread_synch_state_t desired_state =
THREAD_SYNCH_SUSPENDED_VALID_MCONTEXT_OR_NO_XFER;
DEBUG_DECLARE(bool ok;)
KSTART(synchall_flush);
LOG(GLOBAL, LOG_FRAGMENT, 2,
"\nflush_fragments_synchall_start: thread "TIDFMT" suspending all threads\n",
get_thread_id());
STATS_INC(flush_synchall);
/* suspend all DR-controlled threads at safe locations */
DEBUG_DECLARE(ok =)
synch_with_all_threads(desired_state, &flush_threads, &flush_num_threads,
THREAD_SYNCH_NO_LOCKS_NO_XFER,
/* if we fail to suspend a thread (e.g., for
* privilege reasons), ignore it since presumably
* the failed thread is some injected thread not
* running to-be-flushed code, so we continue
* with the flush!
*/
THREAD_SYNCH_SUSPEND_FAILURE_IGNORE);
ASSERT(ok);
/* now we own the thread_initexit_lock */
ASSERT(OWN_MUTEX(&all_threads_synch_lock) && OWN_MUTEX(&thread_initexit_lock));
/* We do NOT set flusher as is_self_flushing() is all about
* couldbelinking, while our synch is of a different nature. The
* trace_abort() is_self_flushing() check is the only worrisome
* one: FIXME.
*/
ASSERT(flusher == NULL);
ASSERT(allsynch_flusher == NULL);
allsynch_flusher = my_dcontext;
flush_synchall = true;
ASSERT(flush_last_stage == 0);
DODEBUG({ flush_last_stage = 1; });
LOG(GLOBAL, LOG_FRAGMENT, 2,
"flush_fragments_synchall_start: walking the threads\n");
/* We rely on coarse fragments not touching more than one vmarea region
* for our ibl invalidation. It's
* ok to invalidate more than we need to so we don't care if there are
* multiple coarse units within this range. We just need the exec areas
* bounds that overlap the flush region.
*/
if (!executable_area_overlap_bounds(base, base+size, &exec_start, &exec_end,
0, true/*doesn't matter w/ 0*/)) {
/* caller checks for overlap but lock let go so can get here; go ahead
* and do synch per flushing contract.
*/
exec_start = base;
exec_end = base+size;
}
LOG(GLOBAL, LOG_FRAGMENT, 2,
"flush_fragments_synchall_start: from "PFX"-"PFX" => coarse "PFX"-"PFX"\n",
base, base+size, exec_start, exec_end);
/* FIXME: share some of this code that I duplicated from reset */
for (i = 0; i < flush_num_threads; i++) {
dcontext_t *dcontext = flush_threads[i]->dcontext;
if (dcontext != NULL) { /* include my_dcontext here */
DEBUG_DECLARE(uint removed;)
LOG(GLOBAL, LOG_FRAGMENT, 2,
"\tconsidering thread #%d "TIDFMT"\n", i, flush_threads[i]->id);
if (dcontext != my_dcontext) {
/* must translate BEFORE freeing any memory! */
if (!thread_synch_successful(flush_threads[i])) {
/* FIXME case 9480: if we do get here for low-privilege handles
* or exceeding our synch try count, best to not move the thread
* as we won't get a clean translation. Chances are it is
* not in the region being flushed.
*/
SYSLOG_INTERNAL_ERROR_ONCE("failed to synch with thread during "
"synchall flush");
LOG(THREAD, LOG_FRAGMENT|LOG_SYNCH, 2,
"failed to synch with thread #%d\n", i);
STATS_INC(flush_synchall_fail);
all_synched = false;
} else if (is_thread_currently_native(flush_threads[i])) {
/* native_exec's regain-control point is in our DLL,
* and lost-control threads are truly native, so no
* state to worry about except for hooks -- and we're
* not freeing the interception buffer.
*/
LOG(GLOBAL, LOG_FRAGMENT, 2,
"\tcurrently native so no translation needed\n");
} else if (thread_synch_state_no_xfer(dcontext)) {
/* Case 6821: do not translate other synch-all-thread users.
* They have no fragment state, so leave alone.
* All flush call points are in syscalls or from nudges.
* Xref case 8901 on ensuring nudges don't try to return
* to the code cache.
*/
LOG(GLOBAL, LOG_FRAGMENT, 2,
"\tat THREAD_SYNCH_NO_LOCKS_NO_XFER so no translation needed\n");
STATS_INC(flush_synchall_races);
} else {
translate_from_synchall_to_dispatch(flush_threads[i], desired_state);
}
}
if (dcontext == my_dcontext || thread_synch_successful(flush_threads[i])) {
last_exit_deleted(dcontext);
/* case 7394: need to abort other threads' trace building
* since the reset xfer to dispatch will disrupt it.
* also, with PR 299808, we now have thread-shared
* "undeletable" trace-temp fragments, so we need to abort
* all traces.
*/
if (is_building_trace(dcontext)) {
LOG(THREAD, LOG_FRAGMENT, 2,
"\tsquashing trace of thread #%d\n", i);
trace_abort(dcontext);
}
}
/* Since coarse fragments never cross coarse/non-coarse executable region
* bounds, we can bound their bodies by taking
* executable_area_distinct_bounds(). This lets us remove by walking the
* ibl tables and looking only at tags, rather than walking the htable of
* each coarse unit. FIXME: not clear this is a perf win: I arbitrarily
* picked it under assumption that ibl tables are relatively small. It
* would be a clearer win if we could do the fine fragments this way
* also, but fine fragments are not constrained and could be missed using
* only a tag-based range remove.
*/
DEBUG_DECLARE(removed =)
fragment_remove_all_ibl_in_region(dcontext, exec_start, exec_end);
LOG(THREAD, LOG_FRAGMENT, 2,
"\tremoved %d ibl entries in "PFX"-"PFX"\n",
removed, exec_start, exec_end);
/* Free any fine private fragments in the region */
vm_area_allsynch_flush_fragments(dcontext, dcontext, base, base+size,
exec_invalid, all_synched/*ignored*/);
if (!SHARED_IBT_TABLES_ENABLED() && SHARED_FRAGMENTS_ENABLED()) {
/* Remove shared fine fragments from private ibl tables */
vm_area_allsynch_flush_fragments(dcontext, GLOBAL_DCONTEXT,
base, base+size,
exec_invalid, all_synched/*ignored*/);
}
}
}
    /* Remove shared coarse fragments from ibl tables, before freeing any */
if (SHARED_IBT_TABLES_ENABLED() && SHARED_FRAGMENTS_ENABLED())
fragment_remove_all_ibl_in_region(GLOBAL_DCONTEXT, exec_start, exec_end);
/* Free coarse units and shared fine fragments, as well as removing shared fine
* entries in any shared ibl tables
*/
if (SHARED_FRAGMENTS_ENABLED()) {
vm_area_allsynch_flush_fragments(GLOBAL_DCONTEXT, GLOBAL_DCONTEXT,
base, base+size, exec_invalid, all_synched);
}
}
static void
flush_fragments_synchall_end(dcontext_t *ignored)
{
thread_record_t **temp_threads = flush_threads;
DEBUG_DECLARE(dcontext_t *my_dcontext = get_thread_private_dcontext();)
LOG(GLOBAL, LOG_FRAGMENT, 2,
"flush_fragments_synchall_end: resuming all threads\n");
/* We need to clear this before we release the locks. We use a temp var
* so we can use end_synch_with_all_threads.
*/
flush_threads = NULL;
ASSERT(flusher == NULL);
flush_synchall = false;
ASSERT(dynamo_all_threads_synched);
ASSERT(allsynch_flusher == my_dcontext);
allsynch_flusher = NULL;
end_synch_with_all_threads(temp_threads, flush_num_threads, true/*resume*/);
KSTOP(synchall_flush);
}
/* This routine begins a flush of the group of fragments in the memory
* region [base, base+size) by synchronizing with each thread and unlinking
* all private fragments in the region.
*
* The exec_invalid parameter must be set to indicate whether the
* executable area is being invalidated as well or this is just a capacity
* flush (or a flush to change instrumentation).
*
* If size==0 then no unlinking occurs; however, the full synch is
* performed (so the caller should check size if no action is desired on
* zero size).
*
* If size>0 and there is no executable area overlap, then no synch is
* performed and false is returned. The caller must acquire the executable
* areas lock and re-check the overlap if exec area manipulation is to be
* performed. Returns true otherwise.
*/
bool
flush_fragments_synch_unlink_priv(dcontext_t *dcontext, app_pc base, size_t size,
/* WARNING: case 8572: the caller owning this lock
* is incompatible w/ suspend-the-world flushing!
*/
bool own_initexit_lock, bool exec_invalid,
bool force_synchall _IF_DGCDIAG(app_pc written_pc))
{
dcontext_t *tgt_dcontext;
per_thread_t *tgt_pt;
int i;
LOG(THREAD, LOG_FRAGMENT, 2,
"FLUSH STAGE 1: synch_unlink_priv(thread "TIDFMT" flushtime %d): "PFX"-"PFX"\n",
dcontext->owning_thread, flushtime_global, base, base + size);
/* Case 9750: to specify a region of size 0, do not pass in NULL as the base!
* Use EMPTY_REGION_{BASE,SIZE} instead.
*/
ASSERT(base != NULL || size != 0);
/* our flushing design requires that flushers are NOT couldbelinking
* and are not holding any locks
*/
ASSERT(!is_self_couldbelinking());
#if defined(DEADLOCK_AVOIDANCE) && defined(DEBUG)
if (own_initexit_lock) {
/* We can get here while holding the all_threads_synch_lock
* for detach (& prob. TerminateProcess(0) with other threads
* still active) on XP and 2003 via os_thread_stack_exit()
* (for other thread exit) if we have executed off that stack.
* Can be seen with Araktest detach on violation using the stack
* attack button. */
ASSERT(thread_owns_first_or_both_locks_only(dcontext,
&thread_initexit_lock,
&all_threads_synch_lock));
} else {
ASSERT_OWN_NO_LOCKS();
}
#endif
ASSERT(dcontext == get_thread_private_dcontext());
/* quick check for overlap first by using read lock and avoiding
* thread_initexit_lock:
* if no overlap, must hold lock through removal of region
* if overlap, ok to release for a bit
*/
if (size > 0 && !executable_vm_area_executed_from(base, base+size)) {
/* Only a curiosity since we can have a race (not holding exec areas lock) */
ASSERT_CURIOSITY((!SHARED_FRAGMENTS_ENABLED() ||
!thread_vm_area_overlap(GLOBAL_DCONTEXT, base, base+size)) &&
!thread_vm_area_overlap(dcontext, base, base+size));
return false;
}
/* Only a curiosity since we can have a race (not holding exec areas lock) */
ASSERT_CURIOSITY(size == 0 ||
executable_vm_area_overlap(base, base+size, false/*no lock*/));
STATS_INC(num_flushes);
if (force_synchall ||
(size > 0 && executable_vm_area_coarse_overlap(base, base+size))) {
/* Coarse units do not support individual unlinking (though prior to
* freezing they do, we ignore that) and instead require all-thread-synch
* in order to flush. For that we cannot be already holding
* thread_initexit_lock! FIXME case 8572: the only caller who does hold it
* now is os_thread_stack_exit(). For now relying on that stack not
* overlapping w/ any coarse regions.
*/
ASSERT(!own_initexit_lock);
/* The synchall will flush fine as well as coarse so we'll be done */
flush_fragments_synchall_start(dcontext, base, size, exec_invalid);
return true;
}
/* Take a snapshot of the threads in the system.
* Grab the thread lock to prevent threads from being created or
* exited for the duration of this routine
* FIXME -- is that wise? could be a relatively long time!
* It's unlikely a new thread will run code in a region being
* unmapped...but we would like to prevent threads from exiting
* while we're messing with their data.
* FIXME: need to special-case every instance where thread_initexit_lock
* is grabbed, to avoid deadlocks! we already do for a thread exiting.
*/
if (!own_initexit_lock)
mutex_lock(&thread_initexit_lock);
ASSERT_OWN_MUTEX(true, &thread_initexit_lock);
flusher = dcontext;
get_list_of_threads(&flush_threads, &flush_num_threads);
ASSERT(flush_last_stage == 0);
DODEBUG({ flush_last_stage = 1; });
/* FIXME: we can optimize this even more to not grab thread_initexit_lock */
if (RUNNING_WITHOUT_CODE_CACHE()) /* case 7966: nothing to flush, ever */
return true;
/* Set the ref count of threads who may be using a deleted fragment. We
* include ourselves in the ref count as we could be invoked with a cache
* return point before any synch (as done w/ hotpatches) and should NOT
* consider ourselves done w/ all cache fragments right now. We assume we
* will soon hit a real synch point to dec the count for us, and don't try
* to specially handle the single-threaded case here.
*/
pending_delete_threads = flush_num_threads;
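    /* Illustrative: with N synched threads (flusher included) the count starts
     * at N; each thread, the flusher among them, presumably decrements it at
     * its next synch point (via check_flush_queue() ->
     * vm_area_check_shared_pending()), and the flushed fragments are only
     * freed once every thread has done so.
     */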
DODEBUG({ num_flushed = 0; });
#ifdef WINDOWS
/* Make sure exit fcache if currently inside syscall. For thread-shared
* shared_syscall, we re-link after the shared fragments have all been
* unlinked and removed from the ibl tables. All we lose is the bounded time
* on a thread checking its private flush queue, which in a thread-shared
* configuration doesn't seem so bad. For thread-private this will work as
* well, with the same time bound lost, but we're not as worried about memory
* usage of thread-private configurations.
*/
if (DYNAMO_OPTION(shared_syscalls) && IS_SHARED_SYSCALL_THREAD_SHARED)
unlink_shared_syscall(GLOBAL_DCONTEXT);
#endif
/* i#849: unlink while we clear out ibt */
if (!special_ibl_xfer_is_thread_private())
unlink_special_ibl_xfer(GLOBAL_DCONTEXT);
for (i=0; i<flush_num_threads; i++) {
tgt_dcontext = flush_threads[i]->dcontext;
tgt_pt = (per_thread_t *) tgt_dcontext->fragment_field;
LOG(THREAD, LOG_FRAGMENT, 2,
" considering thread #%d/%d = "TIDFMT"\n", i+1, flush_num_threads,
flush_threads[i]->id);
ASSERT(is_thread_known(tgt_dcontext->owning_thread));
/* can't do anything, even check if thread has any vm areas overlapping flush
* region, until sure thread is in fcache or somewhere that won't change
* vm areas or linking state
*/
mutex_lock(&tgt_pt->linking_lock);
/* Must explicitly check for self and avoid synch then, o/w will lock up
* if ever called from a could_be_linking location (currently only
* happens w/ app syscalls)
*/
if (tgt_dcontext != dcontext && tgt_pt->could_be_linking) {
/* remember we have a global lock, thread_initexit_lock, so two threads
* cannot be here at the same time!
*/
LOG(THREAD, LOG_FRAGMENT, 2,
"\twaiting for thread "TIDFMT"\n", tgt_dcontext->owning_thread);
tgt_pt->wait_for_unlink = true;
mutex_unlock(&tgt_pt->linking_lock);
wait_for_event(tgt_pt->waiting_for_unlink);
mutex_lock(&tgt_pt->linking_lock);
tgt_pt->wait_for_unlink = false;
LOG(THREAD, LOG_FRAGMENT, 2,
"\tdone waiting for thread "TIDFMT"\n", tgt_dcontext->owning_thread);
} else {
LOG(THREAD, LOG_FRAGMENT, 2,
"\tthread "TIDFMT" synch not required\n", tgt_dcontext->owning_thread);
}
/* it is now safe to access link, vm, and trace info in tgt_dcontext
* => FIXME: rename, since not just accessing linking info?
* FIXME: if includes vm access, are syscalls now bad?
* what about handle_modified_code, considered nolinking since straight
* from cache via fault handler? reading should be ok, but exec area splits
* and whatnot?
*/
if (tgt_pt->about_to_exit) {
/* thread is about to exit, it's waiting for us to give up
* thread_initexit_lock -- we don't need to flush it
*/
goto next_thread;
}
/* if a trace-in-progress crosses this region, must squash the trace
* (all traces are essentially frozen now since threads stop in dispatch)
*/
if (size > 0 /* else, no region to cross */ &&
is_building_trace(tgt_dcontext)) {
void *trace_vmlist = cur_trace_vmlist(tgt_dcontext);
if (trace_vmlist != NULL &&
vm_list_overlaps(tgt_dcontext, trace_vmlist, base, base+size)) {
LOG(THREAD, LOG_FRAGMENT, 2,
"\tsquashing trace of thread "TIDFMT"\n", tgt_dcontext->owning_thread);
trace_abort(tgt_dcontext);
}
}
#ifdef WINDOWS
/* Make sure exit fcache if currently inside syscall. If thread has no
* vm area overlap, will be re-linked down below before going to the
* next thread, unless we have shared fragments, in which case we force
* the thread to check_flush_queue() on its next synch point. It will
* re-link in vm_area_flush_fragments().
*/
if (DYNAMO_OPTION(shared_syscalls) && !IS_SHARED_SYSCALL_THREAD_SHARED)
unlink_shared_syscall(tgt_dcontext);
#endif
/* i#849: unlink while we clear out ibt */
if (special_ibl_xfer_is_thread_private())
unlink_special_ibl_xfer(tgt_dcontext);
/* Optimization for shared deletion strategy: perform flush work
* for a thread waiting at a system call on behalf of that thread
* (which we do before the flush tail synch, as if we did it now we
* would need to inc flushtime_global up front, and then we'd have synch
* issues trying to prevent thread we hadn't synched with from checking
* the pending list before we put flushed fragments on it).
* We do count these threads in the ref count as we check the pending
* queue on their behalf after adding the newly flushed fragments to
* the queue, so the ref count gets decremented right away.
*
* We must do this AFTER unlinking shared_syscall's post-syscall ibl, to
* avoid races -- the thread will hit a real synch point before accessing
* any fragments or link info.
* Do this BEFORE checking whether fragments in region to catch all threads.
*/
ASSERT(!tgt_pt->at_syscall_at_flush);
if (DYNAMO_OPTION(syscalls_synch_flush) && get_at_syscall(tgt_dcontext)) {
/* we have to know exactly which threads were at_syscall here when
* we get to post-flush, so we cache in this special bool
*/
DEBUG_DECLARE(bool tables_updated;)
tgt_pt->at_syscall_at_flush = true;
#ifdef DEBUG
tables_updated =
#endif
update_all_private_ibt_table_ptrs(tgt_dcontext, tgt_pt);
STATS_INC(num_shared_flush_atsyscall);
DODEBUG({
if (tables_updated)
STATS_INC(num_shared_tables_updated_atsyscall);
});
}
/* don't need to go any further if thread has no frags in region */
if (size == 0 || !thread_vm_area_overlap(tgt_dcontext, base, base+size)) {
LOG(THREAD, LOG_FRAGMENT, 2,
"\tthread "TIDFMT" has no fragments in region to flush\n",
tgt_dcontext->owning_thread);
#ifdef WINDOWS
/* restore, since won't be restored in vm_area_flush_fragments */
if (DYNAMO_OPTION(shared_syscalls)) {
if (SHARED_FRAGMENTS_ENABLED()) {
/* we cannot re-link shared_syscall here as that would allow
* the target thread to enter to-be-flushed fragments prior
* to their being unlinked and removed from ibl tables -- so
* we force this thread to re-link in check_flush_queue.
* we could re-link after unlink/removal of fragments,
* if it's worth optimizing == case 6194/PR 210655.
*/
tgt_pt->flush_queue_nonempty = true;
STATS_INC(num_flushq_relink_syscall);
} else if (!IS_SHARED_SYSCALL_THREAD_SHARED) {
/* no shared fragments, and no private ones being flushed,
* so this thread is all set
*/
link_shared_syscall(tgt_dcontext);
}
}
#endif
if (special_ibl_xfer_is_thread_private()) {
if (SHARED_FRAGMENTS_ENABLED()) {
/* see shared_syscall relink comment: we have to delay the relink */
tgt_pt->flush_queue_nonempty = true;
STATS_INC(num_flushq_relink_special_ibl_xfer);
} else
link_special_ibl_xfer(dcontext);
}
goto next_thread;
}
LOG(THREAD, LOG_FRAGMENT, 2, "\tflushing fragments for thread "TIDFMT"\n",
flush_threads[i]->id);
DOLOG(2, LOG_FRAGMENT, {
if (tgt_dcontext != dcontext) {
LOG(tgt_dcontext->logfile, LOG_FRAGMENT, 2,
"thread "TIDFMT" is flushing our fragments\n",
dcontext->owning_thread);
}
});
if (size > 0) {
/* unlink all frags in overlapping regions, and mark regions for deletion */
tgt_pt->flush_queue_nonempty = true;
#ifdef DEBUG
num_flushed +=
#endif
vm_area_unlink_fragments(tgt_dcontext, base, base + size, 0
_IF_DGCDIAG(written_pc));
}
next_thread:
/* for thread-shared, we CANNOT let any thread become could_be_linking, for normal
* flushing synch -- for thread-private, we can, but we CANNOT let any thread
* that we've already synched with for flushing go and change the exec areas
* vector! the simplest solution is to have thread-private, like thread-shared,
* stop all threads at a cache exit point.
* FIXME: optimize thread-private by allowing already-synched threads to
* continue but not grab executable_areas lock, while letting
* to-be-synched threads grab the lock (otherwise could wait forever)
*/
        /* Since we must let go of the lock for proper nesting, we use a new
         * synch to stop threads at cache exit, as we need them all
         * out of DR for the duration of the shared flush.
*/
if (tgt_dcontext != dcontext && !tgt_pt->could_be_linking)
tgt_pt->wait_for_unlink = true; /* stop at cache exit */
mutex_unlock(&tgt_pt->linking_lock);
}
return true;
}
/* This routine continues a flush of one of two groups of fragments:
* 1) if list!=NULL, the list of shared fragments beginning at list and
* chained by next_vmarea (we assume that private fragments are
* easily deleted by the owning thread and do not require a flush).
* Caller should call vm_area_remove_fragment() for each target fragment,
* building a custom list chained by next_vmarea, and pass that list to this
* routine.
* 2) otherwise, all fragments in the memory region [base, base+size).
*
* This routine MUST be called after flush_fragments_synch_unlink_priv(), and
* must be followed with flush_fragments_end_synch().
*/
void
flush_fragments_unlink_shared(dcontext_t *dcontext, app_pc base, size_t size,
fragment_t *list _IF_DGCDIAG(app_pc written_pc))
{
/* we would assert that size > 0 || list != NULL but we have to pass
* in 0 and NULL for unit flushing when no live fragments are in the unit
*/
LOG(THREAD, LOG_FRAGMENT, 2,
"FLUSH STAGE 2: unlink_shared(thread "TIDFMT"): flusher is "TIDFMT"\n",
dcontext->owning_thread, (flusher == NULL) ? -1 : flusher->owning_thread);
ASSERT_OWN_MUTEX(true, &thread_initexit_lock);
ASSERT(flush_threads != NULL);
ASSERT(flush_num_threads > 0);
ASSERT(flush_last_stage == 1);
DODEBUG({ flush_last_stage = 2; });
if (RUNNING_WITHOUT_CODE_CACHE()) /* case 7966: nothing to flush, ever */
return;
if (flush_synchall) /* no more flush work to do */
return;
if (SHARED_FRAGMENTS_ENABLED()) {
/* Flushing shared fragments: the strategy is again immediate
* unlinking (plus hashtable removal and vm area list removal)
* with delayed deletion. Unlinking is atomic, and hashtable
* removal is for now since there are no shared bbs in ibls from
* the cache. But deletion needs to ensure that every thread who may
* hold a pointer to a shared fragment or may be inside a shared
* fragment is ok with the fragment being deleted. We use a reference count
* for each flushed region to ensure that every thread has reached a synch
* point before we actually free the fragments in the region.
*
* We do have pathological cases where a single thread at a wait syscall or
* an infinite loop in the cache will prevent all freeing. One solution
* to the syscall problem is to store a flag saying that a thread is at
* the shared_syscall routine. That routine is private and unlinked so
* we know the thread will hit a synch routine if it exits, so no race.
* We then wouldn't bother to inc the ref count for that thread.
*
* Our fallback proactive deletion is to do a full reset when the size
* of the pending deletion list gets too long, though we could be more
* efficient by only resetting the pending list, and by only suspending
* those threads that haven't dec-ed a ref count in a particular region.
*/
LOG(THREAD, LOG_FRAGMENT, 2, " flushing shared fragments\n");
if (DYNAMO_OPTION(shared_deletion)) {
/* We use shared_cache_flush_lock to make atomic the increment of
* flushtime_global and the adding of pending deletion fragments with
* that flushtime, wrt other threads checking the pending list.
*/
mutex_lock(&shared_cache_flush_lock);
}
/* Increment flush count for shared deletion algorithm and for list-based
* flushing (such as for shared cache units). We could wait
* and only do this if we actually flush shared fragments, but it's
* simpler when done in this routine, and nearly every flush does flush
* shared fragments.
*/
increment_global_flushtime();
/* Both vm_area_unlink_fragments and unlink_fragments_for_deletion call
* back to flush_invalidate_ibl_shared_target to remove shared
* fragments from private/shared ibl tables
*/
if (list == NULL) {
shared_flushed =
vm_area_unlink_fragments(GLOBAL_DCONTEXT, base, base + size,
pending_delete_threads _IF_DGCDIAG(written_pc));
} else {
shared_flushed = unlink_fragments_for_deletion(GLOBAL_DCONTEXT, list,
pending_delete_threads);
}
if (DYNAMO_OPTION(shared_deletion))
mutex_unlock(&shared_cache_flush_lock);
DODEBUG({
num_flushed += shared_flushed;
if (shared_flushed > 0)
STATS_INC(num_shared_flushes);
});
}
#ifdef WINDOWS
/* Re-link thread-shared shared_syscall */
if (DYNAMO_OPTION(shared_syscalls) && IS_SHARED_SYSCALL_THREAD_SHARED)
link_shared_syscall(GLOBAL_DCONTEXT);
#endif
if (!special_ibl_xfer_is_thread_private())
link_special_ibl_xfer(GLOBAL_DCONTEXT);
STATS_ADD(num_flushed_fragments, num_flushed);
DODEBUG({
if (num_flushed > 0) {
LOG(THREAD, LOG_FRAGMENT, 1, "Flushed %5d fragments from "PFX"-"PFX"\n",
num_flushed, base, ((char *)base)+size);
} else {
STATS_INC(num_empty_flushes);
LOG(THREAD, LOG_FRAGMENT, 2, "Flushed 0 fragments from "PFX"-"PFX"\n",
base, ((char *)base)+size);
}
});
}
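/* Illustrative staging sketch (comment only, not compiled): a plain region
 * flush with no custom fragment list drives the three stages in order, as
 * flush_fragments_in_region_start() and flush_fragments_in_region_finish()
 * below do:
 *   1) flush_fragments_synch_unlink_priv(dcontext, base, size, own_initexit_lock,
 *                                        exec_invalid, force_synchall
 *                                        _IF_DGCDIAG(written_pc));
 *   2) flush_fragments_unlink_shared(dcontext, base, size, NULL
 *                                    _IF_DGCDIAG(written_pc));
 *   3) flush_fragments_end_synch(dcontext, keep_initexit_lock);
 */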
/* Invalidates (does not remove) shared fragment f from the private/shared
* ibl tables. Can only be called in flush stage 2.
*/
void
flush_invalidate_ibl_shared_target(dcontext_t *dcontext, fragment_t *f)
{
ASSERT(is_self_flushing());
ASSERT(!flush_synchall);
ASSERT_OWN_MUTEX(true, &thread_initexit_lock);
ASSERT(flush_threads != NULL);
ASSERT(flush_num_threads > 0);
ASSERT(flush_last_stage == 2);
ASSERT(TEST(FRAG_SHARED, f->flags));
/* case 7966: has no pt, no flushing either */
if (RUNNING_WITHOUT_CODE_CACHE()) {
ASSERT_NOT_REACHED(); /* shouldn't get here */
return;
}
if (!SHARED_IB_TARGETS())
return;
if (SHARED_IBT_TABLES_ENABLED()) {
/* Remove from shared ibl tables.
* dcontext need not be GLOBAL_DCONTEXT.
*/
fragment_prepare_for_removal(dcontext, f);
} else {
/* We can't tell afterward which entries to remove from the private
* ibl tables, as we no longer keep fragment_t* pointers (we used to
* look for FRAG_WAS_DELETED) and we can't do a range remove (tags
* don't map regularly to regions). So we must invalidate each
* fragment as we process it. It's ok to walk the thread list
* here since we're post-synch for all threads.
*/
int i;
for (i=0; i<flush_num_threads; i++) {
fragment_prepare_for_removal(flush_threads[i]->dcontext, f);
}
}
}
/* Must ONLY be called as the third part of flushing (after
* flush_fragments_synch_unlink_priv() and flush_fragments_unlink_shared()).
* Uses the static shared variables flush_threads and flush_num_threads.
*/
void
flush_fragments_end_synch(dcontext_t *dcontext, bool keep_initexit_lock)
{
dcontext_t *tgt_dcontext;
per_thread_t *tgt_pt;
int i;
LOG(THREAD, LOG_FRAGMENT, 2,
"FLUSH STAGE 3: end_synch(thread "TIDFMT"): flusher is "TIDFMT"\n",
dcontext->owning_thread, (flusher == NULL) ? -1 : flusher->owning_thread);
if (!is_self_flushing() && !flush_synchall/* doesn't set flusher */) {
LOG(THREAD, LOG_FRAGMENT, 2, "\tnothing was flushed\n");
ASSERT_DO_NOT_OWN_MUTEX(!keep_initexit_lock, &thread_initexit_lock);
ASSERT_OWN_MUTEX(keep_initexit_lock, &thread_initexit_lock);
return;
}
ASSERT_OWN_MUTEX(true, &thread_initexit_lock);
ASSERT(flush_threads != NULL);
ASSERT(flush_num_threads > 0);
ASSERT(flush_last_stage == 2);
DODEBUG({ flush_last_stage = 0; });
if (flush_synchall) {
flush_fragments_synchall_end(dcontext);
return;
}
/* now can let all threads at DR synch point go
* FIXME: if implement thread-private optimization above, this would turn into
* re-setting exec areas lock to treat all threads uniformly
*/
for (i=flush_num_threads-1; i>=0; i--) {
/* case 7966: has no pt, no flushing either */
if (RUNNING_WITHOUT_CODE_CACHE())
continue;
tgt_dcontext = flush_threads[i]->dcontext;
tgt_pt = (per_thread_t *) tgt_dcontext->fragment_field;
/* re-acquire lock */
mutex_lock(&tgt_pt->linking_lock);
/* Optimization for shared deletion strategy: perform flush work
* for a thread waiting at a system call, as we didn't add it to the
* ref count in the pre-flush synch.
* We assume that shared_syscall is still unlinked at this point and
* will not be relinked until we let the thread go.
*/
if (DYNAMO_OPTION(syscalls_synch_flush) && tgt_pt->at_syscall_at_flush) {
/* Act on behalf of the thread as though it's at a synch point, but
* only wrt shared fragments (we don't free private fragments here,
* though we could -- should we? may make flush time while holding
* lock take too long? FIXME)
* Currently this works w/ syscalls from dispatch, and w/
* -shared_syscalls by using unprotected storage (thus a slight hole
* but very hard to exploit for security purposes: it can get stale code
* executed, a window we already have, or crash us).
* FIXME: Does not work w/ -ignore_syscalls, but those are private
* for now.
*/
DEBUG_DECLARE(uint pre_flushtime = flushtime_global;)
vm_area_check_shared_pending(tgt_dcontext, NULL);
/* lazy deletion may inc flushtime_global, so may have a higher
* value than our cached one, but should never be lower
*/
ASSERT(tgt_pt->flushtime_last_update >= pre_flushtime);
tgt_pt->at_syscall_at_flush = false;
}
if (tgt_dcontext != dcontext) {
if (tgt_pt->could_be_linking) {
signal_event(tgt_pt->finished_with_unlink);
} else {
/* we don't need to wait on a !could_be_linking thread
* so we use this bool to tell whether we should signal
* the event.
* FIXME: really we want a pulse that wakes ALL waiting
* threads and then resets the event!
*/
tgt_pt->wait_for_unlink = false;
if (tgt_pt->soon_to_be_linking)
signal_event(tgt_pt->finished_all_unlink);
}
}
mutex_unlock(&tgt_pt->linking_lock);
}
/* thread init/exit can proceed now */
flusher = NULL;
global_heap_free(flush_threads, flush_num_threads*sizeof(thread_record_t*)
HEAPACCT(ACCT_THREAD_MGT));
flush_threads = NULL;
if (!keep_initexit_lock)
mutex_unlock(&thread_initexit_lock);
}
/* This routine performs flush stages 1 and 2 (synch_unlink_priv()
* and unlink_shared()) and then returns after grabbing the
* executable_areas lock so that removal of this area from the global list
* is atomic w/ the flush and local removals, before letting the threads go
* -- we return while holding both the thread_initexit_lock and the exec
* areas lock
*
* FIXME: this only helps thread-shared: for thread-private, should let
* already-synched threads continue but not grab executable_areas lock, while
* letting to-be-synched threads grab the lock.
*
* returning while holding locks isn't ideal -- but other choices are worse:
* 1) grab exec areas lock and then let go of initexit lock -- bad nesting
* 2) do all work here, which has to include:
* A) remove region (nearly all uses) == flush_fragments_and_remove_region
* B) change region from rw to r (splitting if nec) (vmareas.c app rw->r)
* C) restore writability and change to selfmod (splitting if nec)
* (vmareas.c handle_modified_code)
* D) remove region and then look up a certain pc and return whether
* flushing overlapped w/ it (vmareas.c handle_modified_code)
*/
void
flush_fragments_in_region_start(dcontext_t *dcontext, app_pc base, size_t size,
bool own_initexit_lock, bool free_futures,
bool exec_invalid, bool force_synchall
_IF_DGCDIAG(app_pc written_pc))
{
KSTART(flush_region);
while (true) {
if (flush_fragments_synch_unlink_priv(dcontext, base, size, own_initexit_lock,
exec_invalid, force_synchall
_IF_DGCDIAG(written_pc))) {
break;
} else {
/* grab lock and then re-check overlap */
executable_areas_lock();
if (!executable_vm_area_executed_from(base, base+size)) {
LOG(THREAD, LOG_FRAGMENT, 2,
"\tregion not executable, so no fragments to flush\n");
/* caller will release lock! */
STATS_INC(num_noncode_flushes);
return;
}
/* unlock and try again */
executable_areas_unlock();
}
}
flush_fragments_unlink_shared(dcontext, base, size, NULL
_IF_DGCDIAG(written_pc));
/* We need to free the futures after all fragments have been unlinked */
if (free_futures) {
flush_fragments_free_futures(base, size);
}
executable_areas_lock();
}
/* must ONLY be called as the second half of flush_fragments_in_region_start().
* uses the shared variables flush_threads and flush_num_threads
*/
void
flush_fragments_in_region_finish(dcontext_t *dcontext, bool keep_initexit_lock)
{
/* done w/ exec areas lock; also free any non-executed coarse units */
free_nonexec_coarse_and_unlock();
flush_fragments_end_synch(dcontext, keep_initexit_lock);
KSTOP(flush_region);
}
/* flush and remove region from exec list, atomically */
void
flush_fragments_and_remove_region(dcontext_t *dcontext, app_pc base, size_t size,
bool own_initexit_lock, bool free_futures)
{
flush_fragments_in_region_start(dcontext, base, size, own_initexit_lock,
free_futures, true/*exec invalid*/,
false/*don't force synchall*/ _IF_DGCDIAG(NULL));
/* ok to call on non-exec region, so don't need to test return value
* both flush routines will return quickly if nothing to flush/was flushed
*/
remove_executable_region(base, size, true/*have lock*/);
flush_fragments_in_region_finish(dcontext, own_initexit_lock);
/* verify initexit lock is in the right state */
ASSERT_OWN_MUTEX(own_initexit_lock, &thread_initexit_lock);
ASSERT_DO_NOT_OWN_MUTEX(!own_initexit_lock, &thread_initexit_lock);
}
/* Flushes fragments from the region without any changes to the exec list.
* Does not free futures and caller can't be holding the initexit lock.
* FIXME - add argument parameters (free futures etc.) as needed. */
void
flush_fragments_from_region(dcontext_t *dcontext, app_pc base, size_t size,
bool force_synchall)
{
/* we pass false to flush_fragments_in_region_start() below for owning the initexit
* lock */
ASSERT_DO_NOT_OWN_MUTEX(true, &thread_initexit_lock);
/* ok to call on non-exec region, so don't need to test return value
* both flush routines will return quickly if nothing to flush/was flushed */
flush_fragments_in_region_start(dcontext, base, size, false /*don't own initexit*/,
false/*don't free futures*/, false/*exec valid*/,
force_synchall _IF_DGCDIAG(NULL));
flush_fragments_in_region_finish(dcontext, false);
}
/* Invalidate all fragments in all caches. Currently executed
* fragments may be alive until they reach an exit.
*
* Note not necessarily immediately freeing memory like
* fcache_reset_all_caches_proactively().
*/
void
invalidate_code_cache()
{
dcontext_t *dcontext = get_thread_private_dcontext();
LOG(GLOBAL, LOG_FRAGMENT, 2, "invalidate_code_cache()\n");
flush_fragments_in_region_start(dcontext, UNIVERSAL_REGION_BASE,
UNIVERSAL_REGION_SIZE,
false, true /* remove futures */,
false /*exec valid*/, false /*don't force synchall*/
_IF_DGCDIAG(NULL));
flush_fragments_in_region_finish(dcontext, false);
}
/* Flushes all areas stored in the vector toflush.
* Synchronization of toflush is up to caller, but as locks cannot be
* held when flushing, toflush must be thread-private.
* Currently only used for pcache hotp interop (case 9970).
*/
void
flush_vmvector_regions(dcontext_t *dcontext, vm_area_vector_t *toflush,
bool free_futures, bool exec_invalid)
{
vmvector_iterator_t vmvi;
app_pc start, end;
ASSERT(toflush != NULL && !TEST(VECTOR_SHARED, toflush->flags));
ASSERT(!RUNNING_WITHOUT_CODE_CACHE());
ASSERT(DYNAMO_OPTION(coarse_units) && DYNAMO_OPTION(use_persisted)
IF_HOTP(&& DYNAMO_OPTION(hot_patching)));
if (vmvector_empty(toflush))
return;
vmvector_iterator_start(toflush, &vmvi);
while (vmvector_iterator_hasnext(&vmvi)) {
vmvector_iterator_next(&vmvi, &start, &end);
/* FIXME case 10086: optimization: batch the flushes together so we only
* synch once. Currently this is rarely used, and with few areas in the
* vector, so we don't bother. To batch them we'd need to assume the
* worst and do a synchall up front, which could be more costly than 2
* or 3 separate flushes that do not involve coarse code.
*/
ASSERT_OWN_NO_LOCKS();
flush_fragments_in_region_start(dcontext, start, end - start,
false/*no lock*/, free_futures, exec_invalid,
false/*don't force synchall*/ _IF_DGCDIAG(NULL));
flush_fragments_in_region_finish(dcontext, false/*no lock*/);
STATS_INC(num_flush_vmvector);
}
vmvector_iterator_stop(&vmvi);
}
/****************************************************************************/
#if defined(INTERNAL) || defined(CLIENT_INTERFACE)
void
fragment_output(dcontext_t *dcontext, fragment_t *f)
{
ASSERT(!TEST(FRAG_SHARED, f->flags) ||
self_owns_recursive_lock(&change_linking_lock));
if (SHOULD_OUTPUT_FRAGMENT(f->flags)) {
per_thread_t *pt = (dcontext == GLOBAL_DCONTEXT) ? shared_pt :
(per_thread_t *) dcontext->fragment_field;
output_trace(dcontext, pt, f,
IF_DEBUG_ELSE(GLOBAL_STAT(num_fragments),
GLOBAL_STAT(num_traces)+GLOBAL_STAT(num_bbs))+1);
}
}
void
init_trace_file(per_thread_t *pt)
{
if (INTERNAL_OPTION(tracedump_binary)) {
/* the first 4 bytes in the binary file give the size of linkcounts
* (0 if no linkcounts)
*/
tracedump_file_header_t hdr =
{CURRENT_API_VERSION, IF_X64_ELSE(true, false), 0 };
#ifdef PROFILE_LINKCOUNT
if (dynamo_options.profile_counts) {
hdr.linkcount_size = sizeof(linkcount_type_t);
ASSERT_NOT_IMPLEMENTED(false == DYNAMO_OPTION(inline_trace_ibl));
/* Cannot use PROFILE_LINKCOUNT with inline_trace_ibl */
}
#endif
os_write(pt->tracefile, &hdr, sizeof(hdr));
}
}
void
exit_trace_file(per_thread_t *pt)
{
#ifdef PROFILE_LINKCOUNT
if (dynamo_options.tracedump_threshold > 0) {
if (INTERNAL_OPTION(tracedump_binary)) {
os_write(pt->tracefile, &pt->tracedump_num_below_threshold,
sizeof(pt->tracedump_num_below_threshold));
os_write(pt->tracefile, &pt->tracedump_count_below_threshold,
sizeof(pt->tracedump_count_below_threshold));
} else {
print_file(pt->tracefile, "\nTraces below dump threshold of %d: %d\n",
dynamo_options.tracedump_threshold, pt->tracedump_num_below_threshold);
print_file(pt->tracefile, "Total count below dump threshold: "
LINKCOUNT_FORMAT_STRING"\n", pt->tracedump_count_below_threshold);
}
}
#endif
close_log_file(pt->tracefile);
}
/* Binary trace dump is used to save time and space.
* The format is in fragment.h.
* FIXME: add general symbol table support to disassembly?
* We'd dump num_targets, then fcache_enter, ibl, trace_cache_incr addrs?
* Reader would then add exit stubs & other traces to table?
* But links will target cache pcs...
*/
#define TRACEBUF_SIZE 2048
#define TRACEBUF_MAKE_ROOM(p, buf, sz) do { \
if (p+sz >= &buf[TRACEBUF_SIZE]) { \
os_write(pt->tracefile, buf, (p - buf)); \
p = buf; \
} \
} while (0)
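/* Usage pattern for the buffer macro above (illustrative comment only): each
 * fixed-size record is written as
 *   TRACEBUF_MAKE_ROOM(p, buf, sizeof(record));  -- flush buf to the file if full
 *   *((record_t *)p) = record;                   -- store into the buffer
 *   p += sizeof(record);                         -- advance the cursor
 * and whatever remains buffered is written out with os_write() explicitly at
 * the end of output_trace_binary() below; record_t is just a placeholder name.
 */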
static void
output_trace_binary(dcontext_t *dcontext, per_thread_t *pt, fragment_t *f,
stats_int_t trace_num)
{
/* FIXME:
* We do not support PROFILE_RDTSC or various small fields
*/
/* FIXME: should allocate buffer elsewhere */
byte buf[TRACEBUF_SIZE];
byte *p = buf;
trace_only_t *t = TRACE_FIELDS(f);
linkstub_t *l;
tracedump_trace_header_t hdr = {
(int) trace_num, f->tag, f->start_pc, f->prefix_size, 0, f->size,
(INTERNAL_OPTION(tracedump_origins) ? t->num_bbs : 0),
IF_X64_ELSE(!TEST(FRAG_32_BIT, f->flags), false),
};
tracedump_stub_data_t stub;
/* Should we widen the identifier? */
IF_X64(ASSERT(CHECK_TRUNCATE_TYPE_int(trace_num)));
for (l = FRAGMENT_EXIT_STUBS(f); l != NULL; l = LINKSTUB_NEXT_EXIT(l))
hdr.num_exits++;
TRACEBUF_MAKE_ROOM(p, buf, sizeof(hdr));
*((tracedump_trace_header_t *)p) = hdr;
p += sizeof(hdr);
if (INTERNAL_OPTION(tracedump_origins)) {
uint i;
for (i=0; i<t->num_bbs; i++) {
instr_t *inst;
instrlist_t *ilist;
int size = 0;
TRACEBUF_MAKE_ROOM(p, buf, sizeof(app_pc));
*((app_pc *)p) = t->bbs[i].tag;
p += sizeof(app_pc);
/* we assume that the target is readable, since we dump prior
* to unloading of modules on flush events
*/
ilist = build_app_bb_ilist(dcontext, t->bbs[i].tag, INVALID_FILE);
for (inst = instrlist_first(ilist); inst; inst = instr_get_next(inst)) {
size += instr_length(dcontext, inst);
}
TRACEBUF_MAKE_ROOM(p, buf, sizeof(int));
*((int *)p) = size;
p += sizeof(int);
for (inst = instrlist_first(ilist); inst; inst = instr_get_next(inst)) {
TRACEBUF_MAKE_ROOM(p, buf, instr_length(dcontext, inst));
/* PR 302353: we can't use instr_encode() as it will
* try to re-relativize rip-rel instrs, which may fail
*/
ASSERT(instr_get_raw_bits(inst) != NULL);
memcpy(p, instr_get_raw_bits(inst), instr_length(dcontext, inst));
p += instr_length(dcontext, inst);
}
/* free the instrlist_t elements */
instrlist_clear_and_destroy(dcontext, ilist);
}
}
ASSERT(SEPARATE_STUB_MAX_SIZE == DIRECT_EXIT_STUB_SIZE(0));
for (l = FRAGMENT_EXIT_STUBS(f); l != NULL; l = LINKSTUB_NEXT_EXIT(l)) {
cache_pc stub_pc = EXIT_STUB_PC(dcontext, f, l);
stub.cti_offs = l->cti_offset;
stub.stub_pc = stub_pc;
stub.target = EXIT_TARGET_TAG(dcontext, f, l);
stub.linked = TEST(LINK_LINKED, l->flags);
stub.stub_size = EXIT_HAS_STUB(l->flags, f->flags) ?
DIRECT_EXIT_STUB_SIZE(f->flags) :
0 /* no stub needed: -no_indirect_stubs */;
ASSERT(DIRECT_EXIT_STUB_SIZE(f->flags) <= SEPARATE_STUB_MAX_SIZE);
TRACEBUF_MAKE_ROOM(p, buf, STUB_DATA_FIXED_SIZE);
memcpy(p, &stub, STUB_DATA_FIXED_SIZE);
p += STUB_DATA_FIXED_SIZE;
#ifdef PROFILE_LINKCOUNT
if (dynamo_options.profile_counts) {
*((linkcount_type_t *)p) = l->count;
p += sizeof(linkcount_type_t);
}
#endif
if (TEST(LINK_SEPARATE_STUB, l->flags) && stub_pc != NULL) {
TRACEBUF_MAKE_ROOM(p, buf, DIRECT_EXIT_STUB_SIZE(f->flags));
ASSERT(stub_pc < f->start_pc || stub_pc >= f->start_pc+f->size);
memcpy(p, stub_pc, DIRECT_EXIT_STUB_SIZE(f->flags));
p += DIRECT_EXIT_STUB_SIZE(f->flags);
} else { /* ensure client's method of identifying separate stubs works */
ASSERT(stub_pc == NULL /* no stub at all */ ||
(stub_pc >= f->start_pc && stub_pc < f->start_pc+f->size));
}
}
if (f->size >= TRACEBUF_SIZE) {
os_write(pt->tracefile, buf, (p - buf));
p = buf;
os_write(pt->tracefile, f->start_pc, f->size);
} else {
/* single syscall for all but largest traces */
TRACEBUF_MAKE_ROOM(p, buf, f->size);
memcpy(p, f->start_pc, f->size);
p += f->size;
os_write(pt->tracefile, buf, (p - buf));
p = buf;
}
}
/* Output the contents of the specified trace.
* Does full disassembly of every instruction.
* If deleted_at != -1, it is taken to be the fragment id that
* caused the flushing of this fragment from the cache
* If f is shared, pt argument must be shared_pt, and caller must hold
* the change_linking_lock.
*/
static void
output_trace(dcontext_t *dcontext, per_thread_t *pt, fragment_t *f,
stats_int_t deleted_at)
{
trace_only_t *t = TRACE_FIELDS(f);
#ifdef WINDOWS
char buf[MAXIMUM_PATH];
#endif
stats_int_t trace_num;
bool locked_vmareas = false, ok;
dr_isa_mode_t old_mode;
ASSERT(SHOULD_OUTPUT_FRAGMENT(f->flags));
ASSERT(TEST(FRAG_IS_TRACE, f->flags));
ASSERT(!TEST(FRAG_SELFMOD_SANDBOXED, f->flags)); /* no support for selfmod */
/* already been output? caller should check this flag, just like trace flag */
ASSERT(!TEST(FRAG_TRACE_OUTPUT, f->flags));
ASSERT(!TEST(FRAG_SHARED, f->flags) ||
self_owns_recursive_lock(&change_linking_lock));
f->flags |= FRAG_TRACE_OUTPUT;
#ifdef PROFILE_LINKCOUNT
if (dynamo_options.tracedump_threshold > 0) {
linkcount_type_t count = get_total_linkcount(f);
if (count < (linkcount_type_t) dynamo_options.tracedump_threshold) {
pt->tracedump_num_below_threshold++;
pt->tracedump_count_below_threshold += count;
return;
}
}
#endif
LOG(THREAD, LOG_FRAGMENT, 4, "output_trace: F%d("PFX")\n", f->id, f->tag);
/* Recreate in same mode as original fragment */
ok = dr_set_isa_mode(dcontext, FRAG_ISA_MODE(f->flags), &old_mode);
ASSERT(ok);
/* xref 8131/8202 if dynamo_resetting we don't need to grab the tracedump
* mutex to ensure we're the only writer and grabbing here on reset path
* can lead to a rank-order violation. */
if (!dynamo_resetting) {
/* We must grab shared_vm_areas lock first to avoid rank order (i#1157) */
locked_vmareas = acquire_vm_areas_lock_if_not_already(dcontext, FRAG_SHARED);
mutex_lock(&tracedump_mutex);
}
trace_num = tcount;
tcount++;
if (!TEST(FRAG_SHARED, f->flags)) {
/* No lock is needed because we use thread-private files.
* If dumping traces for a different thread (e.g., from
* dynamo_other_thread_exit), the caller is responsible for the necessary
* synchronization. */
ASSERT(pt != shared_pt);
if (!dynamo_resetting) {
mutex_unlock(&tracedump_mutex);
if (locked_vmareas) {
locked_vmareas = false;
release_vm_areas_lock(dcontext, FRAG_SHARED);
}
}
} else {
ASSERT(pt == shared_pt);
}
/* binary dump requested? */
if (INTERNAL_OPTION(tracedump_binary)) {
output_trace_binary(dcontext, pt, f, trace_num);
goto output_trace_done;
}
/* just origins => just bb tags in text */
if (INTERNAL_OPTION(tracedump_origins) &&
!INTERNAL_OPTION(tracedump_text)) {
uint i;
print_file(pt->tracefile, "Trace %d\n", tcount);
#ifdef DEBUG
print_file(pt->tracefile, "Fragment %d\n", f->id);
#endif
for (i = 0; i < t->num_bbs; i++) {
print_file(pt->tracefile, "\tbb %d = "PFX"\n", i, t->bbs[i].tag);
}
print_file(pt->tracefile, "\n");
goto output_trace_done;
}
/* full text dump */
print_file(pt->tracefile, "=============================================="
"=============================\n\n");
print_file(pt->tracefile, "TRACE # %d\n", tcount);
#ifdef DEBUG
print_file(pt->tracefile, "Fragment # %d\n", f->id);
#endif
print_file(pt->tracefile, "Tag = "PFX"\n", f->tag);
print_file(pt->tracefile, "Thread = %d\n", get_thread_id());
if (deleted_at > -1) {
print_file(pt->tracefile,
"*** Flushed from cache when top fragment id was %d\n",
deleted_at);
}
#ifdef WINDOWS
/* FIXME: for fragments flushed by unloaded modules, naturally we
* won't be able to get the module name b/c by now it is unloaded,
* the only fix is to keep our own listing of mapped modules and not
* remove the entry from that listing until this point
*/
get_module_name(f->tag, buf, sizeof(buf));
if (buf[0] != '\0')
print_file(pt->tracefile, "Module of basic block 0 = %s\n", buf);
else
print_file(pt->tracefile, "Module of basic block 0 = <unknown>\n");
#endif
#if defined(INTERNAL) || defined(CLIENT_INTERFACE)
if (INTERNAL_OPTION(tracedump_origins)) {
uint i;
print_file(pt->tracefile, "\nORIGINAL CODE:\n");
for (i = 0; i < t->num_bbs; i++) {
/* we only care about the printing that the build routine does */
print_file(pt->tracefile, "basic block # %d: ", i);
/* we assume that the target is readable, since we dump prior
* to unloading of modules on flush events
*/
ASSERT(is_readable_without_exception(t->bbs[i].tag, sizeof(t->bbs[i])));
disassemble_app_bb(dcontext, t->bbs[i].tag, pt->tracefile);
}
print_file(pt->tracefile, "END ORIGINAL CODE\n\n");
}
#endif
#ifdef PROFILE_RDTSC
if (dynamo_options.profile_times) {
/* Cycle counts are too high due to extra instructions needed
* to save regs and store time, so we subtract a constant for
* each fragment entry.
* Experiments using large-iter loops show that:
* empty loop overhead = 2 cycles
* 2 pushes, 2 moves, 2 pops = 7 cycles - 2 cycles = 5 cycles
* add in 1 rdtsc = 38 cycles - 2 cycles = 36 cycles
* add in 2 rdtsc = 69 cycles - 2 cycles = 67 cycles
* If we assume the time transfer happens in the middle of the rdtsc, we
* should use 36 cycles.
*/
trace_only_t *tr = TRACE_FIELDS(f);
const int adjustment = 37;
uint64 real_time = tr->total_time;
uint64 temp;
uint time_top, time_bottom;
print_file(pt->tracefile, "Size = %d (+ %d for profiling)\n",
f->size-profile_call_size(), profile_call_size());
print_file(pt->tracefile, "Profiling:\n");
print_file(pt->tracefile, "\tcount = "UINT64_FORMAT_STRING"\n", tr->count);
print_file(pt->tracefile, "\tmeasured cycles = "PFX"\n", real_time);
temp = tr->count * (uint64)adjustment;
if (real_time < temp) {
print_file(pt->tracefile,
"\t ERROR: adjustment too large, cutting off at 0, should use < %d\n",
(int)(real_time / tr->count));
real_time = 0;
} else {
real_time -= temp;
}
print_file(pt->tracefile, "\tadjusted cycles = "PFX"\n", real_time);
divide_uint64_print(real_time, kilo_hertz, false, 6,
&time_top, &time_bottom);
print_file(pt->tracefile, "\ttime = %u.%.6u ms\n",
time_top, time_bottom);
} else {
print_file(pt->tracefile, "Size = %d\n", f->size);
}
#else
print_file(pt->tracefile, "Size = %d\n", f->size);
#endif /* PROFILE_RDTSC */
#ifdef PROFILE_LINKCOUNT
if (dynamo_options.profile_counts) {
int num_exits = 0;
linkstub_t *ls;
print_file(pt->tracefile, "Exit stubs:\n");
for (ls = FRAGMENT_EXIT_STUBS(f); ls; ls = LINKSTUB_NEXT_EXIT(ls)) {
int id = -1;
app_pc target;
if ((ls->flags & LINK_INDIRECT) != 0) {
target = 0;
id = -1;
} else {
# ifdef DEBUG
fragment_t *targetf = fragment_lookup(dcontext,
EXIT_TARGET_TAG(dcontext, f, ls));
id = (targetf != NULL) ? targetf->id : -1;
# else
id = -1;
# endif
target = EXIT_TARGET_TAG(dcontext, f, ls);
}
print_file(pt->tracefile,
"\t#%d: target = "PFX" (F#%d), count = "
LINKCOUNT_FORMAT_STRING "%s\n",
num_exits, target, id, ls->count,
((ls->flags & LINK_LINKED) != 0) ? ", linked" : "");
num_exits++;
}
# ifdef SIDELINE_COUNT_STUDY
if (dynamo_options.sideline && t->count_old_pre > (linkcount_type_t)0) {
print_file(pt->tracefile, "Sideline: pre-opt count = "
LINKCOUNT_FORMAT_STRING "\n",
t->count_old_pre);
print_file(pt->tracefile, "\tpost-opt count = "
LINKCOUNT_FORMAT_STRING "\n",
t->count_old_post);
print_file(pt->tracefile, "\tnew trace count = "
LINKCOUNT_FORMAT_STRING "\n",
get_total_linkcount(f));
}
# endif
}
#endif /* PROFILE_LINKCOUNT */
#if defined(INTERNAL) || defined(CLIENT_INTERFACE)
print_file(pt->tracefile, "Body:\n");
disassemble_fragment_body(dcontext, f, pt->tracefile);
print_file(pt->tracefile, "END TRACE %d\n\n", tcount);
#endif
output_trace_done:
dr_set_isa_mode(dcontext, old_mode, NULL);
if (TEST(FRAG_SHARED, f->flags) && !dynamo_resetting) {
ASSERT_OWN_MUTEX(true, &tracedump_mutex);
mutex_unlock(&tracedump_mutex);
if (locked_vmareas)
release_vm_areas_lock(dcontext, FRAG_SHARED);
} else {
ASSERT_DO_NOT_OWN_MUTEX(true, &tracedump_mutex);
}
}
#endif /* defined(INTERNAL) || defined(CLIENT_INTERFACE) */
/****************************************************************************/
#ifdef PROFILE_RDTSC
/* This routine is called at the start of every trace
* it assumes the caller has saved the caller-saved registers
* (eax, ecx, edx)
*
* See insert_profile_call() for the assembly code prefix that calls
* this routine. The end time is computed in the assembly code and passed
* in to have as accurate times as possible (don't want the profiling overhead
* added into the times). Also, the assembly code computes the new start
* time after this routine returns.
*
* We could generate a custom copy of this routine
* for every thread, but we don't do that now -- don't need to.
*/
void
profile_fragment_enter(fragment_t *f, uint64 end_time)
{
dcontext_t *dcontext;
#ifdef WINDOWS
/* must get last error prior to getting dcontext */
int error_code = get_last_error();
#endif
trace_only_t *t = TRACE_FIELDS(f);
/* this is done in assembly: uint64 end_time = get_time() */
dcontext = get_thread_private_dcontext();
/* increment this fragment's execution count */
t->count++;
/***********************************************************************/
/* all-in-cache sequence profiling */
/* top ten cache times */
dcontext->cache_frag_count++;
/***********************************************************************/
/* we rely on dispatch being the only way to enter the fcache.
* dispatch sets prev_fragment to null prior to entry */
if (dcontext->prev_fragment != NULL) {
trace_only_t *last_t = TRACE_FIELDS(dcontext->prev_fragment);
ASSERT((dcontext->prev_fragment->flags & FRAG_IS_TRACE) != 0);
/* charge time to last fragment */
last_t->total_time += (end_time - dcontext->start_time);
}
/* set up for next fragment */
dcontext->prev_fragment = f;
/* this is done in assembly: dcontext->start_time = get_time() */
#ifdef WINDOWS
/* restore app's error code */
set_last_error(error_code);
#endif
}
/* this routine is called from dispatch after exiting the fcache
* it finishes up the final fragment's time slot
*/
void
profile_fragment_dispatch(dcontext_t *dcontext)
{
/* end time slot ASAP, should we try to move this to assembly routines exiting
* the cache? probably not worth it, in a long-running program shouldn't
* be exiting the cache that often */
uint64 end_time = get_time();
bool tagtable = LINKSTUB_INDIRECT(dcontext->last_exit->flags);
if (dcontext->prev_fragment != NULL &&
    (dcontext->prev_fragment->flags & FRAG_IS_TRACE) != 0) {
/* end time slot, charge time to last fragment
* there's more overhead here than other time endings, so subtract
* some time off. these numbers are pretty arbitrary:
*/
trace_only_t *last_t = TRACE_FIELDS(dcontext->prev_fragment);
const uint64 adjust = (tagtable) ? 72 : 36;
uint64 add = end_time - dcontext->start_time;
if (add < adjust) {
SYSLOG_INTERNAL_ERROR("ERROR: profile_fragment_dispatch: add was %d, tagtable %d",
(int)add, tagtable);
add = 0;
} else
add -= adjust;
ASSERT((dcontext->prev_fragment->flags & FRAG_IS_TRACE) != 0);
last_t->total_time += add;
}
}
#endif /* PROFILE_RDTSC */
/*******************************************************************************
* COARSE-GRAIN FRAGMENT HASHTABLE INSTANTIATION
*/
/* Synch model: the htable lock should be held during all
* app_to_cache_t manipulations, to be consistent (not strictly
* necessary as there are no removals from the htable except in
* all-thread-synch flushes). Copies of a looked-up cache_pc are safe
* to use w/o the htable lock, or any lock -- they're pointers into
* cache units, and there are no deletions of cache units, except in
* all-thread-synch flushes.
*/
/* 2 macros w/ name and types are duplicated in fragment.h -- keep in sync */
#define NAME_KEY coarse
#define ENTRY_TYPE app_to_cache_t
static app_to_cache_t a2c_empty = { NULL, NULL };
static app_to_cache_t a2c_sentinel = { /*assume invalid*/(app_pc)PTR_UINT_MINUS_1,
NULL };
/* FIXME: want to inline the app_to_cache_t struct just like lookuptable
* does and use it for main table -- no support for that right now
*/
/* not defining HASHTABLE_USE_LOOKUPTABLE */
#define ENTRY_TAG(f) ((ptr_uint_t)(f).app)
#define ENTRY_EMPTY (a2c_empty)
#define ENTRY_SENTINEL (a2c_sentinel)
#define ENTRY_IS_EMPTY(f) ((f).app == a2c_empty.app)
#define ENTRY_IS_SENTINEL(f) ((f).app == a2c_sentinel.app)
#define ENTRY_IS_INVALID(f) (false) /* no invalid entries */
#define ENTRIES_ARE_EQUAL(t,f,g) ((f).app == (g).app)
#define HASHTABLE_WHICH_HEAP(flags) (ACCT_FRAG_TABLE)
/* note that we give the th table a lower-ranked coarse_th_htable_rwlock */
#define COARSE_HTLOCK_RANK coarse_table_rwlock /* for use after hashtablex.h */
#define HTLOCK_RANK COARSE_HTLOCK_RANK
#define HASHTABLE_SUPPORT_PERSISTENCE 1
#include "hashtablex.h"
/* All defines are undef-ed at end of hashtablex.h
* Would be nice to re-use ENTRY_IS_EMPTY, etc., though w/ multiple htables
* in same file can't realistically get away w/o custom defines like these:
*/
#define A2C_ENTRY_IS_EMPTY(a2c) ((a2c).app == NULL)
#define A2C_ENTRY_IS_SENTINEL(a2c) ((a2c).app == a2c_sentinel.app)
#define A2C_ENTRY_IS_REAL(a2c) (!A2C_ENTRY_IS_EMPTY(a2c) && !A2C_ENTRY_IS_SENTINEL(a2c))
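/* Quick reference for app_to_cache_t slot states (summary comment only):
 *   empty:    { NULL, NULL }                        == a2c_empty
 *   sentinel: { (app_pc) PTR_UINT_MINUS_1, NULL }   == a2c_sentinel
 *   real:     anything else: .app is the tag key; .cache holds either a body
 *             pc/offset or an entrance-stub pc/offset depending on the table
 *             and on whether the owning unit is frozen (see
 *             coarse_body_from_htable_entry() below).
 */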
/* required routines for hashtable interface that we don't need for this instance */
static void
hashtable_coarse_init_internal_custom(dcontext_t *dcontext, coarse_table_t *htable)
{ /* nothing */
}
static void
hashtable_coarse_resized_custom(dcontext_t *dcontext, coarse_table_t *htable,
uint old_capacity, app_to_cache_t *old_table,
app_to_cache_t *old_table_unaligned,
uint old_ref_count, uint old_table_flags)
{ /* nothing */
}
# ifdef DEBUG
static void
hashtable_coarse_study_custom(dcontext_t *dcontext, coarse_table_t *htable,
uint entries_inc/*amnt table->entries was pre-inced*/)
{ /* nothing */
}
# endif
static void
hashtable_coarse_free_entry(dcontext_t *dcontext, coarse_table_t *htable,
app_to_cache_t entry)
{
/* nothing to do, data is inlined */
}
/* i#670: To handle differing app addresses from different module
* bases across different executions, we store the persist-time abs
* addrs in our tables and always shift on lookup. For frozen exit
* stubs, we have the frozen fcache return convert to a cur-base abs
* addr so this shift will then restore to persist-time.
*
* XXX: this is needed for -persist_trust_textrel, but for Windows
* bases will only be different when we need to apply relocations, and
* there we could just add an extra relocation per exit stub, which
* would end up being lower overhead for long-running apps, but may be
* higher than the overhead of shifting for short-running. For now,
* I'm enabling this as cross-platform, and we can do perf
* measurements later if desired.
*
* Storing persist-time abs addr versus storing module offsets: note
* that whatever we do we want a frozen-unit-only solution so we don't
* want to, say, always store module offsets for non-frozen units.
* This is because we support mixing coarse and fine and we have to
* use abs addrs in fine tables b/c they're not per-module. We could
* store offsets in frozen only, but then we'd have to do extra work
* when freezing. Plus, by storing absolute, when the module base
* lines up we can avoid the shift on the exit stubs (although today
* we only avoid exit stub overhead for trace heads when the base
* matches).
*/
static inline app_to_cache_t
coarse_lookup_internal(dcontext_t *dcontext, app_pc tag, coarse_table_t *table)
{
/* note that for mod_shift we don't need to compare to bounds b/c
* this is a table for this module only
*/
app_to_cache_t a2c =
hashtable_coarse_lookup(dcontext, (ptr_uint_t)(tag + table->mod_shift), table);
if (table->mod_shift != 0 && A2C_ENTRY_IS_REAL(a2c))
a2c.app -= table->mod_shift;
return a2c;
}
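/* Worked example of the mod_shift adjustment (comment only; the value of
 * mod_shift itself is computed elsewhere): the table keys are persist-time
 * absolute addresses, so a lookup of a current-base tag does
 *   key     = tag + table->mod_shift;        (hashed and compared against keys)
 *   a2c.app = stored_key - table->mod_shift; (handed back as a current-base tag)
 * and when the module loads at its persist-time base, mod_shift is 0 and both
 * adjustments are no-ops.
 */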
/* I would #define hashtable_coarse_lookup as DO_NOT_USE but we have to use for
* pclookup
*/
/* Pass 0 for the initial capacity to use the default.
* Initial capacities are number of entries and NOT bits in mask or anything.
*/
void
fragment_coarse_htable_create(coarse_info_t *info, uint init_capacity,
uint init_th_capacity)
{
coarse_table_t *th_htable;
coarse_table_t *htable;
uint init_size;
ASSERT(SHARED_FRAGMENTS_ENABLED());
/* Case 9537: If we start the new table small and grow it we have large
* collision chains as we map the lower address space of a large table into
* the same lower fraction of a smaller table, so we create our table fully
* sized up front.
*/
if (init_capacity != 0) {
init_size = hashtable_bits_given_entries(init_capacity,
DYNAMO_OPTION(coarse_htable_load));
} else
init_size = INIT_HTABLE_SIZE_COARSE;
LOG(GLOBAL, LOG_FRAGMENT, 2, "Coarse %s htable %d capacity => %d bits\n",
info->module, init_capacity, init_size);
htable = NONPERSISTENT_HEAP_TYPE_ALLOC(GLOBAL_DCONTEXT, coarse_table_t,
ACCT_FRAG_TABLE);
hashtable_coarse_init(GLOBAL_DCONTEXT, htable, init_size,
DYNAMO_OPTION(coarse_htable_load),
(hash_function_t)INTERNAL_OPTION(alt_hash_func),
0 /* hash_mask_offset */,
HASHTABLE_ENTRY_SHARED | HASHTABLE_SHARED |
HASHTABLE_RELAX_CLUSTER_CHECKS
_IF_DEBUG("coarse htable"));
htable->mod_shift = 0;
info->htable = (void *) htable;
/* We could create th_htable lazily independently of htable but not worth it */
if (init_th_capacity != 0) {
init_size = hashtable_bits_given_entries(init_th_capacity,
DYNAMO_OPTION(coarse_th_htable_load));
} else
init_size = INIT_HTABLE_SIZE_COARSE_TH;
LOG(GLOBAL, LOG_FRAGMENT, 2, "Coarse %s th htable %d capacity => %d bits\n",
info->module, init_th_capacity, init_size);
th_htable = NONPERSISTENT_HEAP_TYPE_ALLOC(GLOBAL_DCONTEXT, coarse_table_t,
ACCT_FRAG_TABLE);
hashtable_coarse_init(GLOBAL_DCONTEXT, th_htable, init_size,
DYNAMO_OPTION(coarse_th_htable_load),
(hash_function_t)INTERNAL_OPTION(alt_hash_func),
0 /* hash_mask_offset */,
HASHTABLE_ENTRY_SHARED | HASHTABLE_SHARED |
HASHTABLE_RELAX_CLUSTER_CHECKS
_IF_DEBUG("coarse th htable"));
th_htable->mod_shift = 0;
/* We give th table a lower lock rank for coarse_body_from_htable_entry().
* FIXME: add param to init() that takes in lock rank?
*/
ASSIGN_INIT_READWRITE_LOCK_FREE(th_htable->rwlock, coarse_th_table_rwlock);
info->th_htable = (void *) th_htable;
}
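/* Minimal usage sketch (comment only): a caller that does not know the final
 * entry counts can simply pass zeros to get the default initial sizes:
 *   fragment_coarse_htable_create(info, 0, 0);
 * whereas fragment_coarse_htable_merge() below passes explicit capacities so
 * the merged tables are fully sized up front (case 9537).
 */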
/* Adds all entries from stable into dtable, offsetting by dst_cache_offset,
* which is the offset from dst->cache_start_pc at which
* the src cache has been placed.
*/
static void
fragment_coarse_htable_merge_helper(dcontext_t *dcontext,
coarse_info_t *dst, coarse_table_t *dtable,
coarse_info_t *src, coarse_table_t *stable,
ssize_t dst_cache_offset)
{
app_to_cache_t a2c, look_a2c;
uint i;
/* assumption: dtable is private to this thread and so does not need synch */
DODEBUG({ dtable->is_local = true; });
TABLE_RWLOCK(stable, read, lock);
for (i = 0; i < stable->capacity; i++) {
a2c = stable->table[i];
if (A2C_ENTRY_IS_REAL(a2c)) {
look_a2c = coarse_lookup_internal(dcontext, a2c.app, dtable);
if (A2C_ENTRY_IS_EMPTY(look_a2c)) {
a2c.cache += dst_cache_offset;
if (!dst->frozen) { /* adjust absolute value */
ASSERT_NOT_TESTED();
a2c.cache += (dst->cache_start_pc - src->cache_start_pc);
}
hashtable_coarse_add(dcontext, a2c, dtable);
} else {
/* Our merging-with-dups strategy requires that we not
* merge them in this early
*/
ASSERT_NOT_REACHED();
}
}
}
TABLE_RWLOCK(stable, read, unlock);
DODEBUG({ dtable->is_local = false; });
}
/* Merges the main and th htables from info1 and info2 into new htables for dst.
* If !add_info2, makes room for but does not add entries from info2.
* If !add_th_htable, creates but does not add entries to dst->th_htable.
* FIXME: we can't use hashtable_coarse_merge() b/c we don't want to add
* entries from the 2nd table, and we do custom mangling of entries when adding.
* Is it worth parametrizing hashtable_coarse_merge() to share anything?
*/
void
fragment_coarse_htable_merge(dcontext_t *dcontext, coarse_info_t *dst,
coarse_info_t *info1, coarse_info_t *info2,
bool add_info2, bool add_th_htable)
{
coarse_table_t *thht_dst, *thht1, *thht2;
coarse_table_t *ht_dst, *ht1, *ht2;
uint merged_entries = 0;
ASSERT(SHARED_FRAGMENTS_ENABLED());
ASSERT(info1 != NULL && info2 != NULL);
ht1 = (coarse_table_t *) info1->htable;
ht2 = (coarse_table_t *) info2->htable;
thht1 = (coarse_table_t *) info1->th_htable;
thht2 = (coarse_table_t *) info2->th_htable;
ASSERT(dst != NULL && dst->htable == NULL && dst->th_htable == NULL);
/* We go to the trouble of determining non-dup total entries to
* avoid repeatedly increasing htable size on merges and hitting
* collision asserts. FIXME: should we shrink afterward instead?
* Or just ignore collision asserts until at full size?
*/
merged_entries = hashtable_coarse_num_unique_entries(dcontext, ht1, ht2);
STATS_ADD(coarse_merge_dups, ht1->entries + ht2->entries - merged_entries);
LOG(THREAD, LOG_FRAGMENT, 2, "Merging %s: %d + %d => %d (%d unique) entries\n",
info1->module, ht1->entries, ht2->entries, ht1->entries + ht2->entries,
merged_entries);
/* We could instead copy ht1 and then add ht2; for simplicity we re-use
* our create-empty routine and add both
*/
fragment_coarse_htable_create(dst, merged_entries,
/* Heuristic to never over-size, yet try to avoid
* collision asserts while resizing the table;
* FIXME: if we shrink the main htable afterward
* could do the same here and start at sum of
* entries */
MAX(thht1->entries, thht2->entries));
ht_dst = (coarse_table_t *) dst->htable;
thht_dst = (coarse_table_t *) dst->th_htable;
ASSERT(ht_dst != NULL && thht_dst != NULL);
/* For now we only support frozen tables; else will have to change
* the offsets to be stubs for main table and cache for th table
*/
ASSERT(info1->frozen && info2->frozen);
fragment_coarse_htable_merge_helper(dcontext, dst, ht_dst, info1, ht1, 0);
if (add_info2) {
fragment_coarse_htable_merge_helper(dcontext, dst, ht_dst, info2, ht2,
info1->cache_end_pc -
info1->cache_start_pc);
}
if (add_th_htable) {
ASSERT_NOT_TESTED();
fragment_coarse_htable_merge_helper(dcontext, dst, thht_dst, info1, thht1, 0);
fragment_coarse_htable_merge_helper(dcontext, dst, thht_dst, info2, thht2,
/* stubs_end_pc is not allocated end */
info1->mmap_pc + info1->mmap_size -
info1->stubs_start_pc);
}
}
#ifndef DEBUG
/* declared in the header only for debug builds; otherwise it is static, so we declare it here */
static void
coarse_body_from_htable_entry(dcontext_t *dcontext, coarse_info_t *info,
app_pc tag, cache_pc res,
cache_pc *stub_pc_out/*OUT*/,
cache_pc *body_pc_out/*OUT*/);
#endif
static void
study_and_free_coarse_htable(coarse_info_t *info, coarse_table_t *htable,
bool never_persisted _IF_DEBUG(const char *name))
{
LOG(GLOBAL, LOG_FRAGMENT, 1, "Coarse %s %s hashtable stats:\n",
info->module, name);
DOLOG(1, LOG_FRAGMENT|LOG_STATS, {
hashtable_coarse_load_statistics(GLOBAL_DCONTEXT, htable);
});
DODEBUG({
hashtable_coarse_study(GLOBAL_DCONTEXT, htable, 0/*table consistent*/);
});
DOLOG(3, LOG_FRAGMENT, {
hashtable_coarse_dump_table(GLOBAL_DCONTEXT, htable);
});
#ifdef CLIENT_INTERFACE
/* Only raise deletion events if client saw creation events: so no persisted
* units
*/
if (!info->persisted && htable == info->htable &&
dr_fragment_deleted_hook_exists()) {
app_to_cache_t a2c;
uint i;
dcontext_t *dcontext = get_thread_private_dcontext();
cache_pc body = NULL;
TABLE_RWLOCK(htable, read, lock);
for (i = 0; i < htable->capacity; i++) {
a2c = htable->table[i];
if (A2C_ENTRY_IS_REAL(a2c)) {
if (info->frozen)
body = a2c.cache;
else { /* make sure not an entrance stub w/ no body */
coarse_body_from_htable_entry(dcontext, info, a2c.app, a2c.cache,
NULL, &body);
}
if (body != NULL) {
instrument_fragment_deleted(get_thread_private_dcontext(), a2c.app,
FRAGMENT_COARSE_WRAPPER_FLAGS);
}
}
}
TABLE_RWLOCK(htable, read, unlock);
}
#endif
/* entries are inlined so nothing external to free */
if (info->persisted && !never_persisted) {
/* ensure won't try to free (part of mmap) */
ASSERT(htable->table_unaligned == NULL);
}
hashtable_coarse_free(GLOBAL_DCONTEXT, htable);
NONPERSISTENT_HEAP_TYPE_FREE(GLOBAL_DCONTEXT, htable, coarse_table_t, ACCT_FRAG_TABLE);
}
void
fragment_coarse_free_entry_pclookup_table(dcontext_t *dcontext, coarse_info_t *info)
{
if (info->pclookup_htable != NULL) {
ASSERT(DYNAMO_OPTION(coarse_pclookup_table));
study_and_free_coarse_htable(info, (coarse_table_t *) info->pclookup_htable,
true/*never persisted*/ _IF_DEBUG("pclookup"));
info->pclookup_htable = NULL;
}
}
void
fragment_coarse_htable_free(coarse_info_t *info)
{
ASSERT_OWN_MUTEX(!info->is_local, &info->lock);
if (info->htable == NULL) {
/* lazily initialized, so common to have empty units */
ASSERT(info->th_htable == NULL);
ASSERT(info->pclookup_htable == NULL);
return;
}
study_and_free_coarse_htable(info, (coarse_table_t *) info->htable, false
_IF_DEBUG("main"));
info->htable = NULL;
study_and_free_coarse_htable(info, (coarse_table_t *) info->th_htable, false
_IF_DEBUG("tracehead"));
info->th_htable = NULL;
if (info->pclookup_last_htable != NULL) {
generic_hash_destroy(GLOBAL_DCONTEXT, info->pclookup_last_htable);
info->pclookup_last_htable = NULL;
}
fragment_coarse_free_entry_pclookup_table(GLOBAL_DCONTEXT, info);
}
uint
fragment_coarse_num_entries(coarse_info_t *info)
{
coarse_table_t *htable;
ASSERT(info != NULL);
htable = (coarse_table_t *) info->htable;
if (htable == NULL)
return 0;
return htable->entries;
}
/* Add coarse fragment represented by wrapper f to the hashtable
* for unit info.
*/
void
fragment_coarse_add(dcontext_t *dcontext, coarse_info_t *info,
app_pc tag, cache_pc cache)
{
coarse_table_t *htable;
app_to_cache_t a2c = {tag, cache};
ASSERT(info != NULL && info->htable != NULL);
htable = (coarse_table_t *) info->htable;
DOCHECK(1, {
/* We have lock rank order problems b/c this lookup may acquire
* the th table read lock, and we can't split its rank from
* the main table's. So we live w/ a racy assert that could
* have false positives or negatives.
*/
cache_pc stub;
cache_pc body;
/* OK to have dup entries for entrance stubs in other units,
* or a stub already present for a trace head */
fragment_coarse_lookup_in_unit(dcontext, info, tag, &stub, &body);
ASSERT(body == NULL);
ASSERT(stub == NULL ||
coarse_is_trace_head_in_own_unit(dcontext, tag, stub,
/* case 10876: pass what we're adding */
(ptr_uint_t)cache + info->cache_start_pc,
true, info));
/* There can only be one body. But similarly to above,
* fragment_coarse_lookup may look in the secondary unit, so
* we can't do this lookup holding the write lock.
*/
if (!coarse_is_entrance_stub(cache)) {
coarse_info_t *xinfo = get_executable_area_coarse_info(tag);
fragment_t *f;
ASSERT(xinfo != NULL);
/* If an official unit, tag should not exist in any other official unit */
ASSERT((info != xinfo && info != xinfo->non_frozen) ||
fragment_coarse_lookup(dcontext, tag) == NULL);
f = fragment_lookup(dcontext, tag);
/* ok for trace to shadow coarse trace head */
ASSERT(f == NULL || TEST(FRAG_IS_TRACE, f->flags));
}
});
TABLE_RWLOCK(htable, write, lock);
/* Table is not an ibl table so we ignore resize return value */
hashtable_coarse_add(dcontext, a2c, htable);
TABLE_RWLOCK(htable, write, unlock);
#ifdef SHARING_STUDY
if (INTERNAL_OPTION(fragment_sharing_study)) {
ASSERT_NOT_IMPLEMENTED(false && "need to pass f in to add_shared_block");
}
#endif
}
/* Returns the body pc of the coarse trace head fragment corresponding to tag, or
* NULL if not found. Caller must hold the th table's read or write lock!
*/
static cache_pc
fragment_coarse_th_lookup(dcontext_t *dcontext, coarse_info_t *info, app_pc tag)
{
cache_pc res = NULL;
app_to_cache_t a2c;
coarse_table_t *htable;
ASSERT(info != NULL);
ASSERT(info->htable != NULL);
htable = (coarse_table_t *) info->th_htable;
ASSERT(TABLE_PROTECTED(htable));
a2c = coarse_lookup_internal(dcontext, tag, htable);
if (!A2C_ENTRY_IS_EMPTY(a2c)) {
ASSERT(BOOLS_MATCH(info->frozen, info->stubs_start_pc != NULL));
/* for frozen, th_htable only holds stubs */
res = ((ptr_uint_t)a2c.cache) + info->stubs_start_pc;
}
return res;
}
/* Performs two actions while holding the trace head table's write lock,
* making them atomic (solving the race in case 8795):
* 1) unlinks the coarse fragment's entrance pc and points it at the
* trace head exit routine;
* 2) adds the coarse fragment's body pc to the trace head hashtable.
*
* Case 8628: if we split the main htable into stubs and bodies then
* we can eliminate the th htable as it will be part of the body table.
*
* We could merge this info in to the thcounter table, and assume that
* most trace heads are coarse so we're not wasting much memory with
* the extra field. But then we have to walk entire stub table on
* cache flush and individually clear body pcs from monitor table, so
* better to have own th table?
*/
void
fragment_coarse_th_unlink_and_add(dcontext_t *dcontext, app_pc tag,
cache_pc stub_pc, cache_pc body_pc)
{
ASSERT(stub_pc != NULL);
if (body_pc != NULL) {
/* trace head is in this unit, so we have to add it to our th htable */
coarse_info_t *info = get_fcache_coarse_info(body_pc);
coarse_table_t *th_htable;
app_to_cache_t a2c = {tag, body_pc};
ASSERT(info != NULL && info->th_htable != NULL);
ASSERT(!info->frozen);
th_htable = (coarse_table_t *) info->th_htable;
TABLE_RWLOCK(th_htable, write, lock);
ASSERT(fragment_coarse_th_lookup(dcontext, info, tag) == NULL);
unlink_entrance_stub(dcontext, stub_pc, FRAG_IS_TRACE_HEAD, info);
/* Table is not an ibl table so we ignore resize return value */
hashtable_coarse_add(dcontext, a2c, th_htable);
TABLE_RWLOCK(th_htable, write, unlock);
LOG(THREAD, LOG_FRAGMENT, 4,
"adding to coarse th table for %s: "PFX"->"PFX"\n",
info->module, tag, body_pc);
} else {
/* ensure lives in another unit */
ASSERT(fragment_coarse_lookup(dcontext, tag) != stub_pc);
unlink_entrance_stub(dcontext, stub_pc, FRAG_IS_TRACE_HEAD, NULL);
}
}
/* Only use when building up a brand-new table. Otherwise use
* fragment_coarse_th_unlink_and_add().
*/
void
fragment_coarse_th_add(dcontext_t *dcontext, coarse_info_t *info,
app_pc tag, cache_pc cache)
{
coarse_table_t *th_htable;
app_to_cache_t a2c = {tag, cache};
ASSERT(info != NULL && info->th_htable != NULL);
ASSERT(info->frozen); /* only used when merging units */
th_htable = (coarse_table_t *) info->th_htable;
TABLE_RWLOCK(th_htable, write, lock);
ASSERT(fragment_coarse_th_lookup(dcontext, info, tag) == NULL);
/* Table is not an ibl table so we ignore resize return value */
hashtable_coarse_add(dcontext, a2c, th_htable);
TABLE_RWLOCK(th_htable, write, unlock);
}
/* The input here is the result of a lookup in the main htable.
* For a frozen unit, this actually looks up the stub pc since res is
* always the body pc.
* For a non-frozen unit this determines where to obtain the body pc.
* FIXME: case 8628 will simplify this whole thing
*/
/* exported only for assert in push_pending_freeze() */
IF_DEBUG_ELSE(,static) void
coarse_body_from_htable_entry(dcontext_t *dcontext, coarse_info_t *info,
app_pc tag, cache_pc res,
cache_pc *stub_pc_out/*OUT*/,
cache_pc *body_pc_out/*OUT*/)
{
cache_pc stub_pc = NULL, body_pc = NULL;
/* Should be passing absolute pc, not offset */
ASSERT(!info->frozen || res == NULL || res >= info->cache_start_pc);
if (info->frozen) {
/* We still need the stub table for lazy linking and for
 * shifting links from a trace head to a trace.
 */
body_pc = res;
if (stub_pc_out != NULL) {
TABLE_RWLOCK((coarse_table_t *) info->th_htable, read, lock);
stub_pc = fragment_coarse_th_lookup(dcontext, info, tag);
TABLE_RWLOCK((coarse_table_t *) info->th_htable, read, unlock);
}
} else {
/* In a non-frozen unit, htable entries are always stubs */
DOCHECK(CHKLVL_DEFAULT+1, { ASSERT(coarse_is_entrance_stub(res)); });
stub_pc = res;
if (body_pc_out != NULL) {
/* keep the th table entry and stub link status linked atomically */
TABLE_RWLOCK((coarse_table_t *) info->th_htable, read, lock);
body_pc = fragment_coarse_th_lookup(dcontext, info, tag);
if (body_pc != NULL) {
/* We can't just check coarse_is_trace_head(res) because a
* shadowed trace head is directly linked to its trace. The
* trace has lookup order precedence, but the head must show up
* if asked about here!
*/
/* FIXME: we could have a flags OUT param and set FRAG_IS_TRACE_HEAD
* to help out fragment_lookup_fine_and_coarse*().
*/
} else {
if (entrance_stub_linked(res, info)) {
/* We only want to set body_pc if it is present in this unit */
cache_pc tgt = entrance_stub_jmp_target(res);
if (get_fcache_coarse_info(tgt) == info)
body_pc = tgt;
else
body_pc = NULL;
} else
body_pc = NULL;
DOCHECK(CHKLVL_DEFAULT+1, {
ASSERT(!coarse_is_trace_head(res) ||
/* allow targeting trace head in another unit */
body_pc == NULL);
});
}
TABLE_RWLOCK((coarse_table_t *) info->th_htable, read, unlock);
}
}
if (stub_pc_out != NULL)
*stub_pc_out = stub_pc;
if (body_pc_out != NULL)
*body_pc_out = body_pc;
}
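/* Summary of what each coarse table maps (derived from the lookups above):
 *   frozen unit:     main htable: tag -> body offset (add info->cache_start_pc);
 *                    th htable:   tag -> entrance-stub offset (add info->stubs_start_pc).
 *   non-frozen unit: main htable: tag -> entrance-stub pc (absolute);
 *                    th htable:   tag -> trace-head body pc (absolute).
 * Case 8628 (splitting the main htable into stub and body tables) would let
 * the th htable be eliminated.
 */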
/* Coarse fragments have two entrance points: the actual fragment
* body, and the entrance stub that is used for indirection and
* convenient incremental building for intra-unit non-frozen linking
* as well as always used at the source end for inter-unit linking.
* This routine returns both. If the stub_pc returned is non-NULL, it
* only indicates that there is an outgoing link from a fragment
* present in this unit that targets the queried tag (and for
* intra-unit links in a frozen unit, such a link may exist but
* stub_pc will be NULL as the entrance stub indirection will have
* been replaced with a direct link). The fragment corresponding to
* the tag is only present in this unit if the body_pc returned is
* non-NULL.
*/
void
fragment_coarse_lookup_in_unit(dcontext_t *dcontext, coarse_info_t *info, app_pc tag,
/* FIXME: have separate type for stub pc vs body pc? */
cache_pc *stub_pc_out/*OUT*/,
cache_pc *body_pc_out/*OUT*/)
{
cache_pc res = NULL;
cache_pc stub_pc = NULL, body_pc = NULL;
app_to_cache_t a2c;
coarse_table_t *htable;
ASSERT(info != NULL);
if (info->htable == NULL) /* not initialized yet, so no code there */
goto coarse_lookup_return;
htable = (coarse_table_t *) info->htable;
TABLE_RWLOCK(htable, read, lock);
a2c = coarse_lookup_internal(dcontext, tag, htable);
if (!A2C_ENTRY_IS_EMPTY(a2c)) {
LOG(THREAD, LOG_FRAGMENT, 5, "%s: %s %s tag="PFX" => app="PFX" cache="PFX"\n",
__FUNCTION__, info->module, info->frozen ? "frozen" : "",
tag, a2c.app, a2c.cache);
ASSERT(BOOLS_MATCH(info->frozen, info->cache_start_pc != NULL));
/* for frozen, htable only holds body pc */
res = ((ptr_uint_t)a2c.cache) + info->cache_start_pc;
}
if (res != NULL)
coarse_body_from_htable_entry(dcontext, info, tag, res, &stub_pc, &body_pc);
else if (info->frozen) /* need a separate lookup for stub */
coarse_body_from_htable_entry(dcontext, info, tag, res, &stub_pc, &body_pc);
TABLE_RWLOCK(htable, read, unlock);
/* cannot have both a shared coarse and shared fine bb for same tag
* (can have shared trace shadowing shared coarse trace head bb, or
* private bb shadowing shared coarse bb)
*/
ASSERT(body_pc == NULL || fragment_lookup_shared_bb(dcontext, tag) == NULL);
coarse_lookup_return:
if (stub_pc_out != NULL)
*stub_pc_out = stub_pc;
if (body_pc_out != NULL)
*body_pc_out = body_pc;
}
/* Returns the body pc of the coarse fragment corresponding to tag, or
* NULL if not found
*/
cache_pc
fragment_coarse_lookup(dcontext_t *dcontext, app_pc tag)
{
cache_pc res = NULL;
coarse_info_t *info = get_executable_area_coarse_info(tag);
/* We must check each unit in turn */
while (info != NULL) { /* loop over primary and secondary unit */
fragment_coarse_lookup_in_unit(dcontext, info, tag, NULL, &res);
if (res != NULL)
return res;
ASSERT(info->frozen || info->non_frozen == NULL);
info = info->non_frozen;
ASSERT(info == NULL || !info->frozen);
}
return NULL;
}
/* It's up to the caller to hold locks preventing simultaneous writes to wrapper */
void
fragment_coarse_wrapper(fragment_t *wrapper, app_pc tag, cache_pc body_pc)
{
ASSERT(wrapper != NULL);
if (wrapper == NULL)
return;
ASSERT(tag != NULL);
ASSERT(body_pc != NULL);
/* FIXME: fragile to rely on other routines not inspecting other
* fields of fragment_t -- but setting to 0 perhaps no better than garbage
* in that case. We do need to set prefix_size, incoming_stubs, and
* {next,prev}_vmarea to NULL, for sure.
*/
memset(wrapper, 0, sizeof(*wrapper));
wrapper->tag = tag;
wrapper->start_pc = body_pc;
wrapper->flags = FRAGMENT_COARSE_WRAPPER_FLAGS;
/* We don't have stub pc so can't fill in FRAG_IS_TRACE_HEAD -- so we rely
* on callers passing src info to fragment_lookup_fine_and_coarse()
*/
}
/* If finds a coarse fragment for tag, returns wrapper; else returns NULL */
fragment_t *
fragment_coarse_lookup_wrapper(dcontext_t *dcontext, app_pc tag, fragment_t *wrapper)
{
cache_pc coarse;
ASSERT(wrapper != NULL);
coarse = fragment_coarse_lookup(dcontext, tag);
if (coarse != NULL) {
fragment_coarse_wrapper(wrapper, tag, coarse);
return wrapper;
}
return NULL;
}
/* Takes in last_exit in order to mark trace headness.
* FIXME case 8600: should we replace all other lookups with this one?
* Fragile to only have select callers use this and everyone else
* ignore coarse fragments...
*/
fragment_t *
fragment_lookup_fine_and_coarse(dcontext_t *dcontext, app_pc tag,
fragment_t *wrapper, linkstub_t *last_exit)
{
fragment_t *res = fragment_lookup(dcontext, tag);
if (DYNAMO_OPTION(coarse_units)) {
ASSERT(wrapper != NULL);
if (res == NULL) {
res = fragment_coarse_lookup_wrapper(dcontext, tag, wrapper);
/* FIXME: no way to know if source is a fine fragment! */
if (res != NULL && last_exit == get_coarse_trace_head_exit_linkstub())
res->flags |= FRAG_IS_TRACE_HEAD;
} else {
/* cannot have both coarse and fine shared bb for same tag
* (can have shared trace shadowing shared coarse trace head bb,
* or private bb with same tag)
*/
ASSERT(TEST(FRAG_IS_TRACE, res->flags) ||
!TEST(FRAG_SHARED, res->flags) ||
fragment_coarse_lookup(dcontext, tag) == NULL);
}
}
return res;
}
/* Takes in last_exit in order to mark trace headness.
* FIXME: should we replace all other same_sharing lookups with this one?
* Fragile to only have select callers use this and everyone else
* ignore coarse fragments...
*/
fragment_t *
fragment_lookup_fine_and_coarse_sharing(dcontext_t *dcontext, app_pc tag,
fragment_t *wrapper, linkstub_t *last_exit,
uint share_flags)
{
fragment_t *res =
fragment_lookup_same_sharing(dcontext, tag, share_flags);
if (DYNAMO_OPTION(coarse_units) && TEST(FRAG_SHARED, share_flags)) {
ASSERT(wrapper != NULL);
if (res == NULL) {
res = fragment_coarse_lookup_wrapper(dcontext, tag, wrapper);
if (res != NULL && last_exit == get_coarse_trace_head_exit_linkstub())
res->flags |= FRAG_IS_TRACE_HEAD;
}
}
return res;
}
/* Returns the owning unit of f (versus get_executable_area_coarse_info(f->tag)
* which may return a frozen unit that must be checked for f along with its
* secondary unfrozen unit).
*/
coarse_info_t *
get_fragment_coarse_info(fragment_t *f)
{
/* We have multiple potential units per vmarea region, so we use the fcache
* pc to get the unambiguous owning unit
*/
if (!TEST(FRAG_COARSE_GRAIN, f->flags))
return NULL;
ASSERT(FCACHE_ENTRY_PC(f) != NULL);
return get_fcache_coarse_info(FCACHE_ENTRY_PC(f));
}
/* Pass in info if you know it; else this routine will look it up.
* Checks for stub targeting a trace head, or targeting a trace thus
* indicating that this is a shadowed trace head.
* If body_in or info_in is NULL, this routine will look them up.
*/
bool
coarse_is_trace_head_in_own_unit(dcontext_t *dcontext, app_pc tag, cache_pc stub,
cache_pc body_in, bool body_valid,
coarse_info_t *info_in)
{
coarse_info_t *info = info_in;
ASSERT(stub != NULL);
if (coarse_is_trace_head(stub))
return true;
if (info == NULL)
info = get_stub_coarse_info(stub);
if (info == NULL)
return false;
/* If a coarse stub is linked to a fine fragment and there exists
* a body for that target tag in the same unit as the stub, we assume that
* we have a shadowed coarse trace head.
*/
if (entrance_stub_linked(stub, info) &&
/* Target is fine if no info for it */
get_fcache_coarse_info(entrance_stub_jmp_target(stub)) == NULL) {
cache_pc body = body_in;
if (!body_valid) {
ASSERT(body == NULL);
fragment_coarse_lookup_in_unit(dcontext, info, tag, NULL, &body);
}
if (body != NULL)
return true;
}
return false;
}
/* Replaces the htable entry for tag with new_value.
 * Returns whether an entry existed (and was thus replaced).
 */
bool
fragment_coarse_replace(dcontext_t *dcontext, coarse_info_t *info, app_pc tag,
cache_pc new_value)
{
app_to_cache_t old_entry = {tag, NULL/*doesn't matter*/};
app_to_cache_t new_entry = {tag, new_value};
coarse_table_t *htable;
bool res = false;
ASSERT(info != NULL && info->htable != NULL);
htable = (coarse_table_t *) info->htable;
/* a replace mutates the table entry, so take the write lock */
TABLE_RWLOCK(htable, write, lock);
res = hashtable_coarse_replace(old_entry, new_entry, info->htable);
TABLE_RWLOCK(htable, write, unlock);
return res;
}
/**************************************************
* PC LOOKUP
*/
/* Table for storing results of prior coarse pclookups (i#658) */
#define PCLOOKUP_LAST_HTABLE_INIT_SIZE 6 /*should remain small*/
/* Alarm signals can result in many pclookups from a variety of places
* (unlike usual DGC patterns) so we bound the size of the table.
* We want to err on the side of using more memory, since failing to
* cache all the instrs that are writing DGC can result in 2x slowdowns.
* It's not worth fancy replacement: we just clear the table if it gets
* really large and start over. Remember that this is per-coarse-unit.
*/
#define PCLOOKUP_LAST_HTABLE_MAX_ENTRIES 8192
typedef struct _pclookup_last_t {
app_pc tag;
cache_pc entry;
} pclookup_last_t;
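/* Free routine for pclookup_last_t payloads; passed to generic_hash_create()
 * below so the table can free cached entries on removal or clear.
 */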
static void
pclookup_last_free(pclookup_last_t *last)
{
HEAP_TYPE_FREE(GLOBAL_DCONTEXT, last, pclookup_last_t, ACCT_FRAG_TABLE, PROTECTED);
}
/* Returns the tag for the coarse fragment whose body contains pc.
* If the returned tag is non-NULL, also returns the body pc in the optional OUT param.
* FIXME: verify not called too often: watch the kstat.
*/
app_pc
fragment_coarse_pclookup(dcontext_t *dcontext, coarse_info_t *info, cache_pc pc,
/*OUT*/cache_pc *body_out)
{
app_to_cache_t a2c;
coarse_table_t *htable;
generic_table_t *pc_htable;
pclookup_last_t *last;
cache_pc body_pc;
ssize_t closest_distance = SSIZE_T_MAX;
app_pc closest = NULL;
uint i;
ASSERT(info != NULL);
if (info->htable == NULL)
return NULL;
KSTART(coarse_pclookup);
htable = (coarse_table_t *) info->htable;
if (info->pclookup_last_htable == NULL) {
/* lazily allocate table of all pclookups to avoid htable walk
* on frequent codemod instrs (i#658).
* I tried using a small array instead but chrome v8 needs at least
* 12 entries, and rather than LFU or LRU to avoid worst-case,
* it seems reasonable to simply store all lookups.
* Then the worst case is some extra memory, not 4x slowdowns.
*/
mutex_lock(&info->lock);
if (info->pclookup_last_htable == NULL) {
/* coarse_table_t isn't quite enough b/c we need the body pc,
* which would require an extra lookup w/ coarse_table_t
*/
pc_htable = generic_hash_create(GLOBAL_DCONTEXT,
PCLOOKUP_LAST_HTABLE_INIT_SIZE,
80 /* load factor: not perf-critical */,
HASHTABLE_ENTRY_SHARED | HASHTABLE_SHARED |
HASHTABLE_RELAX_CLUSTER_CHECKS,
(void(*)(void*)) pclookup_last_free
_IF_DEBUG("pclookup last table"));
/* Only when fully initialized can we set it, as we hold no lock for it */
info->pclookup_last_htable = (void *) pc_htable;
}
mutex_unlock(&info->lock);
}
pc_htable = (generic_table_t *) info->pclookup_last_htable;
ASSERT(pc_htable != NULL);
TABLE_RWLOCK(pc_htable, read, lock);
last = (pclookup_last_t *) generic_hash_lookup(GLOBAL_DCONTEXT, pc_htable,
(ptr_uint_t)pc);
if (last != NULL) {
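/* cache hit: reuse the result of a prior full-htable walk */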
closest = last->tag;
ASSERT(pc >= last->entry);
closest_distance = pc - last->entry;
}
TABLE_RWLOCK(pc_htable, read, unlock);
if (closest == NULL) {
/* do the htable walk */
TABLE_RWLOCK(htable, read, lock);
for (i = 0; i < htable->capacity; i++) {
a2c = htable->table[i];
/* must check for sentinel */
if (A2C_ENTRY_IS_REAL(a2c)) {
a2c.app -= htable->mod_shift;
ASSERT(BOOLS_MATCH(info->frozen, info->cache_start_pc != NULL));
/* for frozen, htable only holds body pc */
a2c.cache += (ptr_uint_t) info->cache_start_pc;
/* We have no body length so we must walk entire table */
coarse_body_from_htable_entry(dcontext, info, a2c.app,
a2c.cache, NULL, &body_pc);
if (body_pc != NULL &&
body_pc <= pc && (pc - body_pc) < closest_distance) {
closest_distance = pc - body_pc;
closest = a2c.app;
}
}
}
if (closest != NULL) {
/* Update the cache of results. Note that since this is coarse we
* don't have to do anything special to invalidate on codemod as the
* whole coarse unit will be thrown out.
*/
TABLE_RWLOCK(pc_htable, write, lock);
/* Check for race (i#1191) */
last = (pclookup_last_t *) generic_hash_lookup(GLOBAL_DCONTEXT, pc_htable,
(ptr_uint_t)pc);
if (last != NULL) {
closest = last->tag;
ASSERT(pc >= last->entry);
closest_distance = pc - last->entry;
} else {
last = HEAP_TYPE_ALLOC(GLOBAL_DCONTEXT, pclookup_last_t,
ACCT_FRAG_TABLE, PROTECTED);
last->tag = closest;
last->entry = pc - closest_distance;
if (pc_htable->entries >= PCLOOKUP_LAST_HTABLE_MAX_ENTRIES) {
/* See notes above: we don't want an enormous table.
* We just clear it rather than using a fancy replacement policy.
*/
generic_hash_clear(GLOBAL_DCONTEXT, pc_htable);
}
generic_hash_add(GLOBAL_DCONTEXT, pc_htable, (ptr_uint_t)pc,
(void *)last);
STATS_INC(coarse_pclookup_cached);
}
TABLE_RWLOCK(pc_htable, write, unlock);
}
TABLE_RWLOCK(htable, read, unlock);
}
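/* Per the header comment, *body_out is only meaningful when we return a
 * non-NULL tag: if closest is NULL, closest_distance is still SSIZE_T_MAX.
 */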
if (body_out != NULL)
*body_out = pc - closest_distance;
KSTOP(coarse_pclookup);
LOG(THREAD, LOG_FRAGMENT, 4, "%s: "PFX" => "PFX"\n", __FUNCTION__, pc, closest);
return closest;
}
/* Creates a reverse lookup table. For a non-frozen unit, the caller should only
* do this while all threads are suspended, and should free the table before
* resuming other threads.
*/
void
fragment_coarse_create_entry_pclookup_table(dcontext_t *dcontext, coarse_info_t *info)
{
app_to_cache_t main_a2c;
app_to_cache_t pc_a2c;
coarse_table_t *main_htable;
coarse_table_t *pc_htable;
cache_pc body_pc;
uint i;
ASSERT(info != NULL);
if (info->htable == NULL)
return;
if (info->pclookup_htable == NULL) {
mutex_lock(&info->lock);
if (info->pclookup_htable == NULL) {
/* set up reverse lookup table */
main_htable = (coarse_table_t *) info->htable;
pc_htable = NONPERSISTENT_HEAP_TYPE_ALLOC(GLOBAL_DCONTEXT, coarse_table_t,
ACCT_FRAG_TABLE);
hashtable_coarse_init(GLOBAL_DCONTEXT, pc_htable, main_htable->hash_bits,
DYNAMO_OPTION(coarse_pclookup_htable_load),
(hash_function_t)INTERNAL_OPTION(alt_hash_func),
0 /* hash_mask_offset */,
HASHTABLE_ENTRY_SHARED | HASHTABLE_SHARED |
HASHTABLE_RELAX_CLUSTER_CHECKS
_IF_DEBUG("coarse pclookup htable"));
pc_htable->mod_shift = 0;
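/* The reverse table is keyed by body (cache) pc rather than app pc -- the
 * entries added below store body_pc in .app and the tag in .cache -- so
 * presumably no mod_shift is needed here.
 */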
/* We give pc table a lower lock rank so we can add below
* while holding the lock, though the table is still local
* while we hold info->lock and do not write it out to its
* info-> field, so we could update the add() and
* deadlock-avoidance routines instead (xref case 9522).
* FIXME: add param to init() that takes in lock rank?
*/
ASSIGN_INIT_READWRITE_LOCK_FREE(pc_htable->rwlock,
coarse_pclookup_table_rwlock);
TABLE_RWLOCK(main_htable, read, lock);
TABLE_RWLOCK(pc_htable, write, lock);
for (i = 0; i < main_htable->capacity; i++) {
main_a2c = main_htable->table[i];
/* must check for sentinel */
if (A2C_ENTRY_IS_REAL(main_a2c)) {
main_a2c.app -= main_htable->mod_shift;
ASSERT(BOOLS_MATCH(info->frozen, info->cache_start_pc != NULL));
coarse_body_from_htable_entry(dcontext, info, main_a2c.app,
main_a2c.cache
/* frozen htable only holds body pc */
+ (ptr_uint_t) info->cache_start_pc,
NULL, &body_pc);
if (body_pc != NULL) {
/* We can have two tags with the same cache pc, if
* one is a single jmp that was elided (but we only
* do that if -unsafe_freeze_elide_sole_ubr).
* It's unsafe b/c of case 9677: when translating back
* to app pc we will just take 1st one in linear walk of
* htable, which may be the wrong one!
*/
pc_a2c = hashtable_coarse_lookup(dcontext,
(ptr_uint_t)body_pc, pc_htable);
if (A2C_ENTRY_IS_EMPTY(pc_a2c)) {
pc_a2c.app = body_pc;
pc_a2c.cache = main_a2c.app;
hashtable_coarse_add(dcontext, pc_a2c, pc_htable);
} else {
ASSERT(DYNAMO_OPTION(unsafe_freeze_elide_sole_ubr));
}
}
}
}
TABLE_RWLOCK(pc_htable, write, unlock);
TABLE_RWLOCK(main_htable, read, unlock);
/* Only when fully initialized can we set it, as we hold no lock for it */
info->pclookup_htable = (void *) pc_htable;
}
mutex_unlock(&info->lock);
}
}
/* Returns the tag for the coarse fragment whose body _begins at_ pc */
app_pc
fragment_coarse_entry_pclookup(dcontext_t *dcontext, coarse_info_t *info, cache_pc pc)
{
app_to_cache_t pc_a2c;
coarse_table_t *pc_htable;
cache_pc body_pc;
app_pc res = NULL;
ASSERT(info != NULL);
if (info->htable == NULL)
return NULL;
/* FIXME: we could use this table for non-frozen if we updated it
* when we add to main htable; for now we only support frozen use
*/
if (!DYNAMO_OPTION(coarse_pclookup_table) ||
/* we do create a table for non-frozen while we're freezing (i#735) */
(!info->frozen && info->pclookup_htable == NULL)) {
res = fragment_coarse_pclookup(dcontext, info, pc, &body_pc);
if (body_pc == pc) {
LOG(THREAD, LOG_FRAGMENT, 4, "%s: "PFX" => "PFX"\n", __FUNCTION__, pc, res);
return res;
} else
return NULL;
}
KSTART(coarse_pclookup);
if (info->pclookup_htable == NULL) {
fragment_coarse_create_entry_pclookup_table(dcontext, info);
}
pc_htable = (coarse_table_t *) info->pclookup_htable;
ASSERT(pc_htable != NULL);
TABLE_RWLOCK(pc_htable, read, lock);
pc_a2c = hashtable_coarse_lookup(dcontext, (ptr_uint_t)pc, pc_htable);
if (!A2C_ENTRY_IS_EMPTY(pc_a2c))
res = pc_a2c.cache;
TABLE_RWLOCK(pc_htable, read, unlock);
KSTOP(coarse_pclookup);
LOG(THREAD, LOG_FRAGMENT, 4, "%s: "PFX" => "PFX"\n", __FUNCTION__, pc, res);
return res;
}
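/* Copies the pending entry -- either an entrance stub or a fragment body --
 * into the new frozen unit: adds it to the appropriate frozen htable if not
 * already present, transfers its bytes, and patches any incoming link
 * recorded in pending->link_cti_opnd.
 */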
/* case 9900: must have dynamo_all_threads_synched since we haven't resolved
* lock rank ordering issues with the hashtable locks
*/
static void
fragment_coarse_entry_freeze(dcontext_t *dcontext, coarse_freeze_info_t *freeze_info,
pending_freeze_t *pending)
{
app_to_cache_t frozen_a2c;
app_to_cache_t looka2c = {0,0};
coarse_table_t *frozen_htable;
cache_pc tgt;
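/* tgt ends up holding the offset of the copied stub or fragment within the
 * new stubs or cache region; the link patching below adds the region's
 * start pc to form an absolute target.
 */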
if (pending->entrance_stub) {
frozen_htable = (coarse_table_t *) freeze_info->dst_info->th_htable;
/* case 9900: rank order conflict with coarse_info_incoming_lock:
* do not grab frozen_htable write lock (only need read but we were
* grabbing write to match assert): mark is_local instead and rely
* on dynamo_all_threads_synched
*/
DODEBUG({ frozen_htable->is_local = true; });
ASSERT_NOT_IMPLEMENTED(dynamo_all_threads_synched && "case 9900");
} else {
frozen_htable = (coarse_table_t *) freeze_info->dst_info->htable;
}
ASSERT_OWN_WRITE_LOCK(!frozen_htable->is_local, &frozen_htable->rwlock);
looka2c = coarse_lookup_internal(dcontext, pending->tag, frozen_htable);
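/* Each tag is copied into the frozen unit at most once: if it is already in
 * the frozen htable we only patch incoming links against the existing copy.
 */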
if (A2C_ENTRY_IS_EMPTY(looka2c)) {
frozen_a2c.app = pending->tag;
if (pending->entrance_stub) {
/* add inter-unit/trace-head stub to htable */
LOG(THREAD, LOG_FRAGMENT, 4,
" adding pending stub "PFX"."PFX" => "PFX"\n",
pending->tag, pending->cur_pc, freeze_info->stubs_cur_pc);
frozen_a2c.cache = (cache_pc)
(freeze_info->stubs_cur_pc - freeze_info->stubs_start_pc);
hashtable_coarse_add(dcontext, frozen_a2c, frozen_htable);
/* copy to new stubs area */
transfer_coarse_stub(dcontext, freeze_info, pending->cur_pc,
pending->trace_head, true/*replace*/);
} else {
/* fall-through optimization */
if (DYNAMO_OPTION(coarse_freeze_elide_ubr) &&
pending->link_cti_opnd != NULL &&
pending->link_cti_opnd + 4 == freeze_info->cache_cur_pc &&
pending->elide_ubr) {
ASSERT(!pending->trace_head);
LOG(THREAD, LOG_FRAGMENT, 4, " fall-through opt from prev fragment\n");
freeze_info->cache_cur_pc -= JMP_LONG_LENGTH;
pending->link_cti_opnd = NULL;
STATS_INC(coarse_freeze_fallthrough);
DODEBUG({ freeze_info->num_elisions++; });
}
LOG(THREAD, LOG_FRAGMENT, 4,
" adding pending %sfragment "PFX"."PFX" => "PFX"\n",
pending->trace_head ? "trace head " : "",
pending->tag, pending->cur_pc, freeze_info->cache_cur_pc);
/* add to htable the offset from start of cache */
frozen_a2c.cache = (cache_pc)
(freeze_info->cache_cur_pc - freeze_info->cache_start_pc);
hashtable_coarse_add(dcontext, frozen_a2c, frozen_htable);
/* copy to new cache */
transfer_coarse_fragment(dcontext, freeze_info, pending->cur_pc);
}
tgt = frozen_a2c.cache;
} else {
tgt = looka2c.cache;
/* Should not hit any links to TH, so should hit once, from htable walk */
ASSERT(!pending->trace_head || pending->entrance_stub);
/* May have added entrance stub for intra-unit TH as non-TH if it was
* linked to a trace, since didn't have body pc at the time, so we
* fix up here on the proactive add when adding its body
*/
if (pending->entrance_stub && pending->trace_head && freeze_info->unlink) {
cache_pc abs_tgt = tgt + (ptr_uint_t)freeze_info->stubs_start_pc;
transfer_coarse_stub_fix_trace_head(dcontext, freeze_info, abs_tgt);
}
}
if (pending->link_cti_opnd != NULL) {
/* fix up incoming link */
cache_pc patch_tgt = (cache_pc)
(((ptr_uint_t)(pending->entrance_stub ? freeze_info->stubs_start_pc :
freeze_info->cache_start_pc)) + tgt);
ASSERT(!pending->trace_head || pending->entrance_stub);
LOG(THREAD, LOG_FRAGMENT, 4, " patch link "PFX" => "PFX"."PFX"%s\n",
pending->link_cti_opnd, pending->tag, patch_tgt,
pending->entrance_stub ? " stub" : "");
insert_relative_target(pending->link_cti_opnd, patch_tgt, NOT_HOT_PATCHABLE);
}
if (pending->entrance_stub) {
DODEBUG({ frozen_htable->is_local = false; });
}
}
/* There are several strategies for copying each fragment and non-inter-unit stub
* to new, compact storage. Here we use a cache-driven approach, necessarily
* augmented with the htable as we cannot find tags for arbitrary fragments in
* the cache. We use a pending-add stack to avoid a second pass.
* By adding ubr targets last, we can elide fall-through jmps.
* FIXME case 9428:
* - Sorting entrance stubs for faster lazy linking lookup
* - Use 8-bit-relative jmps when possible for compaction, though this
* requires pc translation support and probably an extra pass when freezing.
*
* Tasks:
* Copy each new fragment and non-inter-unit stub to the new region
* Unlink all inter-unit entrance stubs, unless freezing and not
* writing to disk.
* Re-target intra-unit jmps from old entrance stub to new fragment location
* Re-target jmp to stub
* Re-target indirect stubs to new prefix
* Re-target inter-unit and trace head stubs to new prefix
*
* case 9900: must have dynamo_all_threads_synched since we haven't resolved
* lock rank ordering issues with the hashtable locks
*/
void
fragment_coarse_unit_freeze(dcontext_t *dcontext, coarse_freeze_info_t *freeze_info)
{
pending_freeze_t pending_local;
pending_freeze_t *pending;
app_to_cache_t a2c;
coarse_table_t *frozen_htable;
coarse_table_t *htable;
cache_pc body_pc;
uint i;
ASSERT(freeze_info != NULL && freeze_info->src_info != NULL);
if (freeze_info->src_info->htable == NULL)
return;
LOG(THREAD, LOG_FRAGMENT, 2,
"freezing fragments in %s\n", freeze_info->src_info->module);
/* we walk just the main htable and find trace heads by asking for the body pc */
htable = (coarse_table_t *) freeze_info->src_info->htable;
DOSTATS({
LOG(THREAD, LOG_ALL, 1,
"htable pre-freezing %s\n", freeze_info->src_info->module);
hashtable_coarse_study(dcontext, htable, 0/*clean state*/);
});
frozen_htable = (coarse_table_t *) freeze_info->dst_info->htable;
/* case 9900: rank order conflict with coarse_info_incoming_lock:
* do not grab frozen_htable write lock: mark is_local instead and rely
* on dynamo_all_threads_synched
*/
DODEBUG({ frozen_htable->is_local = true; });
ASSERT_NOT_IMPLEMENTED(dynamo_all_threads_synched && "case 9900");
/* FIXME case 9522: not grabbing TABLE_RWLOCK(htable, read, lock) due to
* rank order violation with frozen_htable write lock! We go w/ the write
* lock since lookup routines check for it. To solve we'll need a way to
* tell deadlock checking that frozen_htable is private to this thread and
* that no other thread can hold its lock. For now we only support
* all-synch, but if later we want !in_place that needs no synch we'll need
* a solution.
*/
ASSERT_NOT_IMPLEMENTED(dynamo_all_threads_synched && "case 9522");
/* FIXME: we're doing htable order, better to use either tag (original app)
* or cache (execution) order?
*/
for (i = 0; i < htable->capacity || freeze_info->pending != NULL; i++) {
/* Process pending entries first; then continue through htable */
while (freeze_info->pending != NULL) {
pending = freeze_info->pending;
freeze_info->pending = pending->next;
fragment_coarse_entry_freeze(dcontext, freeze_info, pending);
HEAP_TYPE_FREE(dcontext, pending, pending_freeze_t,
ACCT_MEM_MGT/*appropriate?*/, UNPROTECTED);
}
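/* Pending entries are drained; pull the next real entry from the main htable. */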
if (i < htable->capacity) {
a2c = htable->table[i];
/* must check for sentinel */
if (!A2C_ENTRY_IS_REAL(a2c))
continue;
} else
continue;
LOG(THREAD, LOG_FRAGMENT, 4,
" %d app="PFX", cache="PFX"\n", i, a2c.app, a2c.cache);
coarse_body_from_htable_entry(dcontext, freeze_info->src_info, a2c.app,
a2c.cache, NULL, &body_pc);
if (body_pc == NULL) {
/* We add entrance stubs only when they are targeted by fragments, so we
 * don't have to figure out more than once whether each is intra-unit.
 */
LOG(THREAD, LOG_FRAGMENT, 4,
" ignoring entrance stub "PFX"\n", a2c.cache);
} else {
pending_local.tag = a2c.app;
pending_local.cur_pc = body_pc;
pending_local.entrance_stub = false;
pending_local.link_cti_opnd = NULL;
pending_local.elide_ubr = true; /* doesn't matter since no link */
pending_local.trace_head =
coarse_is_trace_head_in_own_unit(dcontext, a2c.app, a2c.cache,
body_pc, true, freeze_info->src_info);
pending_local.next = NULL;
fragment_coarse_entry_freeze(dcontext, freeze_info, &pending_local);
if (pending_local.trace_head) {
/* we do need to proactively add the entrance stub, in
* case it is only targeted by an indirect branch
*/
LOG(THREAD, LOG_FRAGMENT, 4,
" adding trace head entrance stub "PFX"\n", a2c.cache);
pending_local.tag = a2c.app;
pending_local.cur_pc = a2c.cache;
pending_local.entrance_stub = true;
pending_local.link_cti_opnd = NULL;
pending_local.elide_ubr = true; /* doesn't matter since no link */
pending_local.trace_head = true;
fragment_coarse_entry_freeze(dcontext, freeze_info, &pending_local);
}
}
}
DODEBUG({ frozen_htable->is_local = false; });
DOSTATS({
/* The act of freezing tends to improve hashtable layout */
LOG(THREAD, LOG_ALL, 1,
"htable post-freezing %s\n", freeze_info->src_info->module);
hashtable_coarse_study(dcontext, frozen_htable, 0/*clean state*/);
});
}
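/* Returns the number of bytes needed to persist info's cache htable
 * (cache_table) or its stub htable (th_htable) otherwise.
 */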
uint
fragment_coarse_htable_persist_size(dcontext_t *dcontext, coarse_info_t *info,
bool cache_table)
{
coarse_table_t *htable = (coarse_table_t *)
(cache_table ? info->htable : info->th_htable);
return hashtable_coarse_persist_size(dcontext, htable);
}
/* Returns true iff all writes succeeded. */
bool
fragment_coarse_htable_persist(dcontext_t *dcontext, coarse_info_t *info,
bool cache_table, file_t fd)
{
coarse_table_t *htable = (coarse_table_t *)
(cache_table ? info->htable : info->th_htable);
ASSERT(fd != INVALID_FILE);
return hashtable_coarse_persist(dcontext, htable, fd);
}
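/* Re-creates info's cache or stub htable from a table image previously
 * written by fragment_coarse_htable_persist() and now mapped at mapped_table.
 */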
void
fragment_coarse_htable_resurrect(dcontext_t *dcontext, coarse_info_t *info,
bool cache_table, byte *mapped_table)
{
coarse_table_t **htable = (coarse_table_t **)
(cache_table ? &info->htable : &info->th_htable);
ASSERT(info->frozen);
ASSERT(mapped_table != NULL);
ASSERT(*htable == NULL);
*htable = hashtable_coarse_resurrect(dcontext, mapped_table
_IF_DEBUG(cache_table ?
"persisted cache htable" :
"persisted stub htable"));
(*htable)->mod_shift = info->mod_shift;
/* generally want to keep basic alignment */
ASSERT_CURIOSITY(ALIGNED((*htable)->table, sizeof(app_pc)));
}
/*******************************************************************************/