| /* ********************************************************** |
| * Copyright (c) 2011-2014 Google, Inc. All rights reserved. |
| * Copyright (c) 2000-2010 VMware, Inc. All rights reserved. |
| * **********************************************************/ |
| |
| /* |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * * Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimer. |
| * |
| * * Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * * Neither the name of VMware, Inc. nor the names of its contributors may be |
| * used to endorse or promote products derived from this software without |
| * specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE |
| * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH |
| * DAMAGE. |
| */ |
| |
| /* Copyright (c) 2003-2007 Determina Corp. */ |
| /* Copyright (c) 2001-2003 Massachusetts Institute of Technology */ |
| /* Copyright (c) 2000-2001 Hewlett-Packard Company */ |
| |
| /* |
| * fragment.c - fragment related routines |
| */ |
| |
| #include "globals.h" |
| #include "link.h" |
| #include "fragment.h" |
| #include "fcache.h" |
| #include "emit.h" |
| #include "monitor.h" |
| #include <string.h> /* for memset */ |
| #include "instrument.h" |
| #include <stddef.h> /* for offsetof */ |
| #include <limits.h> /* UINT_MAX */ |
| #include "perscache.h" |
| #include "synch.h" |
| #ifdef UNIX |
| # include "nudge.h" |
| #endif |
| |
| /* FIXME: make these runtime parameters */ |
| #define INIT_HTABLE_SIZE_SHARED_BB (DYNAMO_OPTION(coarse_units) ? 5 : 10) |
| #define INIT_HTABLE_SIZE_SHARED_TRACE 10 |
| /* the only private bbs will be selfmod, so start small */ |
| #define INIT_HTABLE_SIZE_BB (DYNAMO_OPTION(shared_bbs) ? 5 : 10) |
| /* coarse-grain fragments do not use futures */ |
| #define INIT_HTABLE_SIZE_SHARED_FUTURE (DYNAMO_OPTION(coarse_units) ? 5 : 10) |
| #ifdef RETURN_AFTER_CALL |
| /* we have small per-module hashtables */ |
| # define INIT_HTABLE_SIZE_AFTER_CALL 5 |
| #endif |
| /* private futures are only used when we have private fragments */ |
| #define INIT_HTABLE_SIZE_FUTURE \ |
| ((DYNAMO_OPTION(shared_bbs) && DYNAMO_OPTION(shared_traces)) ? 5 : 9) |
| |
| /* per-module htables */ |
| #define INIT_HTABLE_SIZE_COARSE 5 |
| #define INIT_HTABLE_SIZE_COARSE_TH 4 |
| |
| #ifdef RCT_IND_BRANCH |
| # include "rct.h" |
| /* we have small per-module hashtables */ |
| # define INIT_HTABLE_SIZE_RCT_IBT 7 |
| |
| # ifndef RETURN_AFTER_CALL |
| # error RCT_IND_BRANCH requires RETURN_AFTER_CALL since it reuses data types |
| # endif |
| #endif |
| |
| /* if shared traces, we currently have no private traces so make table tiny |
| * FIXME: should start out w/ no table at all |
| */ |
| #define INIT_HTABLE_SIZE_TRACE (DYNAMO_OPTION(shared_traces) ? 6 : 9) |
| /* for small table sizes resize is not an expensive operation and we start smaller */ |
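| /* NOTE: these init values are hash-bit counts, not entry counts; assuming the |
| * usual HASHTABLE_SIZE(bits) == (1 << bits) convention, 5 means 32 initial |
| * slots and 10 means 1024, before any load-factor-driven resize. |
| */ |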
| |
| /* Current flusher, protected by thread_initexit_lock. */ |
| DECLARE_FREQPROT_VAR(static dcontext_t *flusher, NULL); |
| /* Current allsynch-flusher, protected by thread_initexit_lock. */ |
| DECLARE_FREQPROT_VAR(static dcontext_t *allsynch_flusher, NULL); |
| |
| /* These global tables are kept on the heap for selfprot (case 7957) */ |
| |
| /* synchronization of these tables is accomplished via read-write locks, |
| * where the writers are removal and resizing -- addition is atomic to |
| * readers. |
| * for now none of these are read from ibl routines so we only have to |
| * synch with other DR routines |
| */ |
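| /* Illustrative reading of that discipline (a sketch, not this file's API): |
| * lookup: read_lock(&table->rwlock); ... read_unlock(&table->rwlock); |
| * remove/resize: write_lock(&table->rwlock); ... write_unlock(&table->rwlock); |
| * add: made visible by a single aligned store of the entry, so a |
| * concurrent reader sees either the old slot value or the new one, |
| * never a torn value. |
| */ |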
| static fragment_table_t *shared_bb; |
| static fragment_table_t *shared_trace; |
| |
| /* if we have either shared bbs or shared traces we need this shared: */ |
| static fragment_table_t *shared_future; |
| |
| /* Thread-shared tables are allocated in a shared per_thread_t. |
| * The structure is also used if we're dumping shared traces. |
| * Kept on the heap for selfprot (case 7957) |
| */ |
| static per_thread_t *shared_pt; |
| |
| #define USE_SHARED_PT() (SHARED_IBT_TABLES_ENABLED() || \ |
| (TRACEDUMP_ENABLED() && DYNAMO_OPTION(shared_traces))) |
| |
| /* We keep track of "old" IBT target tables in a linked list and |
| * deallocate them in fragment_exit(). */ |
| /* FIXME Deallocate tables more aggressively using a distributed, refcounting |
| * algo as is used for shared deletion. */ |
| typedef struct _dead_fragment_table_t { |
| fragment_entry_t *table_unaligned; |
| uint table_flags; |
| uint capacity; |
| uint ref_count; |
| struct _dead_fragment_table_t *next; |
| } dead_fragment_table_t; |
| |
| /* We keep these list pointers on the heap for selfprot (case 8074). */ |
| typedef struct _dead_table_lists_t { |
| dead_fragment_table_t *dead_tables; |
| dead_fragment_table_t *dead_tables_tail; |
| } dead_table_lists_t; |
| |
| static dead_table_lists_t *dead_lists; |
| |
| DECLARE_CXTSWPROT_VAR(static mutex_t dead_tables_lock, INIT_LOCK_FREE(dead_tables_lock)); |
| |
| #ifdef RETURN_AFTER_CALL |
| /* High level lock for an atomic lookup+add operation on the |
| * after call tables. */ |
| DECLARE_CXTSWPROT_VAR(static mutex_t after_call_lock, INIT_LOCK_FREE(after_call_lock)); |
| /* We use per-module tables and only need this table for non-module code; |
| * on Linux though this is the only table used, until we have a module list. |
| */ |
| static rct_module_table_t rac_non_module_table; |
| #endif |
| |
| /* allows independent sequences of flushes and delayed deletions, |
| * though with -syscalls_synch_flush additions we now hold this |
| * throughout a flush. |
| */ |
| DECLARE_CXTSWPROT_VAR(mutex_t shared_cache_flush_lock, |
| INIT_LOCK_FREE(shared_cache_flush_lock)); |
| /* Global count of flushes, used as a timestamp for shared deletion. |
| * Reads may be done w/o a lock, but writes can only be done |
| * via increment_global_flushtime() while holding shared_cache_flush_lock. |
| */ |
| DECLARE_FREQPROT_VAR(uint flushtime_global, 0); |
| |
| #ifdef CLIENT_INTERFACE |
| DECLARE_CXTSWPROT_VAR(mutex_t client_flush_request_lock, |
| INIT_LOCK_FREE(client_flush_request_lock)); |
| DECLARE_CXTSWPROT_VAR(client_flush_req_t *client_flush_requests, NULL); |
| #endif |
| |
| #if defined(RCT_IND_BRANCH) && defined(UNIX) |
| /* On Win32 we use per-module tables; on Linux we use a single global table, |
| * until we have a module list. |
| */ |
| rct_module_table_t rct_global_table; |
| #endif |
| |
| #define NULL_TAG ((app_pc)PTR_UINT_0) |
| /* FAKE_TAG is used as a deletion marker for unlinked entries */ |
| #define FAKE_TAG ((app_pc)PTR_UINT_MINUS_1) |
| |
| /* instead of an empty hashtable slot containing NULL, we fill it |
| * with a pointer to this constant fragment, which we give a tag |
| * of 0. |
| * PR 305731: rather than having a start_pc of 0, which causes |
| * an app targeting 0 to crash at 0, we point at a handler that |
| * sends the app to an ibl miss. |
| */ |
| byte * hashlookup_null_target; |
| #define HASHLOOKUP_NULL_START_PC ((cache_pc)hashlookup_null_handler) |
| static const fragment_t null_fragment = { NULL_TAG, 0, 0, 0, 0, |
| HASHLOOKUP_NULL_START_PC, }; |
| /* an end-of-table sentinel fragment lets us avoid a range check on the fast path */ |
| static const fragment_t sentinel_fragment = { NULL_TAG, 0, 0, 0, 0, |
| HASHLOOKUP_SENTINEL_START_PC, }; |
| |
| /* Shared fragment IBTs: We need to preserve the open addressing traversal |
| * in the hashtable while marking a table entry as unlinked. |
| * A null_fragment won't work since it terminates the traversal, |
| * so we use an unlinked marker. The lookup table entry for |
| * an unlinked entry *always* has its start_pc_fragment set to |
| * an IBL target_delete entry. |
| */ |
| static const fragment_t unlinked_fragment = { FAKE_TAG, }; |
| |
| /* macro used by code that must distinguish real fragments from the deletion markers */ |
| /* Shared fragment IBTs: unlinked_fragment isn't a real fragment either, so |
| * unlinked entries are naturally dropped during a table resize. */ |
| #define REAL_FRAGMENT(fragment) \ |
| ((fragment) != &null_fragment && \ |
| (fragment) != &unlinked_fragment && \ |
| (fragment) != &sentinel_fragment) |
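| /* Rough sketch (not the actual hashtablex.h or emitted IBL code) of how the |
| * three markers drive an open-address probe, using the fragment-table ENTRY_* |
| * predicates defined further below: &null_fragment ends the chain, |
| * &sentinel_fragment wraps to slot 0, and &unlinked_fragment is skipped so the |
| * chain stays walkable until the next resize drops it. |
| * |
| * i = hash(tag); |
| * while (!ENTRY_IS_EMPTY(table[i])) { |
| * if (ENTRY_IS_SENTINEL(table[i])) |
| * i = 0; |
| * else if (!ENTRY_IS_INVALID(table[i]) && table[i]->tag == tag) |
| * return table[i]; |
| * else |
| * i++; |
| * } |
| * return NULL; |
| */ |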
| |
| #define GET_PT(dc) ((dc) == GLOBAL_DCONTEXT ? (USE_SHARED_PT() ? shared_pt : NULL) :\ |
| (per_thread_t *) (dc)->fragment_field) |
| |
| #define TABLE_PROTECTED(ptable) \ |
| (!TABLE_NEEDS_LOCK(ptable) || READWRITE_LOCK_HELD(&(ptable)->rwlock)) |
| |
| /* everything except the invisible table is in here */ |
| #define GET_FTABLE_HELPER(pt, flags, otherwise) \ |
| (TEST(FRAG_IS_TRACE, (flags)) ? \ |
| (TEST(FRAG_SHARED, (flags)) ? shared_trace : &pt->trace) : \ |
| (TEST(FRAG_SHARED, (flags)) ? \ |
| (TEST(FRAG_IS_FUTURE, (flags)) ? shared_future : shared_bb) : \ |
| (TEST(FRAG_IS_FUTURE, (flags)) ? &pt->future : \ |
| (otherwise)))) |
| |
| #define GET_FTABLE(pt, flags) GET_FTABLE_HELPER(pt, (flags), &pt->bb) |
| |
| /* indirect branch table per target type (bb vs trace) and indirect branch type */ |
| #define GET_IBT_TABLE(pt, flags, branch_type) \ |
| (TEST(FRAG_IS_TRACE, (flags)) ? \ |
| (DYNAMO_OPTION(shared_trace_ibt_tables) ? \ |
| &shared_pt->trace_ibt[(branch_type)] : \ |
| &(pt)->trace_ibt[(branch_type)]) : \ |
| (DYNAMO_OPTION(shared_bb_ibt_tables) ? \ |
| &shared_pt->bb_ibt[(branch_type)] : \ |
| &(pt)->bb_ibt[(branch_type)])) |
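| /* Example (hypothetical call site): GET_IBT_TABLE(pt, f->flags, branch_type) |
| * yields the IBT table for that branch type, picking the shared vs private and |
| * trace vs bb variant from the flags and the shared_*_ibt_tables options. |
| */ |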
| |
| /********************************** STATICS ***********************************/ |
| static uint fragment_heap_size(uint flags, int direct_exits, int indirect_exits); |
| |
| static void fragment_free_future(dcontext_t *dcontext, future_fragment_t *fut); |
| |
| #if defined(RETURN_AFTER_CALL) || defined(RCT_IND_BRANCH) |
| static void |
| coarse_persisted_fill_ibl(dcontext_t *dcontext, coarse_info_t *info, |
| ibl_branch_type_t branch_type); |
| #endif |
| |
| #ifdef CLIENT_INTERFACE |
| static void |
| process_client_flush_requests(dcontext_t *dcontext, dcontext_t *alloc_dcontext, |
| client_flush_req_t *req, bool flush); |
| #endif |
| |
| #if defined(INTERNAL) || defined(CLIENT_INTERFACE) |
| /* trace logging and synch for shared trace file: */ |
| DECLARE_CXTSWPROT_VAR(static mutex_t tracedump_mutex, INIT_LOCK_FREE(tracedump_mutex)); |
| DECLARE_FREQPROT_VAR(static stats_int_t tcount, 0); /* protected by tracedump_mutex */ |
| static void exit_trace_file(per_thread_t *pt); |
| static void output_trace(dcontext_t *dcontext, per_thread_t *pt, |
| fragment_t *f, stats_int_t deleted_at); |
| static void init_trace_file(per_thread_t *pt); |
| #endif |
| |
| #define SHOULD_OUTPUT_FRAGMENT(flags) \ |
| (TEST(FRAG_IS_TRACE, (flags)) && \ |
| !TEST(FRAG_TRACE_OUTPUT, (flags)) && \ |
| TRACEDUMP_ENABLED()) |
| |
| #define FRAGMENT_COARSE_WRAPPER_FLAGS \ |
| FRAG_FAKE | FRAG_SHARED | FRAG_COARSE_GRAIN | \ |
| FRAG_LINKED_OUTGOING | FRAG_LINKED_INCOMING |
| |
| /* We use temporary fragment_t + linkstub_t structs to more easily |
| * use existing code when emitting coarse-grain fragments. |
| * Only bbs with a single indirect exit, or one or two direct exits, can be coarse-grain. |
| * The bb_building_lock protects use of this. |
| */ |
| DECLARE_FREQPROT_VAR( |
| static struct { |
| fragment_t f; |
| union { |
| struct { |
| direct_linkstub_t dir_exit_1; |
| direct_linkstub_t dir_exit_2; |
| } dir_exits; |
| indirect_linkstub_t ind_exit; |
| } exits; |
| } coarse_emit_fragment, {{0}}); |
| |
| #ifdef SHARING_STUDY |
| /*************************************************************************** |
| * fragment_t sharing study |
| * Only used with -fragment_sharing_study |
| * When the option is off we go ahead and waste the 4 static vars |
| * below so we don't have to have a define and separate build. |
| */ |
| typedef struct _thread_list_t { |
| uint thread_num; |
| uint count; |
| struct _thread_list_t *next; |
| } thread_list_t; |
| |
| typedef struct _shared_entry_t { |
| app_pc tag; |
| uint num_threads; |
| thread_list_t *threads; |
| uint heap_size; |
| uint cache_size; |
| struct _shared_entry_t *next; |
| } shared_entry_t; |
| # define SHARED_HASH_BITS 16 |
| static shared_entry_t ** shared_blocks; |
| DECLARE_CXTSWPROT_VAR(static mutex_t shared_blocks_lock, |
| INIT_LOCK_FREE(shared_blocks_lock)); |
| static shared_entry_t ** shared_traces; |
| DECLARE_CXTSWPROT_VAR(static mutex_t shared_traces_lock, |
| INIT_LOCK_FREE(shared_traces_lock)); |
| |
| /* assumes caller holds table's lock! */ |
| static shared_entry_t * |
| shared_block_lookup(shared_entry_t **table, fragment_t *f) |
| { |
| shared_entry_t *e; |
| uint hindex; |
| |
| hindex = HASH_FUNC_BITS((ptr_uint_t)f->tag, SHARED_HASH_BITS); |
| /* using collision chains */ |
| for (e = table[hindex]; e != NULL; e = e->next) { |
| if (e->tag == f->tag) { |
| return e; |
| } |
| } |
| return NULL; |
| } |
| |
| static void |
| reset_shared_block_table(shared_entry_t **table, mutex_t *lock) |
| { |
| shared_entry_t *e, *nxte; |
| uint i; |
| uint size = HASHTABLE_SIZE(SHARED_HASH_BITS); |
| mutex_lock(lock); |
| for (i = 0; i < size; i++) { |
| for (e = table[i]; e != NULL; e = nxte) { |
| thread_list_t *tl = e->threads; |
| thread_list_t *tlnxt; |
| nxte = e->next; |
| while (tl != NULL) { |
| tlnxt = tl->next; |
| global_heap_free(tl, sizeof(thread_list_t) HEAPACCT(ACCT_OTHER)); |
| tl = tlnxt; |
| } |
| global_heap_free(e, sizeof(shared_entry_t) HEAPACCT(ACCT_OTHER)); |
| } |
| } |
| global_heap_free(table, size*sizeof(shared_entry_t*) HEAPACCT(ACCT_OTHER)); |
| mutex_unlock(lock); |
| } |
| |
| static void |
| add_shared_block(shared_entry_t **table, mutex_t *lock, fragment_t *f) |
| { |
| shared_entry_t *e; |
| uint hindex; |
| int num_direct = 0, num_indirect = 0; |
| linkstub_t *l = FRAGMENT_EXIT_STUBS(f); |
| /* use num to avoid thread_id_t recycling problems */ |
| uint tnum = get_thread_num(get_thread_id()); |
| |
| mutex_lock(lock); |
| e = shared_block_lookup(table, f); |
| if (e != NULL) { |
| thread_list_t *tl = e->threads; |
| for (; tl != NULL; tl = tl->next) { |
| if (tl->thread_num == tnum) { |
| tl->count++; |
| LOG(GLOBAL, LOG_ALL, 2, |
| "add_shared_block: tag "PFX", but re-add #%d for thread #%d\n", |
| e->tag, tl->count, tnum); |
| mutex_unlock(lock); |
| return; |
| } |
| } |
| tl = global_heap_alloc(sizeof(thread_list_t) HEAPACCT(ACCT_OTHER)); |
| tl->thread_num = tnum; |
| tl->count = 1; |
| tl->next = e->threads; |
| e->threads = tl; |
| e->num_threads++; |
| LOG(GLOBAL, LOG_ALL, 2, "add_shared_block: tag "PFX" thread #%d => %d threads\n", |
| e->tag, tnum, e->num_threads); |
| mutex_unlock(lock); |
| return; |
| } |
| |
| /* get num stubs to find heap size */ |
| for (; l != NULL; l = LINKSTUB_NEXT_EXIT(l)) { |
| if (LINKSTUB_DIRECT(l->flags)) |
| num_direct++; |
| else { |
| ASSERT(LINKSTUB_INDIRECT(l->flags)); |
| num_indirect++; |
| } |
| } |
| |
| /* add entry to thread hashtable */ |
| e = (shared_entry_t *) global_heap_alloc(sizeof(shared_entry_t) HEAPACCT(ACCT_OTHER)); |
| e->tag = f->tag; |
| e->num_threads = 1; |
| e->heap_size = fragment_heap_size(f->flags, num_direct, num_indirect); |
| e->cache_size = (f->size + f->fcache_extra); |
| e->threads = global_heap_alloc(sizeof(thread_list_t) HEAPACCT(ACCT_OTHER)); |
| e->threads->thread_num = tnum; |
| e->threads->count = 1; |
| e->threads->next = NULL; |
| LOG(GLOBAL, LOG_ALL, 2, "add_shared_block: tag "PFX", heap %d, cache %d, thread #%d\n", |
| e->tag, e->heap_size, e->cache_size, e->threads->thread_num); |
| |
| hindex = HASH_FUNC_BITS((ptr_uint_t)f->tag, SHARED_HASH_BITS); |
| e->next = table[hindex]; |
| table[hindex] = e; |
| mutex_unlock(lock); |
| } |
| |
| static void |
| print_shared_table_stats(shared_entry_t **table, mutex_t *lock, const char *name) |
| { |
| uint i; |
| shared_entry_t *e; |
| uint size = HASHTABLE_SIZE(SHARED_HASH_BITS); |
| uint tot = 0, shared_tot = 0, shared = 0, heap = 0, cache = 0, |
| creation_count = 0; |
| |
| mutex_lock(lock); |
| for (i = 0; i < size; i++) { |
| for (e = table[i]; e != NULL; e = e->next) { |
| thread_list_t *tl = e->threads; |
| tot++; |
| shared_tot += e->num_threads; |
| for (; tl != NULL; tl = tl->next) |
| creation_count += tl->count; |
| if (e->num_threads > 1) { |
| shared++; |
| /* assume similar size for each thread -- cache padding |
| * only real difference |
| */ |
| heap += (e->heap_size * e->num_threads); |
| cache += (e->cache_size * e->num_threads); |
| } |
| } |
| } |
| mutex_unlock(lock); |
| LOG(GLOBAL, LOG_ALL, 1, "Shared %s statistics:\n", name); |
| LOG(GLOBAL, LOG_ALL, 1, "\ttotal blocks: %10d\n", tot); |
| LOG(GLOBAL, LOG_ALL, 1, "\tcreation count: %10d\n", creation_count); |
| LOG(GLOBAL, LOG_ALL, 1, "\tshared count: %10d\n", shared_tot); |
| LOG(GLOBAL, LOG_ALL, 1, "\tshared blocks: %10d\n", shared); |
| LOG(GLOBAL, LOG_ALL, 1, "\tshared heap: %10d\n", heap); |
| LOG(GLOBAL, LOG_ALL, 1, "\tshared cache: %10d\n", cache); |
| } |
| |
| void |
| print_shared_stats() |
| { |
| print_shared_table_stats(shared_blocks, &shared_blocks_lock, "basic block"); |
| print_shared_table_stats(shared_traces, &shared_traces_lock, "trace"); |
| } |
| #endif /* SHARING_STUDY ***************************************************/ |
| |
| |
| #ifdef FRAGMENT_SIZES_STUDY /*****************************************/ |
| #include <math.h> |
| /* don't bother to synchronize these */ |
| static int bb_sizes[200000]; |
| static int trace_sizes[40000]; |
| static int num_bb = 0; |
| static int num_traces = 0; |
| |
| void |
| record_fragment_size(int size, bool is_trace) |
| { |
| if (is_trace) { |
| trace_sizes[num_traces] = size; |
| num_traces++; |
| ASSERT(num_traces < 40000); |
| } else { |
| bb_sizes[num_bb] = size; |
| num_bb++; |
| ASSERT(num_bb < 200000); |
| } |
| } |
| |
| void |
| print_size_results() |
| { |
| LOG(GLOBAL, LOG_ALL, 1, "Basic block sizes (bytes):\n"); |
| print_statistics(bb_sizes, num_bb); |
| LOG(GLOBAL, LOG_ALL, 1, "Trace sizes (bytes):\n"); |
| print_statistics(trace_sizes, num_traces); |
| } |
| #endif /* FRAGMENT_SIZES_STUDY */ /*****************************************/ |
| |
| #define FRAGTABLE_WHICH_HEAP(flags) \ |
| (TESTALL(FRAG_TABLE_INCLUSIVE_HIERARCHY | FRAG_TABLE_IBL_TARGETED, \ |
| (flags)) ? ACCT_IBLTABLE : ACCT_FRAG_TABLE) |
| |
| #ifdef HASHTABLE_STATISTICS |
| # define UNPROT_STAT(stats) unprot_stats->stats |
| /* FIXME: either put in nonpersistent heap as appropriate, or |
| * preserve across resets |
| */ |
| # define ALLOC_UNPROT_STATS(dcontext, table) do { \ |
| (table)->unprot_stats = \ |
| HEAP_TYPE_ALLOC((dcontext), unprot_ht_statistics_t, \ |
| FRAGTABLE_WHICH_HEAP((table)->table_flags), \ |
| UNPROTECTED); \ |
| memset((table)->unprot_stats, 0, sizeof(unprot_ht_statistics_t)); \ |
| } while (0) |
| # define DEALLOC_UNPROT_STATS(dcontext, table) \ |
| HEAP_TYPE_FREE((dcontext), (table)->unprot_stats, unprot_ht_statistics_t, \ |
| FRAGTABLE_WHICH_HEAP((table)->table_flags), UNPROTECTED) |
| # define CHECK_UNPROT_STATS(table) ASSERT(table.unprot_stats != NULL) |
| |
| static void |
| check_stay_on_trace_stats_overflow(dcontext_t *dcontext, ibl_branch_type_t branch_type) |
| { |
| per_thread_t *pt = (per_thread_t *) dcontext->fragment_field; |
| hashtable_statistics_t *lookup_stats = &pt->trace_ibt[branch_type].unprot_stats-> |
| trace_ibl_stats[branch_type]; |
| if (lookup_stats->ib_stay_on_trace_stat < lookup_stats->ib_stay_on_trace_stat_last) { |
| lookup_stats->ib_stay_on_trace_stat_ovfl++; |
| } |
| lookup_stats->ib_stay_on_trace_stat_last = lookup_stats->ib_stay_on_trace_stat; |
| /* FIXME: ib_trace_last_ibl_exit should have an overflow check as well */ |
| } |
| #endif /* HASHTABLE_STATISTICS */ |
| |
| /* init/update the tls slots storing this table's mask and lookup base |
| * N.B.: for thread-shared the caller must call for each thread |
| */ |
| /* currently we don't support a mixture */ |
| static inline void |
| update_lookuptable_tls(dcontext_t *dcontext, ibl_table_t *table) |
| { |
| /* use dcontext->local_state, rather than get_local_state(), to support |
| * being called from other threads! |
| */ |
| local_state_extended_t *state = |
| (local_state_extended_t *) dcontext->local_state; |
| |
| ASSERT(state != NULL); |
| ASSERT(DYNAMO_OPTION(ibl_table_in_tls)); |
| /* We must hold at least the read lock here, else we could grab |
| * an inconsistent mask/lookuptable pair if another thread is in the middle |
| * of resizing the table (case 10405). |
| */ |
| ASSERT_TABLE_SYNCHRONIZED(table, READWRITE); |
| /* case 10296: for shared tables we must update the table |
| * before the mask, as the ibl lookup code accesses the mask first, |
| * and old mask + new table is ok since it will de-ref within the |
| * new table (we never shrink tables) and be a miss, whereas |
| * new mask + old table can de-ref beyond the end of the table, |
| * crashing or worse. |
| */ |
| state->table_space.table[table->branch_type].lookuptable = |
| table->table; |
| state->table_space.table[table->branch_type].hash_mask = |
| table->hash_mask; |
| } |
| |
| #ifdef DEBUG |
| static const char *ibl_bb_table_type_names[IBL_BRANCH_TYPE_END] = |
| {"ret_bb", "indcall_bb", "indjmp_bb"}; |
| static const char *ibl_trace_table_type_names[IBL_BRANCH_TYPE_END] = |
| {"ret_trace", "indcall_trace", "indjmp_trace"}; |
| #endif |
| |
| #ifdef DEBUG |
| static inline void |
| dump_lookuptable_tls(dcontext_t *dcontext) |
| { |
| /* use dcontext->local_state, rather than get_local_state(), to support |
| * being called from other threads! |
| */ |
| if (DYNAMO_OPTION(ibl_table_in_tls)) { |
| |
| local_state_extended_t *state = |
| (local_state_extended_t *) dcontext->local_state; |
| ibl_branch_type_t branch_type; |
| |
| ASSERT(state != NULL); |
| for (branch_type = IBL_BRANCH_TYPE_START; |
| branch_type < IBL_BRANCH_TYPE_END; branch_type++) { |
| LOG(THREAD, LOG_FRAGMENT, 1, |
| "\t Table %s, table "PFX", mask "PFX"\n", |
| !SHARED_BB_ONLY_IB_TARGETS() ? |
| ibl_trace_table_type_names[branch_type] : |
| ibl_bb_table_type_names[branch_type], |
| state->table_space.table[branch_type].lookuptable, |
| state->table_space.table[branch_type].hash_mask); |
| } |
| } |
| } |
| #endif |
| |
| /******************************************************************************* |
| * IBL HASHTABLE INSTANTIATION |
| */ |
| #define FRAGENTRY_FROM_FRAGMENT(f) { (f)->tag, (f)->start_pc } |
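| /* Descriptive note: this initializer pairs a fragment's tag with its cache |
| * entry point; the IBL tables store only such (tag_fragment, start_pc_fragment) |
| * pairs, so the in-cache lookup can jump straight to start_pc_fragment without |
| * dereferencing the fragment_t itself. |
| */ |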
| |
| /* macros w/ name and types are duplicated in fragment.h -- keep in sync */ |
| #define NAME_KEY ibl |
| #define ENTRY_TYPE fragment_entry_t |
| /* not defining HASHTABLE_USE_LOOKUPTABLE */ |
| /* compiler won't let me use null_fragment.tag here */ |
| static const fragment_entry_t fe_empty = { NULL_TAG, HASHLOOKUP_NULL_START_PC }; |
| static const fragment_entry_t fe_sentinel = { NULL_TAG, HASHLOOKUP_SENTINEL_START_PC }; |
| #define ENTRY_TAG(fe) ((ptr_uint_t)(fe).tag_fragment) |
| #define ENTRY_EMPTY (fe_empty) |
| #define ENTRY_SENTINEL (fe_sentinel) |
| #define IBL_ENTRY_IS_EMPTY(fe) \ |
| ((fe).tag_fragment == fe_empty.tag_fragment && \ |
| (fe).start_pc_fragment == fe_empty.start_pc_fragment) |
| #define IBL_ENTRY_IS_INVALID(fe) ((fe).tag_fragment == FAKE_TAG) |
| #define IBL_ENTRY_IS_SENTINEL(fe) \ |
| ((fe).tag_fragment == fe_sentinel.tag_fragment && \ |
| (fe).start_pc_fragment == fe_sentinel.start_pc_fragment) |
| #define ENTRY_IS_EMPTY(fe) IBL_ENTRY_IS_EMPTY(fe) |
| #define ENTRY_IS_SENTINEL(fe) IBL_ENTRY_IS_SENTINEL(fe) |
| #define ENTRY_IS_INVALID(fe) IBL_ENTRY_IS_INVALID(fe) |
| #define IBL_ENTRIES_ARE_EQUAL(fe1,fe2) ((fe1).tag_fragment == (fe2).tag_fragment) |
| #define ENTRIES_ARE_EQUAL(table,fe1,fe2) IBL_ENTRIES_ARE_EQUAL(fe1,fe2) |
| #define HASHTABLE_WHICH_HEAP(flags) FRAGTABLE_WHICH_HEAP(flags) |
| #define HTLOCK_RANK table_rwlock |
| #define HASHTABLE_ENTRY_STATS 1 |
| |
| #include "hashtablex.h" |
| /* all defines are undef-ed at end of hashtablex.h */ |
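| /* Note: including hashtablex.h with NAME_KEY == ibl expands the generic |
| * open-address hashtable template into the hashtable_ibl_* family (e.g. the |
| * hashtable_ibl_init and hashtable_ibl_free used below); the fragment and |
| * app_pc instantiations later in this file work the same way. |
| */ |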
| |
| /* required routines for hashtable interface that we don't need for this instance */ |
| |
| static void |
| hashtable_ibl_free_entry(dcontext_t *dcontext, ibl_table_t *table, |
| fragment_entry_t entry) |
| { |
| /* nothing to do, data is inlined */ |
| } |
| |
| /******************************************************************************* |
| * FRAGMENT HASHTABLE INSTANTIATION |
| */ |
| |
| /* macros w/ name and types are duplicated in fragment.h -- keep in sync */ |
| #define NAME_KEY fragment |
| #define ENTRY_TYPE fragment_t * |
| /* not defining HASHTABLE_USE_LOOKUPTABLE */ |
| |
| #define ENTRY_TAG(f) ((ptr_uint_t)(f)->tag) |
| /* instead of setting to 0, point at null_fragment */ |
| #define ENTRY_EMPTY ((fragment_t *)&null_fragment) |
| #define ENTRY_SENTINEL ((fragment_t *)&sentinel_fragment) |
| #define ENTRY_IS_EMPTY(f) ((f) == (fragment_t *)&null_fragment) |
| #define ENTRY_IS_SENTINEL(f) ((f) == (fragment_t *)&sentinel_fragment) |
| #define ENTRY_IS_INVALID(f) ((f) == (fragment_t *)&unlinked_fragment) |
| #define ENTRIES_ARE_EQUAL(t,f,g) ((f) == (g)) |
| #define HASHTABLE_WHICH_HEAP(flags) FRAGTABLE_WHICH_HEAP(flags) |
| #define HTLOCK_RANK table_rwlock |
| |
| #include "hashtablex.h" |
| /* all defines are undef-ed at end of hashtablex.h */ |
| |
| static void |
| hashtable_fragment_resized_custom(dcontext_t *dcontext, fragment_table_t *table, |
| uint old_capacity, fragment_t **old_table, |
| fragment_t **old_table_unaligned, |
| uint old_ref_count, uint old_table_flags) |
| { |
| /* nothing */ |
| } |
| |
| static void |
| hashtable_fragment_init_internal_custom(dcontext_t *dcontext, fragment_table_t *table) |
| { |
| /* nothing */ |
| } |
| |
| #ifdef DEBUG |
| static void |
| hashtable_fragment_study_custom(dcontext_t *dcontext, fragment_table_t *table, |
| uint entries_inc/*amnt table->entries was pre-inced*/) |
| { |
| /* nothing */ |
| } |
| #endif |
| |
| /* callers should use either hashtable_ibl_preinit or hashtable_resize instead */ |
| static void |
| hashtable_ibl_init_internal_custom(dcontext_t *dcontext, ibl_table_t *table) |
| { |
| ASSERT(null_fragment.tag == NULL_TAG); |
| ASSERT(null_fragment.start_pc == HASHLOOKUP_NULL_START_PC); |
| ASSERT(FAKE_TAG != NULL_TAG); |
| |
| ASSERT(sentinel_fragment.tag == NULL_TAG); |
| ASSERT(sentinel_fragment.start_pc == HASHLOOKUP_SENTINEL_START_PC); |
| ASSERT(HASHLOOKUP_SENTINEL_START_PC != HASHLOOKUP_NULL_START_PC); |
| |
| ASSERT(TEST(FRAG_TABLE_IBL_TARGETED, table->table_flags)); |
| ASSERT(TEST(FRAG_TABLE_INCLUSIVE_HIERARCHY, table->table_flags)); |
| |
| /* every time we resize a table we reset the flush threshold, |
| * since it is cleared in place after one flush |
| */ |
| table->groom_factor_percent = |
| TEST(FRAG_TABLE_TRACE, table->table_flags) ? |
| DYNAMO_OPTION(trace_ibt_groom) : DYNAMO_OPTION(bb_ibt_groom); |
| table->max_capacity_bits = |
| TEST(FRAG_TABLE_TRACE, table->table_flags) ? |
| DYNAMO_OPTION(private_trace_ibl_targets_max) : |
| DYNAMO_OPTION(private_bb_ibl_targets_max); |
| |
| #ifdef HASHTABLE_STATISTICS |
| if (INTERNAL_OPTION(hashtable_ibl_stats)) { |
| if (table->unprot_stats == NULL) { |
| /* first time, not a resize */ |
| ALLOC_UNPROT_STATS(dcontext, table); |
| } /* else, keep original */ |
| } |
| #endif /* HASHTABLE_STATISTICS */ |
| |
| if (SHARED_IB_TARGETS() && |
| !TEST(FRAG_TABLE_SHARED, table->table_flags)) { |
| /* currently we don't support a mixture */ |
| ASSERT(TEST(FRAG_TABLE_TARGET_SHARED, table->table_flags)); |
| ASSERT(TEST(FRAG_TABLE_IBL_TARGETED, table->table_flags)); |
| ASSERT(table->branch_type != IBL_NONE); |
| /* Only data for one set of tables is stored in TLS -- for the trace |
| * tables in the default config OR the BB tables in shared BBs |
| * only mode. |
| */ |
| if ((TEST(FRAG_TABLE_TRACE, table->table_flags) || |
| SHARED_BB_ONLY_IB_TARGETS()) && |
| DYNAMO_OPTION(ibl_table_in_tls)) |
| update_lookuptable_tls(dcontext, table); |
| } |
| } |
| |
| /* We need our own routines to init/free our added fields */ |
| static void |
| hashtable_ibl_myinit(dcontext_t *dcontext, ibl_table_t *table, uint bits, |
| uint load_factor_percent, hash_function_t func, |
| uint hash_offset, ibl_branch_type_t branch_type, |
| bool use_lookup, uint table_flags _IF_DEBUG(const char *table_name)) |
| { |
| uint flags = table_flags; |
| ASSERT(dcontext != GLOBAL_DCONTEXT || TEST(FRAG_TABLE_SHARED, flags)); |
| /* flags shared by all ibl tables */ |
| flags |= FRAG_TABLE_INCLUSIVE_HIERARCHY; |
| flags |= FRAG_TABLE_IBL_TARGETED; |
| flags |= HASHTABLE_ALIGN_TABLE; |
| /* use entry stats with all our ibl-targeted tables */ |
| flags |= HASHTABLE_USE_ENTRY_STATS; |
| #ifdef HASHTABLE_STATISTICS |
| /* indicate this is first time, not a resize */ |
| table->unprot_stats = NULL; |
| #endif |
| table->branch_type = branch_type; |
| hashtable_ibl_init(dcontext, table, bits, load_factor_percent, |
| func, hash_offset, flags _IF_DEBUG(table_name)); |
| |
| /* PR 305731: rather than having a start_pc of 0, which causes an |
| * app targeting 0 to crash at 0, we point at a handler that sends |
| * the app to an ibl miss via target_delete, which restores |
| * registers saved in the found path. |
| */ |
| if (dcontext != GLOBAL_DCONTEXT && hashlookup_null_target == NULL) { |
| ASSERT(!dynamo_initialized); |
| hashlookup_null_target = get_target_delete_entry_pc(dcontext, table); |
| #if !defined(X64) && defined(LINUX) |
| /* see comments in x86.asm: we patch to avoid text relocations */ |
| byte *pc = (byte *) hashlookup_null_handler; |
| byte *page_start = (byte *) PAGE_START(pc); |
| byte *page_end = (byte *) ALIGN_FORWARD(pc + JMP_LONG_LENGTH, PAGE_SIZE); |
| make_writable(page_start, page_end - page_start); |
| insert_relative_target(pc + 1, hashlookup_null_target, NOT_HOT_PATCHABLE); |
| make_unwritable(page_start, page_end - page_start); |
| #endif |
| } |
| } |
| |
| static void |
| hashtable_ibl_myfree(dcontext_t *dcontext, ibl_table_t *table) |
| { |
| #ifdef HASHTABLE_STATISTICS |
| if (INTERNAL_OPTION(hashtable_ibl_stats)) { |
| ASSERT(TEST(FRAG_TABLE_IBL_TARGETED, table->table_flags)); |
| DEALLOC_UNPROT_STATS(dcontext, table); |
| } |
| #endif /* HASHTABLE_STATISTICS */ |
| hashtable_ibl_free(dcontext, table); |
| } |
| |
| static void |
| hashtable_fragment_free_entry(dcontext_t *dcontext, fragment_table_t *table, |
| fragment_t *f) |
| { |
| if (TEST(FRAG_TABLE_INCLUSIVE_HIERARCHY, table->table_flags)) { |
| ASSERT_NOT_REACHED(); /* case 7691 */ |
| } else { |
| if (TEST(FRAG_IS_FUTURE, f->flags)) |
| fragment_free_future(dcontext, (future_fragment_t *)f); |
| else |
| fragment_free(dcontext, f); |
| } |
| } |
| |
| static inline bool |
| fragment_add_to_hashtable(dcontext_t *dcontext, fragment_t *e, fragment_table_t *table) |
| { |
| /* When using shared IBT tables w/trace building and BB2BB IBL, there is a |
| * race between adding a BB target to a table and having it marked by |
| * another thread as a trace head. The race exists because the two functions |
| * do not use a common lock. |
| * The race does NOT cause a correctness problem since a) the marking thread |
| * removes the trace head from the table and b) any subsequent add attempt |
| * is caught in add_ibl_target(). The table lock is used during add and |
| * remove operations and FRAG_IS_TRACE_HEAD is checked while holding |
| * the lock. So although a trace head may be present in a table temporarily -- |
| * it's being marked while an add operation that has passed the frag flags |
| * check is in progress -- it will be subsequently removed by the marking |
| * thread. |
| * However, the existence of the race does mean that |
| * we cannot ASSERT(!(FRAG_IS_TRACE_HEAD,...)) at arbitrary spots along the |
| * add_ibl_target() path since such an assert could fire due to the race. |
| * What is likely a safe point to assert is when there is only a single |
| * thread in the process. |
| */ |
| DOCHECK(1, { |
| if (TEST(FRAG_TABLE_IBL_TARGETED, table->table_flags) && |
| get_num_threads() == 1) |
| ASSERT(!TEST(FRAG_IS_TRACE_HEAD, e->flags)); |
| }); |
| |
| return hashtable_fragment_add(dcontext, e, table); |
| } |
| |
| /* updates all fragments in a given fragment table which may |
| * have IBL routine heads inlined in the indirect exit stubs |
| * |
| * FIXME: [perf] should add a filter of which branch types need updating if |
| * updating all is a noticeable performance hit. |
| * |
| * FIXME: [perf] Also it may be better to traverse all fragments in an fcache |
| * unit instead of entries in a half-empty hashtable |
| */ |
| static void |
| update_indirect_exit_stubs_from_table(dcontext_t *dcontext, |
| fragment_table_t *ftable) |
| { |
| fragment_t *f; |
| linkstub_t *l; |
| uint i; |
| |
| for (i = 0; i < ftable->capacity; i++) { |
| f = ftable->table[i]; |
| if (!REAL_FRAGMENT(f)) |
| continue; |
| for (l = FRAGMENT_EXIT_STUBS(f); l != NULL; l = LINKSTUB_NEXT_EXIT(l)) { |
| if (LINKSTUB_INDIRECT(l->flags)) { |
| /* FIXME: should add a filter of which branch types need updating */ |
| update_indirect_exit_stub(dcontext, f, l); |
| LOG(THREAD, LOG_FRAGMENT, 5, |
| "\tIBL target table resizing: updating F%d\n", f->id); |
| STATS_INC(num_ibl_stub_resize_updates); |
| } |
| } |
| } |
| } |
| |
| static void |
| safely_nullify_tables(dcontext_t *dcontext, ibl_table_t *new_table, |
| fragment_entry_t *table, uint capacity) |
| { |
| uint i; |
| cache_pc target_delete = get_target_delete_entry_pc(dcontext, new_table); |
| |
| ASSERT(target_delete != NULL); |
| ASSERT_TABLE_SYNCHRONIZED(new_table, WRITE); |
| for (i = 0; i < capacity; i++) { |
| if (IBL_ENTRY_IS_SENTINEL(table[i])) { |
| ASSERT(i == capacity - 1); |
| continue; |
| } |
| /* We need these writes to be atomic, so check that they're aligned. */ |
| ASSERT(ALIGNED(&table[i].tag_fragment, 4)); |
| ASSERT(ALIGNED(&table[i].start_pc_fragment, 4)); |
| /* We update the tag first so that a thread that's skipping |
| * along a chain will exit ASAP. Breaking the chain is ok since we're |
| * nullifying the entire table. |
| */ |
| table[i].tag_fragment = fe_empty.tag_fragment; |
| /* We set the payload to target_delete to induce a cache exit. |
| * |
| * The target_delete path leads to a loss of information -- we can't |
| * tell what the src fragment was (the one that transitioned to the |
| * IBL code) and this in principle could weaken our RCT checks (see case |
| * 5085). In practical terms, RCT checks are unaffected since they |
| * are not employed on in-cache transitions such as an IBL hit. |
| * (All transitions to target_delete are a race along the hit path.) |
| * If we still want to preserve the src info, we can leave the payload |
| * as-is, possibly pointing to a cache address. The effect is that |
| * any thread accessing the old table on the IBL hit path will not exit |
| * the cache as early. (We should leave the fragment_t* value in the |
| * table untouched also so that the fragment_table_t is in a consistent |
| * state.) |
| */ |
| table[i].start_pc_fragment = target_delete; |
| } |
| STATS_INC(num_shared_ibt_table_flushes); |
| } |
| |
| /* Add an item to the dead tables list */ |
| static inline void |
| add_to_dead_table_list(dcontext_t *alloc_dc, ibl_table_t *ftable, |
| uint old_capacity, |
| fragment_entry_t *old_table_unaligned, uint old_ref_count, |
| uint old_table_flags) |
| { |
| dead_fragment_table_t *item =(dead_fragment_table_t*) |
| heap_alloc(GLOBAL_DCONTEXT, sizeof(dead_fragment_table_t) |
| HEAPACCT(ACCT_IBLTABLE)); |
| |
| LOG(GLOBAL, LOG_FRAGMENT, 2, |
| "add_to_dead_table_list %s "PFX" capacity %d\n", |
| ftable->name, old_table_unaligned, old_capacity); |
| ASSERT(old_ref_count >= 1); /* someone other than the caller must be holding a reference */ |
| /* write lock must be held so that ref_count is copied accurately */ |
| ASSERT_TABLE_SYNCHRONIZED(ftable, WRITE); |
| item->capacity = old_capacity; |
| item->table_unaligned = old_table_unaligned; |
| item->table_flags = old_table_flags; |
| item->ref_count = old_ref_count; |
| item->next = NULL; |
| /* Add to the end of list. We use a FIFO because generally we'll be |
| * decrementing ref-counts for older tables before we do so for |
| * younger tables. A FIFO will yield faster searches than, say, a |
| * stack. |
| */ |
| mutex_lock(&dead_tables_lock); |
| if (dead_lists->dead_tables == NULL) { |
| ASSERT(dead_lists->dead_tables_tail == NULL); |
| dead_lists->dead_tables = item; |
| } |
| else { |
| ASSERT(dead_lists->dead_tables_tail != NULL); |
| ASSERT(dead_lists->dead_tables_tail->next == NULL); |
| dead_lists->dead_tables_tail->next = item; |
| } |
| dead_lists->dead_tables_tail = item; |
| mutex_unlock(&dead_tables_lock); |
| STATS_ADD_PEAK(num_dead_shared_ibt_tables, 1); |
| STATS_INC(num_total_dead_shared_ibt_tables); |
| } |
| |
| /* forward decl */ |
| static inline void |
| update_private_ptr_to_shared_ibt_table(dcontext_t *dcontext, |
| ibl_branch_type_t branch_type, bool trace, |
| bool adjust_old_ref_count, bool lock_table); |
| static void |
| hashtable_ibl_resized_custom(dcontext_t *dcontext, ibl_table_t *table, |
| uint old_capacity, fragment_entry_t *old_table, |
| fragment_entry_t *old_table_unaligned, |
| uint old_ref_count, uint old_table_flags) |
| { |
| dcontext_t *alloc_dc = FRAGMENT_TABLE_ALLOC_DC(dcontext, table->table_flags); |
| per_thread_t *pt = GET_PT(dcontext); |
| bool shared_ibt_table = |
| TESTALL(FRAG_TABLE_TARGET_SHARED | FRAG_TABLE_SHARED, table->table_flags); |
| ASSERT(TEST(FRAG_TABLE_IBL_TARGETED, table->table_flags)); |
| |
| /* If we change an ibl-targeted table, must patch up every |
| * inlined indirect exit stub that targets it. |
| * For our per-type ibl tables, however, we don't bother updating |
| * fragments _targeted_ by the resized table; instead we need to |
| * update all fragments that may be a source of an inlined IBL. |
| */ |
| |
| /* private inlined IBL heads targeting this table need to be updated */ |
| if (DYNAMO_OPTION(inline_trace_ibl) && PRIVATE_TRACES_ENABLED()) { |
| /* We'll get here on a trace table resize, while we |
| * need to patch only when the trace_ibt tables are resized. |
| */ |
| /* We assume we don't inline IBL lookup targeting tables of basic blocks |
| * and so shouldn't need to do this for now. */ |
| ASSERT(dcontext != GLOBAL_DCONTEXT && pt != NULL); /* private traces */ |
| if (TESTALL(FRAG_TABLE_INCLUSIVE_HIERARCHY | FRAG_TABLE_TRACE, |
| table->table_flags)) { |
| /* need to update all traces that could be targeting the |
| * currently resized table */ |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| "\tIBL target table resizing: updating all private trace fragments\n"); |
| update_indirect_exit_stubs_from_table(dcontext, &pt->trace); |
| } |
| } |
| |
| /* if we change the trace table (or an IBL target trace |
| * table), must patch up every inlined indirect exit stub |
| * in all bb fragments in case the inlined target is the |
| * resized table |
| */ |
| if (DYNAMO_OPTION(inline_bb_ibl)) { |
| LOG(THREAD, LOG_FRAGMENT, 3, |
| "\tIBL target table resizing: updating bb fragments\n"); |
| update_indirect_exit_stubs_from_table(dcontext, &pt->bb); |
| } |
| |
| /* don't need to update any inlined lookups in shared fragments */ |
| |
| if (shared_ibt_table) { |
| if (old_ref_count > 0) { |
| /* The old table should be nullified ASAP. Since threads update |
| * their table pointers on-demand only when they exit the cache |
| * after a failed IBL lookup, they could have IBL targets for |
| * stale entries. This would likely occur only when there's an |
| * app race but in the future could occur due to cache |
| * management. |
| */ |
| safely_nullify_tables(dcontext, table, old_table, old_capacity); |
| add_to_dead_table_list(alloc_dc, table, old_capacity, |
| old_table_unaligned, |
| old_ref_count, table->table_flags); |
| } |
| /* Update the resizing thread's private ptr. */ |
| update_private_ptr_to_shared_ibt_table(dcontext, table->branch_type, |
| TEST(FRAG_TABLE_TRACE, |
| table->table_flags), |
| false, /* no adjust |
| * old ref-count */ |
| false /* already hold lock */); |
| ASSERT(table->ref_count == 1); |
| } |
| |
| /* CHECK: is it safe to update the table without holding the lock? */ |
| /* Using the table flags to drive the update of generated code may |
| * err on the side of caution, but it's the best way to guarantee |
| * that all of the necessary code is updated. |
| * We may perform extra unnecessary updates when a table that's |
| * accessed off of the dcontext/per_thread_t is grown, but that doesn't |
| * cause correctness problems and likely doesn't hurt performance. |
| */ |
| STATS_INC(num_ibt_table_resizes); |
| update_generated_hashtable_access(dcontext); |
| } |
| |
| #ifdef DEBUG |
| static void |
| hashtable_ibl_study_custom(dcontext_t *dcontext, ibl_table_t *table, |
| uint entries_inc/*amnt table->entries was pre-inced*/) |
| { |
| # ifdef HASHTABLE_STATISTICS |
| /* For trace table(s) only, use stats from emitted ibl routines */ |
| if (TEST(FRAG_TABLE_IBL_TARGETED, table->table_flags) && |
| INTERNAL_OPTION(hashtable_ibl_stats)) { |
| per_thread_t *pt = GET_PT(dcontext); |
| ibl_branch_type_t branch_type; |
| |
| for (branch_type = IBL_BRANCH_TYPE_START; |
| branch_type < IBL_BRANCH_TYPE_END; branch_type++) { |
| /* This is convoluted since given a table we have to |
| * recover its branch type. |
| * FIXME: should simplify these assumptions one day |
| */ |
| /* Current table should be targeted only by one of the IBL routines */ |
| if (!((!DYNAMO_OPTION(disable_traces) && |
| table == &pt->trace_ibt[branch_type]) || |
| (DYNAMO_OPTION(bb_ibl_targets) && |
| table == &pt->bb_ibt[branch_type]))) |
| continue; |
| /* stats for lookup routines from bbs and traces targeting the current table */ |
| print_hashtable_stats(dcontext, entries_inc == 0 ? "Total" : "Current", |
| table->name, |
| "trace ibl ", get_branch_type_name(branch_type), |
| &table->UNPROT_STAT(trace_ibl_stats[branch_type])); |
| print_hashtable_stats(dcontext, entries_inc == 0 ? "Total" : "Current", |
| table->name, |
| "bb ibl ", |
| get_branch_type_name(branch_type), |
| &table->UNPROT_STAT(bb_ibl_stats[branch_type])); |
| } |
| } |
| # endif /* HASHTABLE_STATISTICS */ |
| } |
| #endif /* DEBUG */ |
| |
| #if defined(DEBUG) || defined(CLIENT_INTERFACE) |
| /* filter specifies flags for fragments which are OK to be freed */ |
| /* NOTE - if this routine is ever used for non DEBUG purposes be aware that |
| * because of case 7697 we don't unlink when we free the hashtable elements. |
| * As such, if we aren't also freeing all fragments that could possibly link |
| * to fragments in this table at the same time (synchronously) we'll have |
| * problems (for ex. a trace only reset would need to unlink incoming, or |
| * allowing private->shared linking would need to unlink outgoing). |
| */ |
| static void |
| hashtable_fragment_reset(dcontext_t *dcontext, fragment_table_t *table) |
| { |
| int i; |
| fragment_t *f; |
| |
| /* case 7691: we now use separate ibl table types */ |
| ASSERT(!TEST(FRAG_TABLE_INCLUSIVE_HIERARCHY, table->table_flags)); |
| LOG(THREAD, LOG_FRAGMENT, 2, "hashtable_fragment_reset\n"); |
| DOLOG(1, LOG_FRAGMENT|LOG_STATS, { |
| hashtable_fragment_load_statistics(dcontext, table); |
| }); |
| if (TEST(FRAG_TABLE_SHARED, table->table_flags) && |
| TEST(FRAG_TABLE_IBL_TARGETED, table->table_flags)) { |
| DOLOG(4, LOG_FRAGMENT, { |
| hashtable_fragment_dump_table(dcontext, table); |
| }); |
| } |
| DODEBUG({ |
| hashtable_fragment_study(dcontext, table, 0/*table consistent*/); |
| /* ensure write lock is held if the table is shared, unless exiting |
| * or resetting (N.B.: if change reset model to not suspend all in-DR |
| * threads, will have to change this and handle rank order issues) |
| */ |
| if (!dynamo_exited && !dynamo_resetting) |
| ASSERT_TABLE_SYNCHRONIZED(table, WRITE); |
| }); |
| /* Go in reverse order (for efficiency) since we use |
| * hashtable_fragment_remove_helper, which keeps all entries reachable; that is |
| * required for dynamo_resetting, where we unlink fragments here and need to be |
| * able to perform lookups. |
| */ |
| i = table->capacity - 1 - 1 /* sentinel */; |
| while (i >= 0) { |
| f = table->table[i]; |
| if (f == &null_fragment) { |
| i--; |
| } else { /* i stays put */ |
| /* The shared BB table is reset at process reset or shutdown, so |
| * trace_abort() has already been called by (or for) every thread. |
| * If shared traces is true, by this point none of the shared BBs |
| * should have FRAG_TRACE_BUILDING set since the flag is cleared |
| * by trace_abort(). Of course, the flag shouldn't be present |
| * if shared traces is false so we don't need to conditionalize |
| * the assert. |
| */ |
| ASSERT(!TEST(FRAG_TRACE_BUILDING, f->flags)); |
| hashtable_fragment_remove_helper(table, i, &table->table[i]); |
| if (!REAL_FRAGMENT(f)) |
| continue; |
| /* make sure no other hashtable has shared fragments in it: |
| * this routine is called on shared tables, but only after dynamo_exited; |
| * the per-thread IBL tables contain pointers to shared fragments |
| * and are OK |
| */ |
| ASSERT(dynamo_exited || !TEST(FRAG_SHARED, f->flags) || dynamo_resetting); |
| |
| # if defined(SIDELINE) && defined(PROFILE_LINKCOUNT) |
| if ((f->flags & FRAG_DO_NOT_SIDELINE) != 0) { |
| /* print out total count of exit counters */ |
| LOG(THREAD, LOG_SIDELINE, 2, "\tSidelined trace F%d total times executed: " |
| LINKCOUNT_FORMAT_STRING "\n", f->id, get_total_linkcount(f)); |
| } |
| # endif |
| if (TEST(FRAG_IS_FUTURE, f->flags)) { |
| DODEBUG({ ((future_fragment_t *)f)->incoming_stubs = NULL; }); |
| fragment_free_future(dcontext, (future_fragment_t *)f); |
| } else { |
| DOSTATS({ |
| if (dynamo_resetting) |
| STATS_INC(num_fragments_deleted_reset); |
| else |
| STATS_INC(num_fragments_deleted_exit); |
| }); |
| /* Xref 7697 - unlinking the fragments here can screw up the |
| * future table as we are walking in hash order, so we don't |
| * unlink. See note at top of routine for issues with not |
| * unlinking here if this code is ever used in non debug |
| * builds. */ |
| fragment_delete(dcontext, f, |
| FRAGDEL_NO_HTABLE | FRAGDEL_NO_UNLINK | |
| FRAGDEL_NEED_CHLINK_LOCK | |
| (dynamo_resetting ? 0 : FRAGDEL_NO_OUTPUT)); |
| } |
| } |
| } |
| table->entries = 0; |
| table->unlinked_entries = 0; |
| } |
| #endif /* DEBUG || CLIENT_INTERFACE */ |
| |
| /* |
| *******************************************************************************/ |
| |
| |
| #if defined(RETURN_AFTER_CALL) || defined (RCT_IND_BRANCH) |
| /******************************************************************************* |
| * APP_PC HASHTABLE INSTANTIATION |
| */ |
| /* FIXME: RCT tables no longer use future_fragment_t and can be moved out of fragment.c */ |
| |
| /* The ENTRY_* defines are undef-ed at end of hashtablex.h so we make our own. |
| * Would be nice to re-use ENTRY_IS_EMPTY, etc., though w/ multiple htables |
| * in same file can't realistically get away w/o custom defines like these: |
| */ |
| #define APP_PC_EMPTY (NULL) |
| /* assume 1 is always invalid address */ |
| #define APP_PC_SENTINEL ((app_pc)PTR_UINT_1) |
| #define APP_PC_ENTRY_IS_EMPTY(pc) ((pc) == APP_PC_EMPTY) |
| #define APP_PC_ENTRY_IS_SENTINEL(pc) ((pc) == APP_PC_SENTINEL) |
| #define APP_PC_ENTRY_IS_REAL(pc) (!APP_PC_ENTRY_IS_EMPTY(pc) && \ |
| !APP_PC_ENTRY_IS_SENTINEL(pc)) |
| /* 2 macros w/ name and types are duplicated in fragment.h -- keep in sync */ |
| #define NAME_KEY app_pc |
| #define ENTRY_TYPE app_pc |
| /* not defining HASHTABLE_USE_LOOKUPTABLE */ |
| #define ENTRY_TAG(f) ((ptr_uint_t)(f)) |
| #define ENTRY_EMPTY APP_PC_EMPTY |
| #define ENTRY_SENTINEL APP_PC_SENTINEL |
| #define ENTRY_IS_EMPTY(f) APP_PC_ENTRY_IS_EMPTY(f) |
| #define ENTRY_IS_SENTINEL(f) APP_PC_ENTRY_IS_SENTINEL(f) |
| #define ENTRY_IS_INVALID(f) (false) /* no invalid entries */ |
| #define ENTRIES_ARE_EQUAL(t,f,g) ((f) == (g)) |
| #define HASHTABLE_WHICH_HEAP(flags) (ACCT_AFTER_CALL) |
| #define HTLOCK_RANK app_pc_table_rwlock |
| #define HASHTABLE_SUPPORT_PERSISTENCE 1 |
| |
| #include "hashtablex.h" |
| /* all defines are undef-ed at end of hashtablex.h */ |
| |
| /* required routines for hashtable interface that we don't need for this instance */ |
| |
| static void |
| hashtable_app_pc_init_internal_custom(dcontext_t *dcontext, app_pc_table_t *htable) |
| { /* nothing */ |
| } |
| |
| static void |
| hashtable_app_pc_resized_custom(dcontext_t *dcontext, app_pc_table_t *htable, |
| uint old_capacity, app_pc *old_table, |
| app_pc *old_table_unaligned, |
| uint old_ref_count, uint old_table_flags) |
| { /* nothing */ |
| } |
| |
| # ifdef DEBUG |
| static void |
| hashtable_app_pc_study_custom(dcontext_t *dcontext, app_pc_table_t *htable, |
| uint entries_inc/*amnt table->entries was pre-inced*/) |
| { /* nothing */ |
| } |
| # endif |
| |
| static void |
| hashtable_app_pc_free_entry(dcontext_t *dcontext, app_pc_table_t *htable, |
| app_pc entry) |
| { |
| /* nothing to do, data is inlined */ |
| } |
| |
| #endif /* defined(RETURN_AFTER_CALL) || defined (RCT_IND_BRANCH) */ |
| /*******************************************************************************/ |
| |
| |
| bool |
| fragment_initialized(dcontext_t *dcontext) |
| { |
| return (dcontext != GLOBAL_DCONTEXT && dcontext->fragment_field != NULL); |
| } |
| |
| /* thread-shared initialization that should be repeated after a reset */ |
| void |
| fragment_reset_init(void) |
| { |
| /* case 7966: don't initialize at all for hotp_only & thin_client */ |
| if (RUNNING_WITHOUT_CODE_CACHE()) |
| return; |
| |
| mutex_lock(&shared_cache_flush_lock); |
| /* ASSUMPTION: a reset frees all deletions that use flushtimes, so we can |
| * reset the global flushtime here |
| */ |
| flushtime_global = 0; |
| mutex_unlock(&shared_cache_flush_lock); |
| |
| if (SHARED_FRAGMENTS_ENABLED()) { |
| if (DYNAMO_OPTION(shared_bbs)) { |
| hashtable_fragment_init(GLOBAL_DCONTEXT, shared_bb, |
| INIT_HTABLE_SIZE_SHARED_BB, |
| INTERNAL_OPTION(shared_bb_load), |
| (hash_function_t)INTERNAL_OPTION(alt_hash_func), |
| 0 /* hash_mask_offset */, |
| FRAG_TABLE_SHARED | FRAG_TABLE_TARGET_SHARED |
| _IF_DEBUG("shared_bb")); |
| } |
| if (DYNAMO_OPTION(shared_traces)) { |
| hashtable_fragment_init(GLOBAL_DCONTEXT, shared_trace, |
| INIT_HTABLE_SIZE_SHARED_TRACE, |
| INTERNAL_OPTION(shared_trace_load), |
| (hash_function_t)INTERNAL_OPTION(alt_hash_func), |
| 0 /* hash_mask_offset */, |
| FRAG_TABLE_SHARED | FRAG_TABLE_TARGET_SHARED |
| _IF_DEBUG("shared_trace")); |
| } |
| /* init routine will work for future_fragment_t* same as for fragment_t* */ |
| hashtable_fragment_init(GLOBAL_DCONTEXT, shared_future, |
| INIT_HTABLE_SIZE_SHARED_FUTURE, |
| INTERNAL_OPTION(shared_future_load), |
| (hash_function_t)INTERNAL_OPTION(alt_hash_func), |
| 0 /* hash_mask_offset */, |
| FRAG_TABLE_SHARED | FRAG_TABLE_TARGET_SHARED |
| _IF_DEBUG("shared_future")); |
| } |
| |
| if (SHARED_IBT_TABLES_ENABLED()) { |
| |
| ibl_branch_type_t branch_type; |
| |
| ASSERT(USE_SHARED_PT()); |
| |
| for (branch_type = IBL_BRANCH_TYPE_START; |
| branch_type < IBL_BRANCH_TYPE_END; branch_type++) { |
| if (DYNAMO_OPTION(shared_trace_ibt_tables)) { |
| hashtable_ibl_myinit(GLOBAL_DCONTEXT, &shared_pt->trace_ibt[branch_type], |
| DYNAMO_OPTION(shared_ibt_table_trace_init), |
| DYNAMO_OPTION(shared_ibt_table_trace_load), |
| HASH_FUNCTION_NONE, |
| HASHTABLE_IBL_OFFSET(branch_type), |
| branch_type, |
| false, /* no lookup table */ |
| FRAG_TABLE_SHARED | |
| FRAG_TABLE_TARGET_SHARED | |
| FRAG_TABLE_TRACE |
| _IF_DEBUG(ibl_trace_table_type_names[branch_type])); |
| #ifdef HASHTABLE_STATISTICS |
| if (INTERNAL_OPTION(hashtable_ibl_stats)) { |
| CHECK_UNPROT_STATS(&shared_pt->trace_ibt[branch_type]); |
| /* for compatibility using an entry in the per-branch type stats */ |
| INIT_HASHTABLE_STATS(shared_pt->trace_ibt[branch_type]. |
| UNPROT_STAT(trace_ibl_stats[branch_type])); |
| } else { |
| shared_pt->trace_ibt[branch_type].unprot_stats = NULL; |
| } |
| #endif /* HASHTABLE_STATISTICS */ |
| } |
| |
| if (DYNAMO_OPTION(shared_bb_ibt_tables)) { |
| hashtable_ibl_myinit(GLOBAL_DCONTEXT, &shared_pt->bb_ibt[branch_type], |
| DYNAMO_OPTION(shared_ibt_table_bb_init), |
| DYNAMO_OPTION(shared_ibt_table_bb_load), |
| HASH_FUNCTION_NONE, |
| HASHTABLE_IBL_OFFSET(branch_type), |
| branch_type, |
| false, /* no lookup table */ |
| FRAG_TABLE_SHARED | |
| FRAG_TABLE_TARGET_SHARED |
| _IF_DEBUG(ibl_bb_table_type_names[branch_type])); |
| /* mark as inclusive table for bb's - we in fact currently |
| * keep only frags that are not FRAG_IS_TRACE_HEAD */ |
| #ifdef HASHTABLE_STATISTICS |
| if (INTERNAL_OPTION(hashtable_ibl_stats)) { |
| /* for compatibility using an entry in the per-branch type stats */ |
| CHECK_UNPROT_STATS(&shared_pt->bb_ibt[branch_type]); |
| /* FIXME: we don't expect trace_ibl_stats yet */ |
| INIT_HASHTABLE_STATS(shared_pt->bb_ibt[branch_type]. |
| UNPROT_STAT(bb_ibl_stats[branch_type])); |
| } else { |
| shared_pt->bb_ibt[branch_type].unprot_stats = NULL; |
| } |
| #endif /* HASHTABLE_STATISTICS */ |
| } |
| } |
| } |
| |
| #ifdef SHARING_STUDY |
| if (INTERNAL_OPTION(fragment_sharing_study)) { |
| uint size = HASHTABLE_SIZE(SHARED_HASH_BITS) * sizeof(shared_entry_t*); |
| shared_blocks = (shared_entry_t**) global_heap_alloc(size HEAPACCT(ACCT_OTHER)); |
| memset(shared_blocks, 0, size); |
| shared_traces = (shared_entry_t**) global_heap_alloc(size HEAPACCT(ACCT_OTHER)); |
| memset(shared_traces, 0, size); |
| } |
| #endif |
| } |
| |
| /* thread-shared initialization */ |
| void |
| fragment_init() |
| { |
| /* case 7966: don't initialize at all for hotp_only & thin_client |
| * FIXME: could set initial sizes to 0 for all configurations, instead |
| */ |
| if (RUNNING_WITHOUT_CODE_CACHE()) |
| return; |
| |
| /* make sure fields are at same place */ |
| ASSERT(offsetof(fragment_t, flags) == offsetof(future_fragment_t, flags)); |
| ASSERT(offsetof(fragment_t, tag) == offsetof(future_fragment_t, tag)); |
| |
| /* ensure we can read this w/o a lock: no cache line crossing, please */ |
| ASSERT(ALIGNED(&flushtime_global, 4)); |
| |
| if (SHARED_FRAGMENTS_ENABLED()) { |
| /* tables are persistent across resets, only on heap for selfprot (case 7957) */ |
| if (DYNAMO_OPTION(shared_bbs)) { |
| shared_bb = HEAP_TYPE_ALLOC(GLOBAL_DCONTEXT, fragment_table_t, |
| ACCT_FRAG_TABLE, PROTECTED); |
| } |
| if (DYNAMO_OPTION(shared_traces)) { |
| shared_trace = HEAP_TYPE_ALLOC(GLOBAL_DCONTEXT, fragment_table_t, |
| ACCT_FRAG_TABLE, PROTECTED); |
| } |
| shared_future = HEAP_TYPE_ALLOC(GLOBAL_DCONTEXT, fragment_table_t, |
| ACCT_FRAG_TABLE, PROTECTED); |
| } |
| |
| if (USE_SHARED_PT()) |
| shared_pt = HEAP_TYPE_ALLOC(GLOBAL_DCONTEXT, per_thread_t, ACCT_OTHER, PROTECTED); |
| |
| if (SHARED_IBT_TABLES_ENABLED()) { |
| dead_lists = |
| HEAP_TYPE_ALLOC(GLOBAL_DCONTEXT, dead_table_lists_t, ACCT_OTHER, PROTECTED); |
| memset(dead_lists, 0, sizeof(*dead_lists)); |
| } |
| |
| fragment_reset_init(); |
| |
| #if defined(INTERNAL) || defined(CLIENT_INTERFACE) |
| if (TRACEDUMP_ENABLED() && DYNAMO_OPTION(shared_traces)) { |
| ASSERT(USE_SHARED_PT()); |
| shared_pt->tracefile = open_log_file("traces-shared", NULL, 0); |
| ASSERT(shared_pt->tracefile != INVALID_FILE); |
| init_trace_file(shared_pt); |
| } |
| #endif |
| } |
| |
| /* Free all thread-shared state not critical to forward progress; |
| * fragment_reset_init() will be called before continuing. |
| */ |
| void |
| fragment_reset_free(void) |
| { |
| /* case 7966: don't initialize at all for hotp_only & thin_client */ |
| if (RUNNING_WITHOUT_CODE_CACHE()) |
| return; |
| |
| /* We must study the ibl tables before the trace/bb tables so that we're |
| * not looking at freed entries |
| */ |
| if (SHARED_IBT_TABLES_ENABLED()) { |
| |
| ibl_branch_type_t branch_type; |
| dead_fragment_table_t *current, *next; |
| DEBUG_DECLARE(int table_count = 0;) |
| DEBUG_DECLARE(stats_int_t dead_tables = GLOBAL_STAT(num_dead_shared_ibt_tables);) |
| |
| for (branch_type = IBL_BRANCH_TYPE_START; |
| branch_type < IBL_BRANCH_TYPE_END; branch_type++) { |
| if (DYNAMO_OPTION(shared_trace_ibt_tables)) { |
| DOLOG(1, LOG_FRAGMENT|LOG_STATS, { |
| hashtable_ibl_load_statistics(GLOBAL_DCONTEXT, |
| &shared_pt->trace_ibt[branch_type]); |
| }); |
| hashtable_ibl_myfree(GLOBAL_DCONTEXT, |
| &shared_pt->trace_ibt[branch_type]); |
| } |
| if (DYNAMO_OPTION(shared_bb_ibt_tables)) { |
| DOLOG(1, LOG_FRAGMENT|LOG_STATS, { |
| hashtable_ibl_load_statistics(GLOBAL_DCONTEXT, |
| &shared_pt->bb_ibt[branch_type]); |
| }); |
| hashtable_ibl_myfree(GLOBAL_DCONTEXT, |
| &shared_pt->bb_ibt[branch_type]); |
| } |
| } |
| |
| /* Delete dead tables. */ |
| /* grab lock for consistency, although we expect a single thread */ |
| mutex_lock(&dead_tables_lock); |
| current = dead_lists->dead_tables; |
| while (current != NULL) { |
| DODEBUG({table_count++;}); |
| next = current->next; |
| LOG(GLOBAL, LOG_FRAGMENT, 2, |
| "fragment_reset_free: dead table "PFX" cap %d, freeing\n", |
| current->table_unaligned, current->capacity); |
| hashtable_ibl_free_table(GLOBAL_DCONTEXT, current->table_unaligned, |
| current->table_flags, current->capacity); |
| heap_free(GLOBAL_DCONTEXT, current, sizeof(dead_fragment_table_t) |
| HEAPACCT(ACCT_IBLTABLE)); |
| STATS_DEC(num_dead_shared_ibt_tables); |
| STATS_INC(num_dead_shared_ibt_tables_freed); |
| current = next; |
| DODEBUG({ |
| if (dynamo_exited) |
| STATS_INC(num_dead_shared_ibt_tables_freed_at_exit); |
| }); |
| } |
| dead_lists->dead_tables = dead_lists->dead_tables_tail = NULL; |
| ASSERT(table_count == dead_tables); |
| mutex_unlock(&dead_tables_lock); |
| } |
| |
| /* FIXME: Take in a flag "permanent" that controls whether exiting or |
| * resetting. If resetting only, do not free unprot stats and entry stats |
| * (they're already in persistent heap, but we explicitly free them). |
| * This will be easy w/ unprot but will take work for entry stats |
| * since they resize themselves. |
| * Or, move them both to a new unprot and nonpersistent heap so we can |
| * actually free the memory back to the os, if we don't care to keep |
| * the stats across the reset. |
| */ |
| /* N.B.: to avoid rank order issues w/ shared_vm_areas lock being acquired |
| * after table_rwlock we do NOT grab the write lock before calling |
| * reset on the shared tables! We assume that reset involves suspending |
| * all other threads in DR and there will be no races. If the reset model |
| * changes, the lock order will have to be addressed. |
| */ |
| if (SHARED_FRAGMENTS_ENABLED()) { |
| /* clean up pending delayed deletion, if any */ |
| vm_area_check_shared_pending(GLOBAL_DCONTEXT/*== safe to free all*/, NULL); |
| |
| if (DYNAMO_OPTION(coarse_units)) { |
| /* We need to free coarse units earlier than vm_areas_exit() so we |
| * call it here. Must call before we free fine fragments so coarse |
| * can clean up incoming pointers. |
| */ |
| vm_area_coarse_units_reset_free(); |
| } |
| |
| #if defined(DEBUG) || defined(CLIENT_INTERFACE) |
| /* We need this for CLIENT_INTERFACE so that clients get fragment deleted events. */ |
| # if !defined(DEBUG) && defined(CLIENT_INTERFACE) |
| if (dr_fragment_deleted_hook_exists()) { |
| # endif |
| if (DYNAMO_OPTION(shared_bbs)) |
| hashtable_fragment_reset(GLOBAL_DCONTEXT, shared_bb); |
| if (DYNAMO_OPTION(shared_traces)) |
| hashtable_fragment_reset(GLOBAL_DCONTEXT, shared_trace); |
| DODEBUG({hashtable_fragment_reset(GLOBAL_DCONTEXT, shared_future);}); |
| # if !defined(DEBUG) && defined(CLIENT_INTERFACE) |
| } |
| # endif |
| #endif |
| |
| if (DYNAMO_OPTION(shared_bbs)) |
| hashtable_fragment_free(GLOBAL_DCONTEXT, shared_bb); |
| if (DYNAMO_OPTION(shared_traces)) |
| hashtable_fragment_free(GLOBAL_DCONTEXT, shared_trace); |
| hashtable_fragment_free(GLOBAL_DCONTEXT, shared_future); |
| /* Do NOT free RAC table as its state cannot be rebuilt. |
| * We also do not free other RCT tables to avoid the time to rebuild them. |
| */ |
| } |
| |
| #ifdef SHARING_STUDY |
| if (INTERNAL_OPTION(fragment_sharing_study)) { |
| print_shared_stats(); |
| reset_shared_block_table(shared_blocks, &shared_blocks_lock); |
| reset_shared_block_table(shared_traces, &shared_traces_lock); |
| } |
| #endif |
| } |
| |
| /* free all state */ |
| void |
| fragment_exit() |
| { |
| /* case 7966: don't initialize at all for hotp_only & thin_client |
| * FIXME: could set initial sizes to 0 for all configurations, instead |
| */ |
| if (RUNNING_WITHOUT_CODE_CACHE()) |
| goto cleanup; |
| |
| #if defined(INTERNAL) || defined(CLIENT_INTERFACE) |
| if (TRACEDUMP_ENABLED() && DYNAMO_OPTION(shared_traces)) { |
| /* write out all traces prior to deleting any, so links print nicely */ |
| uint i; |
| fragment_t *f; |
| /* change_linking_lock is required for output_trace(), though there |
| * won't be any races at this point of exiting. |
| */ |
| acquire_recursive_lock(&change_linking_lock); |
| TABLE_RWLOCK(shared_trace, read, lock); |
| for (i = 0; i < shared_trace->capacity; i++) { |
| f = shared_trace->table[i]; |
| if (!REAL_FRAGMENT(f)) |
| continue; |
| if (SHOULD_OUTPUT_FRAGMENT(f->flags)) |
| output_trace(GLOBAL_DCONTEXT, shared_pt, f, -1); |
| } |
| TABLE_RWLOCK(shared_trace, read, unlock); |
| release_recursive_lock(&change_linking_lock); |
| exit_trace_file(shared_pt); |
| } |
| #endif |
| |
| #ifdef FRAGMENT_SIZES_STUDY |
| DOLOG(1, (LOG_FRAGMENT|LOG_STATS), { |
| print_size_results(); |
| }); |
| #endif |
| |
| fragment_reset_free(); |
| |
| #ifdef RETURN_AFTER_CALL |
| if (dynamo_options.ret_after_call && rac_non_module_table.live_table != NULL) { |
| DODEBUG({ |
| DOLOG(1, LOG_FRAGMENT|LOG_STATS, { |
| hashtable_app_pc_load_statistics(GLOBAL_DCONTEXT, |
| rac_non_module_table.live_table); |
| }); |
| hashtable_app_pc_study(GLOBAL_DCONTEXT, rac_non_module_table.live_table, |
| 0/*table consistent*/); |
| }); |
| hashtable_app_pc_free(GLOBAL_DCONTEXT, rac_non_module_table.live_table); |
| HEAP_TYPE_FREE(GLOBAL_DCONTEXT, rac_non_module_table.live_table, |
| app_pc_table_t, ACCT_AFTER_CALL, PROTECTED); |
| rac_non_module_table.live_table = NULL; |
| } |
| ASSERT(rac_non_module_table.persisted_table == NULL); |
| DELETE_LOCK(after_call_lock); |
| #endif |
| |
| #if defined(RCT_IND_BRANCH) && defined(UNIX) |
| /* we do not free these tables in fragment_reset_free() b/c we |
| * would just have to build them all back up again in order to |
| * continue execution |
| */ |
| if ((TEST(OPTION_ENABLED, DYNAMO_OPTION(rct_ind_call)) || |
| TEST(OPTION_ENABLED, DYNAMO_OPTION(rct_ind_jump))) && |
| rct_global_table.live_table != NULL) { |
| DODEBUG({ |
| DOLOG(1, LOG_FRAGMENT|LOG_STATS, { |
| hashtable_app_pc_load_statistics(GLOBAL_DCONTEXT, |
| rct_global_table.live_table); |
| }); |
| hashtable_app_pc_study(GLOBAL_DCONTEXT, rct_global_table.live_table, |
| 0/*table consistent*/); |
| }); |
| hashtable_app_pc_free(GLOBAL_DCONTEXT, rct_global_table.live_table); |
| HEAP_TYPE_FREE(GLOBAL_DCONTEXT, rct_global_table.live_table, app_pc_table_t, |
| ACCT_AFTER_CALL, PROTECTED); |
| rct_global_table.live_table = NULL; |
| } else |
| ASSERT(rct_global_table.live_table == NULL); |
| ASSERT(rct_global_table.persisted_table == NULL); |
| #endif /* RCT_IND_BRANCH */ |
| |
| if (SHARED_FRAGMENTS_ENABLED()) { |
| /* tables are persistent across resets, only on heap for selfprot (case 7957) */ |
| if (DYNAMO_OPTION(shared_bbs)) { |
| HEAP_TYPE_FREE(GLOBAL_DCONTEXT, shared_bb, fragment_table_t, |
| ACCT_FRAG_TABLE, PROTECTED); |
| shared_bb = NULL; |
| } else |
| ASSERT(shared_bb == NULL); |
| if (DYNAMO_OPTION(shared_traces)) { |
| HEAP_TYPE_FREE(GLOBAL_DCONTEXT, shared_trace, fragment_table_t, |
| ACCT_FRAG_TABLE, PROTECTED); |
| shared_trace = NULL; |
| } else |
| ASSERT(shared_trace == NULL); |
| HEAP_TYPE_FREE(GLOBAL_DCONTEXT, shared_future, fragment_table_t, |
| ACCT_FRAG_TABLE, PROTECTED); |
| shared_future = NULL; |
| } |
| |
| if (SHARED_IBT_TABLES_ENABLED()) { |
| HEAP_TYPE_FREE(GLOBAL_DCONTEXT, dead_lists, dead_table_lists_t, |
| ACCT_OTHER, PROTECTED); |
| dead_lists = NULL; |
| } else |
| ASSERT(dead_lists == NULL); |
| |
| if (USE_SHARED_PT()) { |
| ASSERT(shared_pt != NULL); |
| HEAP_TYPE_FREE(GLOBAL_DCONTEXT, shared_pt, per_thread_t, ACCT_OTHER, PROTECTED); |
| shared_pt = NULL; |
| } else |
| ASSERT(shared_pt == NULL); |
| |
| if (SHARED_IBT_TABLES_ENABLED()) |
| DELETE_LOCK(dead_tables_lock); |
| #ifdef SHARING_STUDY |
| if (INTERNAL_OPTION(fragment_sharing_study)) { |
| DELETE_LOCK(shared_blocks_lock); |
| DELETE_LOCK(shared_traces_lock); |
| } |
| #endif |
| cleanup: |
| /* FIXME: we shouldn't need these locks anyway for hotp_only & thin_client */ |
| #if defined(INTERNAL) || defined(CLIENT_INTERFACE) |
| DELETE_LOCK(tracedump_mutex); |
| #endif |
| #ifdef CLIENT_INTERFACE |
| process_client_flush_requests(NULL, GLOBAL_DCONTEXT, client_flush_requests, |
| false /* no flush */); |
| DELETE_LOCK(client_flush_request_lock); |
| #endif |
| DELETE_LOCK(shared_cache_flush_lock); |
| } |
| |
| /* Decrement the ref-count for any reference to table that the |
| * per_thread_t contains. If could_be_live is true, will acquire write |
| * locks for the currently live tables. */ |
| /* NOTE: Can't inline in release build -- too many call sites? */ |
| static /* inline */ void |
| dec_table_ref_count(dcontext_t *dcontext, ibl_table_t *table, bool could_be_live) |
| { |
| ibl_table_t *live_table = NULL; |
| ibl_branch_type_t branch_type; |
| |
| /* Search live tables. A live table's ref-count is decremented |
| * during a thread exit. */ |
| /* FIXME If the table is more likely to be dead, we can reverse the order |
| * and search dead tables first. */ |
| if (!DYNAMO_OPTION(ref_count_shared_ibt_tables)) |
| return; |
| ASSERT(TESTALL(FRAG_TABLE_SHARED | FRAG_TABLE_IBL_TARGETED, |
| table->table_flags)); |
| if (could_be_live) { |
| for (branch_type = IBL_BRANCH_TYPE_START; |
| branch_type < IBL_BRANCH_TYPE_END; branch_type++) { |
| /* We match based on lookup table addresses. We need to lock the table |
| * during the compare and hold the lock during the ref-count dec to |
| * prevent a race with it being moved to the dead list. |
| */ |
| ibl_table_t *sh_table_ptr = TEST(FRAG_TABLE_TRACE, table->table_flags) ? |
| &shared_pt->trace_ibt[branch_type] : &shared_pt->bb_ibt[branch_type]; |
| TABLE_RWLOCK(sh_table_ptr, write, lock); |
| if (table->table == sh_table_ptr->table) { |
| live_table = sh_table_ptr; |
| break; |
| } |
| TABLE_RWLOCK(sh_table_ptr, write, unlock); |
| } |
| } |
| if (live_table != NULL) { |
| /* During shutdown, the ref-count can reach 0. The table is freed |
| * in the fragment_exit() path. */ |
| ASSERT(live_table->ref_count >= 1); |
| live_table->ref_count--; |
| TABLE_RWLOCK(live_table, write, unlock); |
| } |
| else { /* Search the dead tables list. */ |
| dead_fragment_table_t *current = dead_lists->dead_tables; |
| dead_fragment_table_t *prev = NULL; |
| |
| ASSERT(dead_lists->dead_tables != NULL); |
| ASSERT(dead_lists->dead_tables_tail != NULL); |
| /* We expect to be removing from the head of the list but due to |
| * races could be removing from the middle, i.e., if a preceding |
| * entry is about to be removed by another thread but the |
| * dead_tables_lock hasn't been acquired yet by that thread. |
| */ |
| mutex_lock(&dead_tables_lock); |
| for (current = dead_lists->dead_tables; current != NULL; |
| prev = current, current = current->next) { |
| if (current->table_unaligned == table->table_unaligned) { |
| ASSERT_CURIOSITY(current->ref_count >= 1); |
| current->ref_count--; |
| if (current->ref_count == 0) { |
| LOG(GLOBAL, LOG_FRAGMENT, 2, |
| "dec_table_ref_count: table "PFX" cap %d at ref 0, freeing\n", |
| current->table_unaligned, current->capacity); |
| /* Unlink this table from the list. */ |
| if (prev != NULL) |
| prev->next = current->next; |
| if (current == dead_lists->dead_tables) { |
| /* remove from the front */ |
| ASSERT(prev == NULL); |
| dead_lists->dead_tables = current->next; |
| } |
| if (current == dead_lists->dead_tables_tail) |
| dead_lists->dead_tables_tail = prev; |
| hashtable_ibl_free_table(GLOBAL_DCONTEXT, |
| current->table_unaligned, |
| current->table_flags, |
| current->capacity); |
| heap_free(GLOBAL_DCONTEXT, current, sizeof(dead_fragment_table_t) |
| HEAPACCT(ACCT_IBLTABLE)); |
| STATS_DEC(num_dead_shared_ibt_tables); |
| STATS_INC(num_dead_shared_ibt_tables_freed); |
| } |
| break; |
| } |
| } |
| mutex_unlock(&dead_tables_lock); |
| ASSERT(current != NULL); |
| } |
| } |
| |
| /* Decrement the ref-count for every shared IBT table that the |
| * per_thread_t has a reference to. */ |
| static void |
| dec_all_table_ref_counts(dcontext_t *dcontext, per_thread_t *pt) |
| { |
| /* We can also decrement ref-count for dead shared tables here. */ |
| if (SHARED_IBT_TABLES_ENABLED()) { |
| ibl_branch_type_t branch_type; |
| for (branch_type = IBL_BRANCH_TYPE_START; |
| branch_type < IBL_BRANCH_TYPE_END; branch_type++) { |
| if (DYNAMO_OPTION(shared_trace_ibt_tables)) { |
| ASSERT(pt->trace_ibt[branch_type].table != NULL); |
| dec_table_ref_count(dcontext, &pt->trace_ibt[branch_type], |
| true/*check live*/); |
| } |
| if (DYNAMO_OPTION(shared_bb_ibt_tables)) { |
| ASSERT(pt->bb_ibt[branch_type].table != NULL); |
| dec_table_ref_count(dcontext, &pt->bb_ibt[branch_type], |
| true/*check live*/); |
| } |
| } |
| } |
| } |
| |
| /* re-initializes non-persistent memory */ |
| void |
| fragment_thread_reset_init(dcontext_t *dcontext) |
| { |
| per_thread_t *pt; |
| ibl_branch_type_t branch_type; |
| |
| /* case 7966: don't initialize at all for hotp_only & thin_client */ |
| if (RUNNING_WITHOUT_CODE_CACHE()) |
| return; |
| |
| pt = (per_thread_t *) dcontext->fragment_field; |
| |
| /* important to init w/ cur timestamp to avoid this thread dec-ing ref |
| * count when it wasn't included in ref count init value! |
| * assumption: don't need lock to read flushtime_global atomically. |
| * when resetting, though, thread free & re-init is done before global free, |
| * so we have to explicitly set to 0 for that case. |
| */ |
| pt->flushtime_last_update = (dynamo_resetting) ? 0 : flushtime_global; |
| |
| /* set initial hashtable sizes */ |
| hashtable_fragment_init(dcontext, &pt->bb, INIT_HTABLE_SIZE_BB, |
| INTERNAL_OPTION(private_bb_load), |
| (hash_function_t)INTERNAL_OPTION(alt_hash_func), |
| 0, 0 _IF_DEBUG("bblock")); |
| |
| /* init routine will work for future_fragment_t* same as for fragment_t* */ |
| hashtable_fragment_init(dcontext, &pt->future, INIT_HTABLE_SIZE_FUTURE, |
| INTERNAL_OPTION(private_future_load), |
| (hash_function_t)INTERNAL_OPTION(alt_hash_func), |
| 0 /* hash_mask_offset */, 0 |
| _IF_DEBUG("future")); |
| |
| /* The trace table is no longer used by IBL routines and therefore |
| * doesn't need a lookup table; we can also use the alternative hash |
| * functions and a higher load. |
| */ |
| if (PRIVATE_TRACES_ENABLED()) { |
| hashtable_fragment_init(dcontext, &pt->trace, INIT_HTABLE_SIZE_TRACE, |
| INTERNAL_OPTION(private_trace_load), |
| (hash_function_t)INTERNAL_OPTION(alt_hash_func), |
| 0 /* hash_mask_offset */, |
| FRAG_TABLE_TRACE |
| _IF_DEBUG("trace")); |
| } |
| |
| /* We'll now have more control over hashtables based on branch |
| * type. The most important of all is of course the return |
| * target table. These tables should be populated only when |
| * we know that the entry is a valid target, a trace is |
| * created, and it is indeed targeted by an IBL. |
| */ |
| /* These tables are targeted by both bb and trace routines */ |
| for (branch_type = IBL_BRANCH_TYPE_START; |
| branch_type < IBL_BRANCH_TYPE_END; branch_type++) { |
| if (!DYNAMO_OPTION(disable_traces) |
| /* If no traces and no bb ibl targets we point ibl at |
| * an empty trace table */ |
| || !DYNAMO_OPTION(bb_ibl_targets)) { |
| if (!DYNAMO_OPTION(shared_trace_ibt_tables)) { |
| hashtable_ibl_myinit(dcontext, &pt->trace_ibt[branch_type], |
| DYNAMO_OPTION(private_trace_ibl_targets_init), |
| DYNAMO_OPTION(private_ibl_targets_load), |
| HASH_FUNCTION_NONE, |
| HASHTABLE_IBL_OFFSET(branch_type), |
| branch_type, |
| false, /* no lookup table */ |
| (DYNAMO_OPTION(shared_traces) ? |
| FRAG_TABLE_TARGET_SHARED : 0) | |
| FRAG_TABLE_TRACE |
| _IF_DEBUG(ibl_trace_table_type_names[branch_type])); |
| #ifdef HASHTABLE_STATISTICS |
| if (INTERNAL_OPTION(hashtable_ibl_stats)) { |
| CHECK_UNPROT_STATS(pt->trace_ibt[branch_type]); |
| /* for compatibility using an entry in the per-branch type stats */ |
| INIT_HASHTABLE_STATS(pt->trace_ibt[branch_type]. |
| UNPROT_STAT(trace_ibl_stats[branch_type])); |
| } else { |
| pt->trace_ibt[branch_type].unprot_stats = NULL; |
| } |
| #endif /* HASHTABLE_STATISTICS */ |
| } |
| else { |
| /* ensure table from last time (if we had a reset) not still there */ |
| memset(&pt->trace_ibt[branch_type], 0, sizeof(pt->trace_ibt[branch_type])); |
| update_private_ptr_to_shared_ibt_table(dcontext, branch_type, |
| true, /* trace = yes */ |
| false, /* no adjust old |
| * ref-count */ |
| true /* lock */); |
| #ifdef HASHTABLE_STATISTICS |
| if (INTERNAL_OPTION(hashtable_ibl_stats)) { |
| ALLOC_UNPROT_STATS(dcontext, &pt->trace_ibt[branch_type]); |
| CHECK_UNPROT_STATS(pt->trace_ibt[branch_type]); |
| INIT_HASHTABLE_STATS(pt->trace_ibt[branch_type]. |
| UNPROT_STAT(trace_ibl_stats[branch_type])); |
| } else { |
| pt->trace_ibt[branch_type].unprot_stats = NULL; |
| } |
| #endif |
| } |
| } |
| |
| /* When targeting BBs, currently the source is assumed to be only a |
| * bb since traces going to a bb for the first time should mark it |
| * as a trace head. Therefore the tables are currently only |
| * targeted by bb IBL routines. It will be possible to later |
| * deal with trace heads and allow a trace to target a BB with |
| * the intent of modifying its THCI. |
| * |
| * (FIXME: having another table for THCI IBLs seems better than |
| * adding a counter (starting at -1) to all blocks and |
| * trapping when 0 for marking a trace head and again at 50 |
| * for creating a trace. And that is all of course after proving |
| * that doing it in DR has significant impact.) |
| * |
| * Note that private bb2bb transitions are not captured when |
| * we run with -shared_bbs. |
| */ |
| |
| /* These tables should be populated only when we know that the |
| * entry is a valid target, and it is indeed targeted by an |
| * IBL. They have to be per-type so that our security |
| * policies are properly checked. |
| */ |
| if (DYNAMO_OPTION(bb_ibl_targets)) { |
| if (!DYNAMO_OPTION(shared_bb_ibt_tables)) { |
| hashtable_ibl_myinit(dcontext, &pt->bb_ibt[branch_type], |
| DYNAMO_OPTION(private_bb_ibl_targets_init), |
| DYNAMO_OPTION(private_bb_ibl_targets_load), |
| HASH_FUNCTION_NONE, |
| HASHTABLE_IBL_OFFSET(branch_type), |
| branch_type, |
| false, /* no lookup table */ |
| (DYNAMO_OPTION(shared_bbs) ? |
| FRAG_TABLE_TARGET_SHARED : 0) |
| _IF_DEBUG(ibl_bb_table_type_names[branch_type])); |
| /* mark as inclusive table for bb's - we in fact currently |
| * keep only frags that are not FRAG_IS_TRACE_HEAD */ |
| #ifdef HASHTABLE_STATISTICS |
| if (INTERNAL_OPTION(hashtable_ibl_stats)) { |
| /* for compatibility using an entry in the per-branch type stats */ |
| CHECK_UNPROT_STATS(pt->bb_ibt[branch_type]); |
| /* FIXME: we don't expect trace_ibl_stats yet */ |
| INIT_HASHTABLE_STATS(pt->bb_ibt[branch_type]. |
| UNPROT_STAT(bb_ibl_stats[branch_type])); |
| } else { |
| pt->bb_ibt[branch_type].unprot_stats = NULL; |
| } |
| #endif /* HASHTABLE_STATISTICS */ |
| } |
| else { |
| /* ensure table from last time (if we had a reset) not still there */ |
| memset(&pt->bb_ibt[branch_type], 0, sizeof(pt->bb_ibt[branch_type])); |
| update_private_ptr_to_shared_ibt_table(dcontext, branch_type, |
| false, /* trace = no */ |
| false, /* no adjust old |
| * ref-count */ |
| true /* lock */); |
| #ifdef HASHTABLE_STATISTICS |
| if (INTERNAL_OPTION(hashtable_ibl_stats)) { |
| ALLOC_UNPROT_STATS(dcontext, &pt->bb_ibt[branch_type]); |
| CHECK_UNPROT_STATS(pt->bb_ibt[branch_type]); |
| INIT_HASHTABLE_STATS(pt->bb_ibt[branch_type]. |
| UNPROT_STAT(bb_ibl_stats[branch_type])); |
| } else { |
| pt->bb_ibt[branch_type].unprot_stats = NULL; |
| } |
| #endif |
| } |
| } |
| } |
| ASSERT(IBL_BRANCH_TYPE_END == 3); |
| |
| update_generated_hashtable_access(dcontext); |
| } |
| |
| void |
| fragment_thread_init(dcontext_t *dcontext) |
| { |
| /* we allocate per_thread_t in the global heap solely for self-protection, |
| * even when turned off, since even with a lot of threads this isn't a lot of |
| * pressure on the global heap |
| */ |
| per_thread_t *pt; |
| |
| /* case 7966: don't initialize un-needed data for hotp_only & thin_client. |
| * FIXME: could set htable initial sizes to 0 for all configurations, instead. |
| * per_thread_t is pretty big, so we avoid it, though it costs us checks for |
| * hotp_only in the islinking-related routines. |
| */ |
| if (RUNNING_WITHOUT_CODE_CACHE()) |
| return; |
| |
| pt = (per_thread_t *) global_heap_alloc(sizeof(per_thread_t) HEAPACCT(ACCT_OTHER)); |
| dcontext->fragment_field = (void *) pt; |
| |
| fragment_thread_reset_init(dcontext); |
| |
| #if defined(INTERNAL) || defined(CLIENT_INTERFACE) |
| if (TRACEDUMP_ENABLED() && PRIVATE_TRACES_ENABLED()) { |
| pt->tracefile = open_log_file("traces", NULL, 0); |
| ASSERT(pt->tracefile != INVALID_FILE); |
| init_trace_file(pt); |
| } |
| #endif |
| #if defined(CLIENT_INTERFACE) && defined(CLIENT_SIDELINE) |
| ASSIGN_INIT_LOCK_FREE(pt->fragment_delete_mutex, fragment_delete_mutex); |
| #endif |
| |
| pt->could_be_linking = false; |
| pt->wait_for_unlink = false; |
| pt->about_to_exit = false; |
| pt->flush_queue_nonempty = false; |
| pt->waiting_for_unlink = create_event(); |
| pt->finished_with_unlink = create_event(); |
| ASSIGN_INIT_LOCK_FREE(pt->linking_lock, linking_lock); |
| pt->finished_all_unlink = create_event(); |
| pt->soon_to_be_linking = false; |
| pt->at_syscall_at_flush = false; |
| |
| #ifdef PROFILE_LINKCOUNT |
| pt->tracedump_num_below_threshold = 0; |
| pt->tracedump_count_below_threshold = (linkcount_type_t) 0; |
| #endif |
| } |
| |
| static bool |
| check_flush_queue(dcontext_t *dcontext, fragment_t *was_I_flushed); |
| |
| /* frees all non-persistent memory */ |
| void |
| fragment_thread_reset_free(dcontext_t *dcontext) |
| { |
| per_thread_t *pt = (per_thread_t *) dcontext->fragment_field; |
| DEBUG_DECLARE(ibl_branch_type_t branch_type;) |
| |
| /* case 7966: don't initialize at all for hotp_only & thin_client */ |
| if (RUNNING_WITHOUT_CODE_CACHE()) |
| return; |
| |
| /* Dec ref count on any shared tables that are pointed to. */ |
| dec_all_table_ref_counts(dcontext, pt); |
| |
| #ifdef DEBUG |
| /* for non-debug we take the fast exit path and don't free local heap */ |
| SELF_PROTECT_CACHE(dcontext, NULL, WRITABLE); |
| |
| /* we remove flushed fragments from the htable, and they can be |
| * flushed after enter_threadexit() due to os_thread_stack_exit(), |
| * so we need to check the flush queue here |
| */ |
| mutex_lock(&pt->linking_lock); |
| check_flush_queue(dcontext, NULL); |
| mutex_unlock(&pt->linking_lock); |
| |
| /* For consistency we remove entries from the IBL targets |
| * tables before we remove them from the trace table. However, |
| * we cannot free any fragments here, since all of them will |
| * certainly still be present in the trace table. |
| */ |
| for (branch_type = IBL_BRANCH_TYPE_START; |
| branch_type < IBL_BRANCH_TYPE_END; branch_type++) { |
| if (!DYNAMO_OPTION(disable_traces) |
| /* If no traces and no bb ibl targets we point ibl at |
| * an empty trace table */ |
| || !DYNAMO_OPTION(bb_ibl_targets)) { |
| if (!DYNAMO_OPTION(shared_trace_ibt_tables)) { |
| DOLOG(2, LOG_FRAGMENT, { |
| hashtable_ibl_dump_table(dcontext, &pt->trace_ibt[branch_type]); |
| }); |
| DOLOG(1, LOG_FRAGMENT|LOG_STATS, { |
| hashtable_ibl_load_statistics(dcontext, &pt->trace_ibt[branch_type]); |
| }); |
| hashtable_ibl_myfree(dcontext, &pt->trace_ibt[branch_type]); |
| } else { |
| # ifdef HASHTABLE_STATISTICS |
| if (INTERNAL_OPTION(hashtable_ibl_stats)) { |
| print_hashtable_stats(dcontext, "Total", |
| shared_pt->trace_ibt[branch_type].name, |
| "trace ibl ", |
| get_branch_type_name(branch_type), |
| &pt->trace_ibt[branch_type]. |
| UNPROT_STAT(trace_ibl_stats[branch_type])); |
| DEALLOC_UNPROT_STATS(dcontext, &pt->trace_ibt[branch_type]); |
| } |
| # endif |
| memset(&pt->trace_ibt[branch_type], 0, |
| sizeof(pt->trace_ibt[branch_type])); |
| } |
| } |
| if (DYNAMO_OPTION(bb_ibl_targets)) { |
| if (!DYNAMO_OPTION(shared_bb_ibt_tables)) { |
| DOLOG(2, LOG_FRAGMENT, { |
| hashtable_ibl_dump_table(dcontext, &pt->bb_ibt[branch_type]); |
| }); |
| DOLOG(1, LOG_FRAGMENT|LOG_STATS, { |
| hashtable_ibl_load_statistics(dcontext, &pt->bb_ibt[branch_type]); |
| }); |
| hashtable_ibl_myfree(dcontext, &pt->bb_ibt[branch_type]); |
| } else { |
| # ifdef HASHTABLE_STATISTICS |
| if (INTERNAL_OPTION(hashtable_ibl_stats)) { |
| print_hashtable_stats(dcontext, "Total", |
| shared_pt->bb_ibt[branch_type].name, |
| "bb ibl ", |
| get_branch_type_name(branch_type), |
| &pt->bb_ibt[branch_type]. |
| UNPROT_STAT(bb_ibl_stats[branch_type])); |
| DEALLOC_UNPROT_STATS(dcontext, &pt->bb_ibt[branch_type]); |
| } |
| # endif |
| memset(&pt->bb_ibt[branch_type], 0, sizeof(pt->bb_ibt[branch_type])); |
| } |
| } |
| } |
| |
| /* case 7653: we can't free the main tables prior to freeing the contents |
| * of all of them, as link freeing involves looking up in the other tables. |
| */ |
| if (PRIVATE_TRACES_ENABLED()) { |
| DOLOG(1, LOG_FRAGMENT|LOG_STATS, { |
| hashtable_fragment_load_statistics(dcontext, &pt->trace); |
| }); |
| hashtable_fragment_reset(dcontext, &pt->trace); |
| } |
| DOLOG(1, LOG_FRAGMENT|LOG_STATS, { |
| hashtable_fragment_load_statistics(dcontext, &pt->bb); |
| }); |
| hashtable_fragment_reset(dcontext, &pt->bb); |
| DOLOG(1, LOG_FRAGMENT|LOG_STATS, { |
| hashtable_fragment_load_statistics(dcontext, &pt->future); |
| }); |
| hashtable_fragment_reset(dcontext, &pt->future); |
| |
| if (PRIVATE_TRACES_ENABLED()) |
| hashtable_fragment_free(dcontext, &pt->trace); |
| hashtable_fragment_free(dcontext, &pt->bb); |
| hashtable_fragment_free(dcontext, &pt->future); |
| |
| SELF_PROTECT_CACHE(dcontext, NULL, READONLY); |
| |
| #else |
| /* Case 10807: Clients need to be informed of fragment deletions |
| * so we'll reset the relevant hash tables for CI release builds. |
| */ |
| # ifdef CLIENT_INTERFACE |
| if (PRIVATE_TRACES_ENABLED()) |
| hashtable_fragment_reset(dcontext, &pt->trace); |
| hashtable_fragment_reset(dcontext, &pt->bb); |
| # endif |
| |
| #endif /* !DEBUG */ |
| } |
| |
| /* atexit cleanup */ |
| void |
| fragment_thread_exit(dcontext_t *dcontext) |
| { |
| per_thread_t *pt = (per_thread_t *) dcontext->fragment_field; |
| |
| /* case 7966: don't initialize at all for hotp_only & thin_client */ |
| if (RUNNING_WITHOUT_CODE_CACHE()) |
| return; |
| |
| #if defined(INTERNAL) || defined(CLIENT_INTERFACE) |
| if (TRACEDUMP_ENABLED() && PRIVATE_TRACES_ENABLED()) { |
| /* write out all traces prior to deleting any, so links print nicely */ |
| uint i; |
| fragment_t *f; |
| for (i = 0; i < pt->trace.capacity; i++) { |
| f = pt->trace.table[i]; |
| if (!REAL_FRAGMENT(f)) |
| continue; |
| if (SHOULD_OUTPUT_FRAGMENT(f->flags)) |
| output_trace(dcontext, pt, f, -1); |
| } |
| exit_trace_file(pt); |
| } |
| #endif |
| |
| fragment_thread_reset_free(dcontext); |
| |
| /* events are global */ |
| destroy_event(pt->waiting_for_unlink); |
| destroy_event(pt->finished_with_unlink); |
| destroy_event(pt->finished_all_unlink); |
| DELETE_LOCK(pt->linking_lock); |
| |
| #if defined(CLIENT_INTERFACE) && defined(CLIENT_SIDELINE) |
| DELETE_LOCK(pt->fragment_delete_mutex); |
| #endif |
| |
| global_heap_free(pt, sizeof(per_thread_t) HEAPACCT(ACCT_OTHER)); |
| dcontext->fragment_field = NULL; |
| } |
| |
| #ifdef UNIX |
| void |
| fragment_fork_init(dcontext_t *dcontext) |
| { |
| /* FIXME: what about global file? */ |
| # if defined(INTERNAL) || defined(CLIENT_INTERFACE) |
| per_thread_t *pt = (per_thread_t *) dcontext->fragment_field; |
| if (TRACEDUMP_ENABLED() && PRIVATE_TRACES_ENABLED()) { |
| /* new log dir has already been created, so just open a new log file */ |
| pt->tracefile = open_log_file("traces", NULL, 0); |
| ASSERT(pt->tracefile != INVALID_FILE); |
| init_trace_file(pt); |
| } |
| # endif |
| } |
| #endif |
| |
| /* fragment_t heap layout looks like this: |
| * |
| * fragment_t/trace_t |
| * translation_info_t*, if necessary |
| * array composed of different sizes of linkstub_t subclasses: |
| * direct_linkstub_t |
| * cbr_fallthrough_linkstub_t |
| * indirect_linkstub_t |
| * post_linkstub_t, if necessary |
| */ |
| static uint |
| fragment_heap_size(uint flags, int direct_exits, int indirect_exits) |
| { |
| uint total_sz; |
| ASSERT((direct_exits + indirect_exits > 0) || TEST(FRAG_COARSE_GRAIN, flags)); |
| total_sz = FRAGMENT_STRUCT_SIZE(flags) + |
| linkstubs_heap_size(flags, direct_exits, indirect_exits); |
| /* we rely on a small heap size for our ushort offset at the end */ |
| ASSERT(total_sz <= USHRT_MAX); |
| return total_sz; |
| } |
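| |
| /* Illustrative sizing sketch only (exact byte counts are build-dependent and |
| * linkstubs_heap_size() is the authority): a trace with two direct exits and |
| * one indirect exit is laid out in a single allocation roughly as |
| * [trace_t][direct_linkstub_t][direct_linkstub_t][indirect_linkstub_t] |
| * (+ post_linkstub_t, if necessary), i.e. |
| * heapsz = FRAGMENT_STRUCT_SIZE(flags) + linkstubs_heap_size(flags, 2, 1); |
| * the total must fit in a ushort since the offset stored at the end is a ushort. |
| */ |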
| |
| /* Allocates memory for a fragment_t and linkstubs and initializes them, but |
| * does not do any fcache-related initialization. |
| */ |
| static fragment_t * |
| fragment_create_heap(dcontext_t *dcontext, |
| int direct_exits, int indirect_exits, uint flags) |
| { |
| dcontext_t *alloc_dc = FRAGMENT_ALLOC_DC(dcontext, flags); |
| uint heapsz = fragment_heap_size(flags, direct_exits, indirect_exits); |
| /* linkstubs are in an array immediately after the fragment_t/trace_t struct */ |
| fragment_t *f = (fragment_t *) |
| nonpersistent_heap_alloc(alloc_dc, heapsz |
| HEAPACCT(TEST(FRAG_IS_TRACE, flags) ? |
| ACCT_TRACE : ACCT_FRAGMENT)); |
| LOG(THREAD, LOG_FRAGMENT, 5, |
| "fragment heap size for flags 0x%08x, exits %d %d, is %d => "PFX"\n", |
| flags, direct_exits, indirect_exits, heapsz, f); |
| |
| return f; |
| } |
| |
| static void |
| fragment_init_heap(fragment_t *f, app_pc tag, int direct_exits, int indirect_exits, |
| uint flags) |
| { |
| ASSERT(f != NULL); |
| f->flags = flags; /* MUST set before calling fcache_add_fragment or |
| * FRAGMENT_EXIT_STUBS */ |
| f->tag = tag; |
| /* Let fragment_create() fill in; other users are building fake fragments */ |
| DODEBUG({ f->id = -1; }); |
| f->next_vmarea = NULL; /* must be set by caller */ |
| f->prev_vmarea = NULL; /* must be set by caller */ |
| f->also.also_vmarea = NULL; /* must be set by caller */ |
| |
| linkstubs_init(FRAGMENT_EXIT_STUBS(f), direct_exits, indirect_exits, f); |
| |
| /* initialize non-ibt entry to top of fragment (caller responsible for |
| * setting up prefix) |
| */ |
| f->prefix_size = 0; |
| #ifdef FRAGMENT_SIZES_STUDY |
| record_fragment_size(f->size, (flags & FRAG_IS_TRACE) != 0); |
| #endif |
| |
| f->in_xlate.incoming_stubs = NULL; |
| #ifdef CUSTOM_TRACES_RET_REMOVAL |
| f->num_calls = 0; |
| f->num_rets = 0; |
| #endif |
| |
| /* trace-only fields */ |
| if (TEST(FRAG_IS_TRACE, flags)) { |
| trace_only_t *t = TRACE_FIELDS(f); |
| t->bbs = NULL; |
| /* real num_bbs won't be set until after the trace is emitted, |
| * but we need a non-zero value for linkstub_fragment() |
| */ |
| t->num_bbs = 1; |
| #ifdef PROFILE_RDTSC |
| t->count = 0UL; |
| t->total_time = (uint64) 0; |
| #endif |
| #ifdef SIDELINE_COUNT_STUDY |
| t->count_old_pre = (linkcount_type_t) 0; |
| t->count_old_post = (linkcount_type_t) 0; |
| #endif |
| } |
| } |
| |
| /* Create a new fragment_t with empty prefix and return it. |
| * The fragment_t is allocated on the global or local heap, depending on the flags, |
| * unless FRAG_COARSE_GRAIN is set, in which case the fragment_t is a unique |
| * temporary struct that is NOT heap allocated and is only safe to use |
| * so long as the bb_building_lock is held! |
| */ |
| fragment_t * |
| fragment_create(dcontext_t *dcontext, app_pc tag, int body_size, |
| int direct_exits, int indirect_exits, int exits_size, uint flags) |
| { |
| fragment_t *f; |
| DEBUG_DECLARE(stats_int_t next_id;) |
| DOSTATS({ |
| /* should watch this stat and if it gets too high need to re-do |
| * who needs the post-linkstub offset |
| */ |
| if (linkstub_frag_offs_at_end(flags, direct_exits, indirect_exits)) |
| STATS_INC(num_fragment_post_linkstub); |
| }); |
| |
| /* ensure no races during a reset */ |
| ASSERT(!dynamo_resetting); |
| |
| if (TEST(FRAG_COARSE_GRAIN, flags)) { |
| ASSERT(DYNAMO_OPTION(coarse_units)); |
| ASSERT_OWN_MUTEX(USE_BB_BUILDING_LOCK(), &bb_building_lock); |
| ASSERT(!TEST(FRAG_IS_TRACE, flags)); |
| ASSERT(TEST(FRAG_SHARED, flags)); |
| ASSERT(fragment_prefix_size(flags) == 0); |
| ASSERT((direct_exits == 0 && indirect_exits == 1) || |
| (indirect_exits == 0 && (direct_exits == 1 || direct_exits == 2))); |
| /* FIXME: eliminate this temp fragment and linkstubs and |
| * have custom emit and link code that does not require such data |
| * structures? It would certainly be faster code. |
| * But would still want to record each exit's target in a convenient |
| * data structure, for later linking, unless we try to link in |
| * the same pass in which we emit indirect stubs. |
| * We could also use fragment_create() and free the resulting struct |
| * somewhere and switch to a wrapper at that point. |
| */ |
| memset(&coarse_emit_fragment, 0, sizeof(coarse_emit_fragment)); |
| f = (fragment_t *) &coarse_emit_fragment; |
| /* We do not mark as FRAG_FAKE since this is pretty much a real |
| * fragment_t, and we do want to walk its linkstub_t structs, which |
| * are present. |
| */ |
| } else { |
| f = fragment_create_heap(dcontext, direct_exits, indirect_exits, flags); |
| } |
| |
| fragment_init_heap(f, tag, direct_exits, indirect_exits, flags); |
| |
| /* To make debugging easier we assign coarse-grain ids in the same namespace |
| * as fine-grain fragments, though we won't remember them at all. |
| */ |
| STATS_INC_ASSIGN(num_fragments, next_id); |
| IF_X64(ASSERT_TRUNCATE(f->id, int, next_id)); |
| DOSTATS({ f->id = (int) next_id; }); |
| DO_GLOBAL_STATS({ |
| if (!TEST(FRAG_IS_TRACE, f->flags)) { |
| RSTATS_INC(num_bbs); |
| IF_X64(if (FRAG_IS_32(f->flags)) STATS_INC(num_32bit_bbs);) |
| } |
| }); |
| DOSTATS({ |
| /* avoid double-counting for adaptive working set */ |
| if (!fragment_lookup_deleted(dcontext, tag) && !TEST(FRAG_COARSE_GRAIN, flags)) |
| STATS_INC(num_unique_fragments); |
| }); |
| /* FIXME: make fragment count a release-build stat so we can do this in |
| * release builds |
| */ |
| DOSTATS({ |
| if (stats != NULL && |
| (uint) GLOBAL_STAT(num_fragments) == INTERNAL_OPTION(reset_at_fragment_count)) { |
| schedule_reset(RESET_ALL); |
| } |
| }); |
| |
| /* size is a ushort |
| * our offsets are ushorts as well: they assume body_size is small enough, not size |
| */ |
| #ifdef CLIENT_INTERFACE |
| if (body_size + exits_size + fragment_prefix_size(flags) > MAX_FRAGMENT_SIZE) { |
| FATAL_USAGE_ERROR(INSTRUMENTATION_TOO_LARGE, 2, |
| get_application_name(), get_application_pid()); |
| } |
| #endif |
| ASSERT(body_size + exits_size + fragment_prefix_size(flags) <= MAX_FRAGMENT_SIZE); |
| /* currently MAX_FRAGMENT_SIZE is USHRT_MAX, but future proofing */ |
| ASSERT_TRUNCATE(f->size, ushort, |
| (body_size + exits_size + fragment_prefix_size(flags))); |
| f->size = (ushort) (body_size + exits_size + fragment_prefix_size(flags)); |
| |
| /* fcache_add will fill in start_pc, next_fcache, |
| * prev_fcache, and fcache_extra |
| */ |
| fcache_add_fragment(dcontext, f); |
| |
| /* after fcache_add_fragment so we can call get_fragment_coarse_info */ |
| DOSTATS({ |
| if (TEST(FRAG_SHARED, flags)) { |
| STATS_INC(num_shared_fragments); |
| if (TEST(FRAG_IS_TRACE, flags)) |
| STATS_INC(num_shared_traces); |
| else if (TEST(FRAG_COARSE_GRAIN, flags)) { |
| coarse_info_t *info = get_fragment_coarse_info(f); |
| if (get_executable_area_coarse_info(f->tag) != info) |
| STATS_INC(num_coarse_secondary); |
| STATS_INC(num_coarse_fragments); |
| } else |
| STATS_INC(num_shared_bbs); |
| } else { |
| STATS_INC(num_private_fragments); |
| if (TEST(FRAG_IS_TRACE, flags)) |
| STATS_INC(num_private_traces); |
| else |
| STATS_INC(num_private_bbs); |
| } |
| }); |
| |
| /* wait until the fragment is completely initialized before dumping any stats */ |
| DOLOG(1, LOG_FRAGMENT|LOG_VMAREAS, { |
| if (INTERNAL_OPTION(global_stats_interval) && |
| (f->id % INTERNAL_OPTION(global_stats_interval) == 0)) { |
| LOG(GLOBAL, LOG_FRAGMENT, 1, "Created %d fragments\n", f->id); |
| dump_global_stats(false); |
| } |
| if (INTERNAL_OPTION(thread_stats_interval) && |
| INTERNAL_OPTION(thread_stats)) { |
| /* FIXME: why do we need a new dcontext? */ |
| dcontext_t *dcontext = get_thread_private_dcontext(); |
| if (THREAD_STATS_ON(dcontext) && |
| THREAD_STAT(dcontext, num_fragments) % INTERNAL_OPTION(thread_stats_interval) == 0) { |
| dump_thread_stats(dcontext, false); |
| } |
| } |
| }); |
| |
| #ifdef WINDOWS |
| DOLOG(1, LOG_FRAGMENT|LOG_VMAREAS, { |
| if (f->id % 50000 == 0) { |
| LOG(GLOBAL, LOG_VMAREAS, 1, |
| "50K fragment check point: here are the loaded modules:\n"); |
| print_modules(GLOBAL, DUMP_NOT_XML); |
| LOG(GLOBAL, LOG_VMAREAS, 1, |
| "50K fragment check point: here are the executable areas:\n"); |
| print_executable_areas(GLOBAL); |
| } |
| }); |
| #endif |
| |
| return f; |
| } |
| |
| /* Creates a new fragment_t+linkstubs from the passed-in fragment and |
| * fills in linkstub_t and fragment_t fields, copying the fcache-related fields |
| * from the passed-in fragment (so be careful how the fields are used). |
| * Meant to be used to create a full fragment from a coarse-grain fragment. |
| * Caller is responsible for freeing via fragment_free() w/ the same dcontext |
| * passed in here. |
| */ |
| fragment_t * |
| fragment_recreate_with_linkstubs(dcontext_t *dcontext, fragment_t *f_src) |
| { |
| uint num_dir, num_indir; |
| uint size; |
| fragment_t *f_tgt; |
| instrlist_t *ilist; |
| linkstub_t *l; |
| cache_pc body_end_pc; |
| /* Not FAKE since has linkstubs, but still fake in a sense since no fcache |
| * slot -- need to mark that? |
| */ |
| uint flags = (f_src->flags & ~FRAG_FAKE); |
| ASSERT_CURIOSITY(TEST(FRAG_COARSE_GRAIN, f_src->flags)); /* only use so far */ |
| /* FIXME case 9325: build from tag here? Need to exactly re-mangle + re-instrument. |
| * We use _exact to get any elided final jmp not counted in size |
| */ |
| ilist = decode_fragment_exact(dcontext, f_src, NULL, NULL, f_src->flags, |
| &num_dir, &num_indir); |
| f_tgt = fragment_create_heap(dcontext, num_dir, num_indir, flags); |
| fragment_init_heap(f_tgt, f_src->tag, num_dir, num_indir, flags); |
| |
| f_tgt->start_pc = f_src->start_pc; |
| /* Can't call this until we have start_pc set */ |
| body_end_pc = set_linkstub_fields(dcontext, f_tgt, ilist, num_dir, num_indir, |
| false/*do not emit*/); |
| /* Calculate total size */ |
| IF_X64(ASSERT_TRUNCATE(size, uint, (body_end_pc - f_tgt->start_pc))); |
| size = (uint) (body_end_pc - f_tgt->start_pc); |
| for (l = FRAGMENT_EXIT_STUBS(f_tgt); l != NULL; l = LINKSTUB_NEXT_EXIT(l)) { |
| if (!EXIT_HAS_LOCAL_STUB(l->flags, f_tgt->flags)) |
| continue; /* it's kept elsewhere */ |
| size += linkstub_size(dcontext, f_tgt, l); |
| #ifdef CUSTOM_EXIT_STUBS |
| size += l->fixed_stub_offset; |
| #endif |
| } |
| ASSERT_TRUNCATE(f_tgt->size, ushort, size); |
| f_tgt->size = (ushort) size; |
| ASSERT(TEST(FRAG_FAKE, f_src->flags) || size == f_src->size); |
| ASSERT_TRUNCATE(f_tgt->prefix_size, byte, fragment_prefix_size(f_src->flags)); |
| f_tgt->prefix_size = (byte) fragment_prefix_size(f_src->flags); |
| ASSERT(TEST(FRAG_FAKE, f_src->flags) || f_src->prefix_size == f_tgt->prefix_size); |
| f_tgt->fcache_extra = f_src->fcache_extra; |
| |
| instrlist_clear_and_destroy(dcontext, ilist); |
| |
| return f_tgt; |
| } |
| |
| /* Frees the storage associated with f. |
| * Callers should use fragment_delete() instead of this routine, unless they |
| * obtained their fragment_t from fragment_recreate_with_linkstubs(). |
| */ |
| void |
| fragment_free(dcontext_t *dcontext, fragment_t *f) |
| { |
| dcontext_t *alloc_dc = FRAGMENT_ALLOC_DC(dcontext, f->flags); |
| uint heapsz; |
| int direct_exits = 0; |
| int indirect_exits = 0; |
| linkstub_t *l = FRAGMENT_EXIT_STUBS(f); |
| for (; l != NULL; l = LINKSTUB_NEXT_EXIT(l)) { |
| if (LINKSTUB_DIRECT(l->flags)) |
| direct_exits++; |
| else { |
| ASSERT(LINKSTUB_INDIRECT(l->flags)); |
| indirect_exits++; |
| } |
| } |
| heapsz = fragment_heap_size(f->flags, direct_exits, indirect_exits); |
| |
| STATS_INC(num_fragments_deleted); |
| |
| if (HAS_STORED_TRANSLATION_INFO(f)) { |
| ASSERT(FRAGMENT_TRANSLATION_INFO(f) != NULL); |
| translation_info_free(dcontext, FRAGMENT_TRANSLATION_INFO(f)); |
| } else |
| ASSERT(FRAGMENT_TRANSLATION_INFO(f) == NULL); |
| |
| /* N.B.: monitor_remove_fragment() was called in fragment_delete, |
| * which is assumed to have been called prior to fragment_free |
| */ |
| |
| linkstub_free_exitstubs(dcontext, f); |
| |
| if ((f->flags & FRAG_IS_TRACE) != 0) { |
| trace_only_t *t = TRACE_FIELDS(f); |
| if (t->bbs != NULL) { |
| nonpersistent_heap_free(alloc_dc, t->bbs, t->num_bbs*sizeof(trace_bb_info_t) |
| HEAPACCT(ACCT_TRACE)); |
| } |
| nonpersistent_heap_free(alloc_dc, f, heapsz HEAPACCT(ACCT_TRACE)); |
| } |
| else { |
| nonpersistent_heap_free(alloc_dc, f, heapsz HEAPACCT(ACCT_FRAGMENT)); |
| } |
| } |
| |
| /* Returns the end of the fragment body + any local stubs (excluding selfmod copy) */ |
| cache_pc |
| fragment_stubs_end_pc(fragment_t *f) |
| { |
| if (TEST(FRAG_SELFMOD_SANDBOXED, f->flags)) |
| return FRAGMENT_SELFMOD_COPY_PC(f); |
| else |
| return f->start_pc + f->size; |
| } |
| |
| /* Returns the end of the fragment body (excluding exit stubs and selfmod copy) */ |
| cache_pc |
| fragment_body_end_pc(dcontext_t *dcontext, fragment_t *f) |
| { |
| linkstub_t *l; |
| for (l = FRAGMENT_EXIT_STUBS(f); l; l = LINKSTUB_NEXT_EXIT(l)) { |
| if (EXIT_HAS_LOCAL_STUB(l->flags, f->flags)) { |
| return EXIT_STUB_PC(dcontext, f, l); |
| } |
| } |
| /* must be no stubs after fragment body */ |
| return fragment_stubs_end_pc(f); |
| } |
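| |
| /* Rough cache-slot picture assumed by the two routines above (sketch only): |
| * start_pc -> [prefix][body][local exit stubs][selfmod copy, if sandboxed] <- start_pc + size |
| * fragment_body_end_pc() returns the start of the first local exit stub, |
| * falling back to fragment_stubs_end_pc() when no exit keeps a local stub, |
| * while fragment_stubs_end_pc() excludes only the selfmod copy. |
| */ |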
| |
| #ifdef PROFILE_LINKCOUNT |
| linkcount_type_t |
| get_total_linkcount(fragment_t *f) |
| { |
| /* return total count of exit counters */ |
| linkstub_t *l; |
| linkcount_type_t total = (linkcount_type_t) 0; |
| for (l = FRAGMENT_EXIT_STUBS(f); l != NULL; l = LINKSTUB_NEXT_EXIT(l)) { |
| total += l->count; |
| } |
| return total; |
| } |
| #endif |
| |
| #if defined(CLIENT_INTERFACE) && defined(CLIENT_SIDELINE) |
| /* synchronization routines needed by sideline threads so that fragments |
| * they are referencing are not deleted out from under them */ |
| |
| void |
| fragment_get_fragment_delete_mutex(dcontext_t *dcontext) |
| { |
| if (dynamo_exited || dcontext == GLOBAL_DCONTEXT) |
| return; |
| mutex_lock(&(((per_thread_t *) dcontext->fragment_field)->fragment_delete_mutex)); |
| } |
| |
| void |
| fragment_release_fragment_delete_mutex(dcontext_t *dcontext) |
| { |
| if (dynamo_exited || dcontext == GLOBAL_DCONTEXT) |
| return; |
| mutex_unlock(&(((per_thread_t *) dcontext->fragment_field)->fragment_delete_mutex)); |
| } |
| #endif |
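| |
| /* Illustrative usage sketch (not an actual call site): a sideline thread that |
| * inspects a fragment_t it does not own brackets the access as |
| *     fragment_get_fragment_delete_mutex(dcontext); |
| *     ... read f->tag, f->start_pc, etc. ... |
| *     fragment_release_fragment_delete_mutex(dcontext); |
| * so that a concurrent fragment_delete() cannot free the fragment under it. |
| */ |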
| |
| /* cleaner to have own flags since there are no negative versions |
| * of FRAG_SHARED and FRAG_IS_TRACE for distinguishing from "don't care" |
| */ |
| enum { |
| LOOKUP_TRACE = 0x001, |
| LOOKUP_BB = 0x002, |
| LOOKUP_PRIVATE = 0x004, |
| LOOKUP_SHARED = 0x008, |
| }; |
| |
| /* A lookup constrained by bb/trace and/or shared/private */ |
| static inline fragment_t * |
| fragment_lookup_type(dcontext_t *dcontext, app_pc tag, uint lookup_flags) |
| { |
| fragment_t *f; |
| |
| LOG(THREAD, LOG_MONITOR, 6, "fragment_lookup_type "PFX" 0x%x\n", |
| tag, lookup_flags); |
| if (dcontext != GLOBAL_DCONTEXT && TEST(LOOKUP_PRIVATE, lookup_flags)) { |
| /* FIXME: add a hashtablex.h wrapper that checks #entries and |
| * grabs lock for us for all lookups? |
| */ |
| /* look at private tables */ |
| per_thread_t *pt = (per_thread_t *) dcontext->fragment_field; |
| /* case 147: traces take precedence over bbs */ |
| if (PRIVATE_TRACES_ENABLED() && TEST(LOOKUP_TRACE, lookup_flags)) { |
| /* now try trace table */ |
| f = hashtable_fragment_lookup(dcontext, (ptr_uint_t)tag, &pt->trace); |
| if (f->tag != NULL) { |
| ASSERT(f->tag == tag); |
| DOLOG(2, LOG_FRAGMENT, { |
| if (DYNAMO_OPTION(shared_traces)) { |
| /* ensure private trace never shadows shared trace */ |
| fragment_t *sf; |
| read_lock(&shared_trace->rwlock); |
| sf = hashtable_fragment_lookup(dcontext, (ptr_uint_t)tag, |
| shared_trace); |
| read_unlock(&shared_trace->rwlock); |
| ASSERT(sf->tag == NULL); |
| } |
| }); |
| ASSERT(!TESTANY(FRAG_FAKE|FRAG_COARSE_GRAIN, f->flags)); |
| return f; |
| } |
| } |
| if (TEST(LOOKUP_BB, lookup_flags) && pt->bb.entries > 0) { |
| /* basic block table last */ |
| f = hashtable_fragment_lookup(dcontext, (ptr_uint_t)tag, &pt->bb); |
| if (f->tag != NULL) { |
| ASSERT(f->tag == tag); |
| DOLOG(2, LOG_FRAGMENT, { |
| if (DYNAMO_OPTION(shared_bbs)) { |
| /* ensure private bb never shadows shared bb, except for |
| * temp privates for trace building |
| */ |
| fragment_t *sf; |
| read_lock(&shared_bb->rwlock); |
| sf = hashtable_fragment_lookup(dcontext, (ptr_uint_t)tag, |
| shared_bb); |
| read_unlock(&shared_bb->rwlock); |
| ASSERT(sf->tag == NULL || TEST(FRAG_TEMP_PRIVATE, f->flags)); |
| } |
| }); |
| ASSERT(!TESTANY(FRAG_FAKE|FRAG_COARSE_GRAIN, f->flags)); |
| return f; |
| } |
| } |
| } |
| |
| if (TEST(LOOKUP_SHARED, lookup_flags)) { |
| if (DYNAMO_OPTION(shared_traces) && TEST(LOOKUP_TRACE, lookup_flags)) { |
| /* MUST look at shared trace table before shared bb table, |
| * since a shared trace can shadow a shared trace head |
| */ |
| read_lock(&shared_trace->rwlock); |
| f = hashtable_fragment_lookup(dcontext, (ptr_uint_t)tag, shared_trace); |
| read_unlock(&shared_trace->rwlock); |
| if (f->tag != NULL) { |
| ASSERT(f->tag == tag); |
| ASSERT(!TESTANY(FRAG_FAKE|FRAG_COARSE_GRAIN, f->flags)); |
| return f; |
| } |
| } |
| |
| if (DYNAMO_OPTION(shared_bbs) && TEST(LOOKUP_BB, lookup_flags)) { |
| /* MUST look at private trace table before shared bb table, |
| * since a private trace can shadow a shared trace head |
| */ |
| read_lock(&shared_bb->rwlock); |
| f = hashtable_fragment_lookup(dcontext, (ptr_uint_t)tag, shared_bb); |
| read_unlock(&shared_bb->rwlock); |
| if (f->tag != NULL) { |
| ASSERT(f->tag == tag); |
| ASSERT(!TESTANY(FRAG_FAKE|FRAG_COARSE_GRAIN, f->flags)); |
| return f; |
| } |
| } |
| } |
| |
| return NULL; |
| } |
| |
| /* lookup a fragment tag */ |
| fragment_t * |
| fragment_lookup(dcontext_t *dcontext, app_pc tag) |
| { |
| return fragment_lookup_type(dcontext, tag, |
| LOOKUP_TRACE|LOOKUP_BB|LOOKUP_PRIVATE|LOOKUP_SHARED); |
| } |
| |
| /* lookup a fragment tag, but only look in trace tables |
| * N.B.: because of shadowing this may not return what fragment_lookup() returns! |
| */ |
| fragment_t * |
| fragment_lookup_trace(dcontext_t *dcontext, app_pc tag) |
| { |
| return fragment_lookup_type(dcontext, tag, LOOKUP_TRACE|LOOKUP_PRIVATE|LOOKUP_SHARED); |
| } |
| |
| /* lookup a fragment tag, but only look in bb tables |
| * N.B.: because of shadowing this may not return what fragment_lookup() returns! |
| */ |
| fragment_t * |
| fragment_lookup_bb(dcontext_t *dcontext, app_pc tag) |
| { |
| return fragment_lookup_type(dcontext, tag, LOOKUP_BB|LOOKUP_PRIVATE|LOOKUP_SHARED); |
| } |
| |
| /* lookup a fragment tag, but only look in shared bb table |
| * N.B.: because of shadowing this may not return what fragment_lookup() returns! |
| */ |
| fragment_t * |
| fragment_lookup_shared_bb(dcontext_t *dcontext, app_pc tag) |
| { |
| return fragment_lookup_type(dcontext, tag, LOOKUP_BB|LOOKUP_SHARED); |
| } |
| |
| /* lookup a fragment tag, but only look in tables that are the same shared-ness |
| * as flags. |
| * N.B.: because of shadowing this may not return what fragment_lookup() returns! |
| */ |
| fragment_t * |
| fragment_lookup_same_sharing(dcontext_t *dcontext, app_pc tag, uint flags) |
| { |
| return fragment_lookup_type(dcontext, tag, LOOKUP_TRACE|LOOKUP_BB| |
| (TEST(FRAG_SHARED, flags) ? |
| LOOKUP_SHARED : LOOKUP_PRIVATE)); |
| } |
| |
| #ifdef DEBUG /* currently only used for debugging */ |
| static fragment_t * |
| hashtable_pclookup(dcontext_t *dcontext, fragment_table_t *table, cache_pc pc) |
| { |
| uint i; |
| fragment_t *f; |
| ASSERT_TABLE_SYNCHRONIZED(table, READWRITE); /* lookup requires read (or write) lock */ |
| for (i = 0; i < table->capacity; i++) { |
| f = table->table[i]; |
| if (!REAL_FRAGMENT(f)) |
| continue; |
| if (pc >= f->start_pc && pc < (f->start_pc + f->size)) { |
| return f; |
| } |
| } |
| return NULL; |
| } |
| |
| /* lookup a fragment pc in the fcache by walking all hashtables. |
| * we have more efficient methods (fcache_fragment_pclookup) so this is only |
| * used for debugging. |
| */ |
| fragment_t * |
| fragment_pclookup_by_htable(dcontext_t *dcontext, cache_pc pc, fragment_t *wrapper) |
| { |
| /* if every fragment is guaranteed to end in 1+ stubs (which |
| * is not true for DYNAMO_OPTION(separate_private_stubs)) we can |
| * simply decode forward until we hit the stub and recover |
| * the linkstub_t* from there -- much more efficient than walking |
| * all the hashtables, plus nicely handles invisible & removed frags! |
| * FIXME: measure perf hit of pclookup, implement this decode strategy. |
| * also we can miss invisible or removed fragments (case 122) so we |
| * may want this regardless of performance -- see also FIXME below. |
| */ |
| fragment_t *f; |
| per_thread_t *pt = NULL; |
| if (dcontext != GLOBAL_DCONTEXT) { |
| pt = (per_thread_t *) dcontext->fragment_field; |
| /* look at private traces first */ |
| if (PRIVATE_TRACES_ENABLED()) { |
| f = hashtable_pclookup(dcontext, &pt->trace, pc); |
| if (f != NULL) |
| return f; |
| } |
| } |
| if (DYNAMO_OPTION(shared_traces)) { |
| /* then shared traces */ |
| read_lock(&shared_trace->rwlock); |
| f = hashtable_pclookup(dcontext, shared_trace, pc); |
| read_unlock(&shared_trace->rwlock); |
| if (f != NULL) |
| return f; |
| } |
| if (DYNAMO_OPTION(shared_bbs)) { |
| /* then shared basic blocks */ |
| read_lock(&shared_bb->rwlock); |
| f = hashtable_pclookup(dcontext, shared_bb, pc); |
| read_unlock(&shared_bb->rwlock); |
| if (f != NULL) |
| return f; |
| } |
| if (dcontext != GLOBAL_DCONTEXT) { |
| /* now private basic blocks */ |
| f = hashtable_pclookup(dcontext, &pt->bb, pc); |
| if (f != NULL) |
| return f; |
| } |
| if (DYNAMO_OPTION(coarse_units)) { |
| coarse_info_t *info = get_executable_area_coarse_info(pc); |
| while (info != NULL) { /* loop over primary and secondary unit */ |
| cache_pc body; |
| app_pc tag = fragment_coarse_pclookup(dcontext, info, pc, &body); |
| if (tag != NULL) { |
| ASSERT(wrapper != NULL); |
| fragment_coarse_wrapper(wrapper, tag, body); |
| return wrapper; |
| } |
| ASSERT(info->frozen || info->non_frozen == NULL); |
| info = info->non_frozen; |
| ASSERT(info == NULL || !info->frozen); |
| } |
| } |
| /* FIXME: shared fragment may have been removed from hashtable but |
| * still be in cache, and e.g. handle_modified_code still needs to know about it -- |
| * should walk deletion vector |
| */ |
| return NULL; |
| } |
| #endif /* DEBUG */ |
| |
| /* lookup a fragment pc in the fcache */ |
| fragment_t * |
| fragment_pclookup(dcontext_t *dcontext, cache_pc pc, fragment_t *wrapper) |
| { |
| /* Rather than walk every single hashtable, including the invisible table, |
| * and the pending-deletion list (case 3567), we find the fcache unit |
| * and walk it. |
| * An even more efficient alternative would be to decode backward, but |
| * that's not doable in general. |
| * |
| * If every fragment is guaranteed to end in 1+ stubs (which |
| * is not true for DYNAMO_OPTION(separate_{private,shared}_stubs)) we can |
| * simply decode forward until we hit the stub and recover |
| * the linkstub_t* from there. |
| * Or we can decode until we hit a jmp and if it's to a linked fragment_t, |
| * search its incoming list. |
| * Stub decoding is complicated by CLIENT_INTERFACE custom stubs and by |
| * PROFILE_LINKCOUNT stub variations. |
| */ |
| return fcache_fragment_pclookup(dcontext, pc, wrapper); |
| } |
| |
| /* Performs a pclookup and if the result is a coarse-grain fragment, allocates |
| * a new fragment_t+linkstubs. |
| * Returns in alloc whether the returned fragment_t was allocated and needs to be |
| * freed by the caller via fragment_free(). |
| * If no result is found, alloc is set to false. |
| * FIXME: use FRAG_RECREATED flag to indicate allocated instead? |
| */ |
| fragment_t * |
| fragment_pclookup_with_linkstubs(dcontext_t *dcontext, cache_pc pc, |
| /*OUT*/bool *alloc) |
| { |
| fragment_t wrapper; |
| fragment_t *f = fragment_pclookup(dcontext, pc, &wrapper); |
| ASSERT(alloc != NULL); |
| if (f != NULL && TEST(FRAG_COARSE_GRAIN, f->flags)) { |
| ASSERT(f == &wrapper); |
| f = fragment_recreate_with_linkstubs(dcontext, f); |
| *alloc = true; |
| } else |
| *alloc = false; |
| return f; |
| } |
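| |
| /* Illustrative caller pattern (sketch only, not a real call site): |
| *     bool alloc; |
| *     fragment_t *f = fragment_pclookup_with_linkstubs(dcontext, pc, &alloc); |
| *     if (f != NULL) { |
| *         ... walk FRAGMENT_EXIT_STUBS(f) ... |
| *         if (alloc) |
| *             fragment_free(dcontext, f); |
| *     } |
| * Per the contract above, alloc tells the caller whether f was heap-allocated |
| * (via fragment_recreate_with_linkstubs()) and so must be freed explicitly. |
| */ |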
| |
| /* add f to the ftable */ |
| void |
| fragment_add(dcontext_t *dcontext, fragment_t *f) |
| { |
| per_thread_t *pt = (per_thread_t *) dcontext->fragment_field; |
| fragment_table_t *table = GET_FTABLE(pt, f->flags); |
| bool resized; |
| /* no future frags! */ |
| ASSERT(!TEST(FRAG_IS_FUTURE, f->flags)); |
| |
| DOCHECK(1, { |
| fragment_t *existing = fragment_lookup(dcontext, f->tag); |
| ASSERT(existing == NULL || |
| IF_CUSTOM_TRACES(/* we create and persist shadowed trace heads */ |
| (TEST(FRAG_IS_TRACE_HEAD, f->flags) || |
| TEST(FRAG_IS_TRACE_HEAD, existing->flags)) ||) |
| /* private trace or temp can shadow shared bb */ |
| (TESTANY(FRAG_IS_TRACE | FRAG_TEMP_PRIVATE, f->flags) && |
| TEST(FRAG_SHARED, f->flags) != TEST(FRAG_SHARED, existing->flags)) || |
| /* shared trace can shadow shared trace head, even with |
| * -remove_shared_trace_heads */ |
| (TESTALL(FRAG_IS_TRACE | FRAG_SHARED, f->flags) && |
| !TEST(FRAG_IS_TRACE, existing->flags) && |
| TESTALL(FRAG_SHARED | FRAG_IS_TRACE_HEAD, existing->flags))); |
| }); |
| |
| /* We'd like the shared fragment table synch to be independent of the |
| * bb building synch (which may become more fine-grained in the future), |
| * so an add needs to hold the write lock to prevent conflicts with |
| * other adds. |
| * We may be able to have a scheme where study() and remove() are writers |
| * but add() is a reader -- but that's confusing and prone to errors in |
| * the future. |
| * We assume that synchronizing addition of the same tag is done through |
| * other means -- we cannot grab this while performing the lookup |
| * w/o making our read locks check to see if we're the writer, |
| * which is a perf hit. Only the actual hashtable add is a "write". |
| */ |
| TABLE_RWLOCK(table, write, lock); |
| resized = fragment_add_to_hashtable(dcontext, f, table); |
| TABLE_RWLOCK(table, write, unlock); |
| |
| /* After resizing a table that is targeted by inlined IBL heads, |
| * the current fragment will need to be repatched. We don't have |
| * to update the stubs when using per-type trace tables, though, since the |
| * trace table itself is not targeted and so resizing it doesn't matter. |
| */ |
| |
| #ifdef SHARING_STUDY |
| if (INTERNAL_OPTION(fragment_sharing_study)) { |
| if (TEST(FRAG_IS_TRACE, f->flags)) |
| add_shared_block(shared_traces, &shared_traces_lock, f); |
| else |
| add_shared_block(shared_blocks, &shared_blocks_lock, f); |
| } |
| #endif |
| } |
| |
| /* Many options, use macros in fragment.h for readability |
| * If output: |
| * dumps f to trace file |
| * If remove: |
| * removes f from ftable |
| * If unlink: |
| * if f is linked, unlinks f |
| * removes f from incoming link tables |
| * If fcache: |
| * deletes f from fcache unit |
| */ |
| void |
| fragment_delete(dcontext_t *dcontext, fragment_t *f, uint actions) |
| { |
| #if defined(CLIENT_INTERFACE) && defined(CLIENT_SIDELINE) |
| bool acquired_shared_vm_lock = false; |
| bool acquired_fragdel_lock = false; |
| #endif |
| LOG(THREAD, LOG_FRAGMENT, 3, |
| "fragment_delete: *"PFX" F%d("PFX")."PFX" %s 0x%x\n", |
| f, f->id, f->tag, f->start_pc, |
| TEST(FRAG_IS_TRACE, f->flags) ? "trace" : "bb", actions); |
| DOLOG(1, LOG_FRAGMENT, { |
| if ((f->flags & FRAG_CANNOT_DELETE) != 0) { |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| "ERROR: trying to delete undeletable F%d("PFX") 0x%x\n", |
| f->id, f->tag, actions); |
| } |
| }); |
| ASSERT((f->flags & FRAG_CANNOT_DELETE) == 0); |
| ASSERT((f->flags & FRAG_IS_FUTURE) == 0); |
| |
| /* ensure the actual free of a shared fragment is done only |
| * after a multi-stage flush or a reset |
| */ |
| ASSERT(!TEST(FRAG_SHARED, f->flags) || TEST(FRAG_WAS_DELETED, f->flags) || |
| dynamo_exited || dynamo_resetting || is_self_allsynch_flushing()); |
| |
| #if defined(CLIENT_INTERFACE) && defined(CLIENT_SIDELINE) |
| /* need to protect ability to reference frag fields and fcache space */ |
| /* all other options are mostly notification */ |
| if (monitor_delete_would_abort_trace(dcontext, f) && DYNAMO_OPTION(shared_traces)) { |
| /* must acquire shared_vm_areas lock before fragment_delete_mutex (PR 596371) */ |
| acquired_shared_vm_lock = true; |
| acquire_recursive_lock(&change_linking_lock); |
| acquire_vm_areas_lock(dcontext, FRAG_SHARED); |
| } |
| /* XXX: I added the test for FRAG_WAS_DELETED for i#759: does sideline |
| * look at fragments after that is set? If so need to resolve rank order |
| * w/ shared_cache_lock. |
| */ |
| if (!TEST(FRAG_WAS_DELETED, f->flags) && |
| (!TEST(FRAGDEL_NO_HEAP, actions) || !TEST(FRAGDEL_NO_FCACHE, actions))) { |
| acquired_fragdel_lock = true; |
| fragment_get_fragment_delete_mutex(dcontext); |
| } |
| #endif |
| |
| #if defined(INTERNAL) || defined(CLIENT_INTERFACE) |
| if (!TEST(FRAGDEL_NO_OUTPUT, actions)) { |
| if (TEST(FRAGDEL_NEED_CHLINK_LOCK, actions) && TEST(FRAG_SHARED, f->flags)) |
| acquire_recursive_lock(&change_linking_lock); |
| else { |
| ASSERT(!TEST(FRAG_SHARED, f->flags) || |
| self_owns_recursive_lock(&change_linking_lock)); |
| } |
| fragment_output(dcontext, f); |
| if (TEST(FRAGDEL_NEED_CHLINK_LOCK, actions) && TEST(FRAG_SHARED, f->flags)) |
| release_recursive_lock(&change_linking_lock); |
| } |
| #endif |
| |
| if (!TEST(FRAGDEL_NO_MONITOR, actions)) |
| monitor_remove_fragment(dcontext, f); |
| |
| if (!TEST(FRAGDEL_NO_UNLINK, actions)) { |
| if (TEST(FRAGDEL_NEED_CHLINK_LOCK, actions) && TEST(FRAG_SHARED, f->flags)) |
| acquire_recursive_lock(&change_linking_lock); |
| else { |
| ASSERT(!TEST(FRAG_SHARED, f->flags) || |
| self_owns_recursive_lock(&change_linking_lock)); |
| } |
| if ((f->flags & FRAG_LINKED_INCOMING) != 0) |
| unlink_fragment_incoming(dcontext, f); |
| if ((f->flags & FRAG_LINKED_OUTGOING) != 0) |
| unlink_fragment_outgoing(dcontext, f); |
| incoming_remove_fragment(dcontext, f); |
| if (TEST(FRAGDEL_NEED_CHLINK_LOCK, actions) && TEST(FRAG_SHARED, f->flags)) |
| release_recursive_lock(&change_linking_lock); |
| } |
| |
| if (!TEST(FRAGDEL_NO_HTABLE, actions)) |
| fragment_remove(dcontext, f); |
| |
| if (!TEST(FRAGDEL_NO_VMAREA, actions)) |
| vm_area_remove_fragment(dcontext, f); |
| |
| if (!TEST(FRAGDEL_NO_FCACHE, actions)) { |
| fcache_remove_fragment(dcontext, f); |
| } |
| |
| #ifdef SIDELINE |
| if (dynamo_options.sideline) |
| sideline_fragment_delete(f); |
| #endif |
| #ifdef CLIENT_INTERFACE |
| if (dr_fragment_deleted_hook_exists() && |
| (!TEST(FRAGDEL_NO_HEAP, actions) || !TEST(FRAGDEL_NO_FCACHE, actions))) |
| instrument_fragment_deleted(dcontext, f->tag, f->flags); |
| #endif |
| #ifdef UNIX |
| if (INTERNAL_OPTION(profile_pcs)) |
| pcprofile_fragment_deleted(dcontext, f); |
| #endif |
| if (!TEST(FRAGDEL_NO_HEAP, actions)) { |
| fragment_free(dcontext, f); |
| } |
| #if defined(CLIENT_INTERFACE) && defined(CLIENT_SIDELINE) |
| if (acquired_fragdel_lock) |
| fragment_release_fragment_delete_mutex(dcontext); |
| if (acquired_shared_vm_lock) { |
| release_vm_areas_lock(dcontext, FRAG_SHARED); |
| release_recursive_lock(&change_linking_lock); |
| } |
#endif
}
| |
| /* Record translation info. Typically used for pending-delete fragments |
| * whose original app code cannot be trusted as it has been modified (case |
| * 3559). |
| * Caller is required to take care of synch (typically this is called |
| * during a flush or during fragment emit) |
| */ |
| void |
| fragment_record_translation_info(dcontext_t *dcontext, fragment_t *f, instrlist_t *ilist) |
| { |
| ASSERT(!NEED_SHARED_LOCK(f->flags) || |
| !USE_BB_BUILDING_LOCK() || |
| OWN_MUTEX(&bb_building_lock) || |
| OWN_MUTEX(&trace_building_lock) || |
| is_self_flushing()); |
| /* We require that either the FRAG_WAS_DELETED flag is set, to |
| * indicate there is allocated memory in the live field that needs |
| * to be freed, or that the FRAG_HAS_TRANSLATION_INFO field is |
| * set, indicating that there is a special appended field pointing |
| * to the translation info. |
| */ |
| if (TEST(FRAG_HAS_TRANSLATION_INFO, f->flags)) { |
| ASSERT(!TEST(FRAG_WAS_DELETED, f->flags)); |
| *(FRAGMENT_TRANSLATION_INFO_ADDR(f)) = |
| record_translation_info(dcontext, f, ilist); |
| ASSERT(FRAGMENT_TRANSLATION_INFO(f) != NULL); |
| STATS_INC(num_fragment_translation_stored); |
| } else if (TEST(FRAG_WAS_DELETED, f->flags)) { |
| ASSERT(f->in_xlate.incoming_stubs == NULL); |
| if (INTERNAL_OPTION(safe_translate_flushed)) { |
| f->in_xlate.translation_info = record_translation_info(dcontext, f, ilist); |
| ASSERT(f->in_xlate.translation_info != NULL); |
| ASSERT(FRAGMENT_TRANSLATION_INFO(f) == f->in_xlate.translation_info); |
| STATS_INC(num_fragment_translation_stored); |
| #ifdef INTERNAL |
| DODEBUG({ |
| if (INTERNAL_OPTION(stress_recreate_pc)) { |
| /* verify recreation */ |
| stress_test_recreate(dcontext, f, NULL); |
| } |
| }); |
| #endif |
| } else |
| f->in_xlate.translation_info = NULL; |
| } else |
| ASSERT_NOT_REACHED(); |
| } |
| |
| /* Removes the shared fragment f from all lookup tables in a safe |
| * manner that does not require a full flush synch. |
| * This routine can be called without synchronizing with other threads. |
| */ |
| void |
| fragment_remove_shared_no_flush(dcontext_t *dcontext, fragment_t *f) |
| { |
| DEBUG_DECLARE(bool shared_ibt_table_used = !TEST(FRAG_IS_TRACE, f->flags) ? |
| DYNAMO_OPTION(shared_bb_ibt_tables) : |
| DYNAMO_OPTION(shared_trace_ibt_tables);) |
| |
| ASSERT_NOT_IMPLEMENTED(!TEST(FRAG_COARSE_GRAIN, f->flags)); |
| |
    /* Strategy: ensure there are no races in updating the table or links by
     * grabbing the high-level locks that are used to synchronize additions to
     * the table itself.  Then, simply remove directly from DR-only tables, and
     * safely from IB tables.
     * FIXME: there are still risks that the fragment's link state may change
     * underneath us.
     */
| LOG(THREAD, LOG_FRAGMENT, 3, "fragment_remove_shared_no_flush: F%d\n", f->id); |
| ASSERT(TEST(FRAG_SHARED, f->flags)); |
| if (TEST(FRAG_IS_TRACE, f->flags)) { |
| mutex_lock(&trace_building_lock); |
| } |
| /* grab bb building lock even for traces to further prevent link changes */ |
| mutex_lock(&bb_building_lock); |
| |
| if (TEST(FRAG_WAS_DELETED, f->flags)) { |
| /* since caller can't grab locks, we can have a race where someone |
| * else deletes first -- in that case nothing to do |
| */ |
| STATS_INC(shared_delete_noflush_race); |
| mutex_unlock(&bb_building_lock); |
| if (TEST(FRAG_IS_TRACE, f->flags)) |
| mutex_unlock(&trace_building_lock); |
| return; |
| } |
| |
| /* FIXME: try to share code w/ fragment_unlink_for_deletion() */ |
| |
| /* Make link changes atomic. We also want vm_area_remove_fragment and |
| * marking as deleted to be atomic so we grab vm_areas lock up front. |
| */ |
| acquire_recursive_lock(&change_linking_lock); |
| acquire_vm_areas_lock(dcontext, f->flags); |
| |
| /* FIXME: share all this code w/ vm_area_unlink_fragments() |
| * The work there is just different enough to make that hard, though. |
| */ |
| if (TEST(FRAG_LINKED_OUTGOING, f->flags)) |
| unlink_fragment_outgoing(GLOBAL_DCONTEXT, f); |
| if (TEST(FRAG_LINKED_INCOMING, f->flags)) |
| unlink_fragment_incoming(GLOBAL_DCONTEXT, f); |
| incoming_remove_fragment(GLOBAL_DCONTEXT, f); |
| |
| /* remove from ib lookup tables in a safe manner. this removes the |
| * frag only from this thread's tables OR from shared tables. |
| */ |
| fragment_prepare_for_removal(GLOBAL_DCONTEXT, f); |
| /* fragment_remove ignores the ibl tables for shared fragments */ |
| fragment_remove(GLOBAL_DCONTEXT, f); |
| /* FIXME: we don't currently remove from thread-private ibl tables as that |
| * requires walking all of the threads. */ |
| ASSERT_NOT_IMPLEMENTED(!IS_IBL_TARGET(f->flags) || shared_ibt_table_used); |
| |
| vm_area_remove_fragment(dcontext, f); |
| /* case 8419: make marking as deleted atomic w/ fragment_t.also_vmarea field |
| * invalidation, so that users of vm_area_add_to_list() can rely on this |
| * flag to determine validity |
| */ |
| f->flags |= FRAG_WAS_DELETED; |
| |
| release_vm_areas_lock(dcontext, f->flags); |
| release_recursive_lock(&change_linking_lock); |
| |
| /* if a flush occurs, this fragment will be ignored -- so we must store |
| * translation info now, just in case |
| */ |
| if (!TEST(FRAG_HAS_TRANSLATION_INFO, f->flags)) |
| fragment_record_translation_info(dcontext, f, NULL); |
| |
| /* try to catch any potential races */ |
| ASSERT(!TEST(FRAG_LINKED_OUTGOING, f->flags)); |
| ASSERT(!TEST(FRAG_LINKED_INCOMING, f->flags)); |
| |
| mutex_unlock(&bb_building_lock); |
| if (TEST(FRAG_IS_TRACE, f->flags)) { |
| mutex_unlock(&trace_building_lock); |
| } |
| |
| /* no locks can be held when calling this, but f is already unreachable, |
| * so can do this outside of locks |
| */ |
| add_to_lazy_deletion_list(dcontext, f); |
| } |
| |
| /* Prepares a fragment for delayed deletion by unlinking it. |
| * Caller is responsible for calling vm_area_remove_fragment(). |
| * Caller must hold the change_linking_lock if f is shared. |
| */ |
| void |
| fragment_unlink_for_deletion(dcontext_t *dcontext, fragment_t *f) |
| { |
| ASSERT(!TEST(FRAG_SHARED, f->flags) || |
| self_owns_recursive_lock(&change_linking_lock)); |
| /* this is not an error since fcache unit flushing puts lazily-deleted |
| * fragments onto its list to ensure they are in the same pending |
| * delete entry as the normal fragments -- so this routine becomes |
| * a nop for them |
| */ |
| if (TEST(FRAG_WAS_DELETED, f->flags)) { |
| LOG(THREAD, LOG_FRAGMENT|LOG_VMAREAS, 5, |
| "NOT unlinking F%d("PFX") for deletion\n", f->id, f->start_pc); |
| STATS_INC(deleted_frags_re_deleted); |
| return; |
| } |
| LOG(THREAD, LOG_FRAGMENT|LOG_VMAREAS, 5, |
| "unlinking F%d("PFX") for deletion\n", f->id, f->start_pc); |
| #if defined(INTERNAL) || defined(CLIENT_INTERFACE) |
| /* we output now to avoid problems reading component blocks |
| * of traces after source modules are unloaded |
| */ |
| fragment_output(dcontext, f); |
| #endif |
| if (TEST(FRAG_LINKED_OUTGOING, f->flags)) |
| unlink_fragment_outgoing(dcontext, f); |
| if (TEST(FRAG_LINKED_INCOMING, f->flags)) |
| unlink_fragment_incoming(dcontext, f); |
    /* need to remove our outgoing links from others' incoming lists and
     * redirect others' outgoing links to a future.  the former must be
     * done before we remove from the hashtable, and the latter must be
| * done now to avoid other fragments jumping into stale code, |
| * so we do it here and not when we do the real fragment_delete(). |
| * we don't need to do this for private fragments, but we do anyway |
| * so that we can use the fragment_t.incoming_stubs field as a union. |
| */ |
| incoming_remove_fragment(dcontext, f); |
| |
| if (TEST(FRAG_SHARED, f->flags)) { |
| /* we shouldn't need to worry about someone else changing the |
| * link status, since nobody else is allowed to be in DR now, |
| * and afterward they all must invalidate any ptrs they hold |
| * to flushed fragments, and flushed fragments are not |
| * reachable via hashtable or incoming lists! |
| */ |
| /* ASSUMPTION: monitor_remove_fragment does NOT need to |
| * be called for all threads, since private trace head |
| * ctrs are cleared lazily (only relevant here for |
| * -shared_traces) and invalidating last_{exit,fragment} |
| * is done by the trace overlap and abort in the main |
| * flush loop. |
| */ |
| } |
| |
| /* need to remove from htable |
| * we used to only do fragment_prepare_for_removal() (xref case 1808) |
| * for private fragments, but for case 3559 we want to free up the |
| * incoming field at unlink time, and we must do all 3 of unlink, |
| * vmarea, and htable freeing at once. |
| */ |
| fragment_remove(dcontext, f); |
| |
| /* let recreate_fragment_ilist() know that this fragment is |
| * pending deletion and might no longer match the app's state. |
| * for shared fragments, also lets people know f is not in a normal |
| * vmarea anymore (though actually moving f is up to the caller). |
| * additionally the flag indicates that translation info was allocated |
| * for this fragment. |
| */ |
| f->flags |= FRAG_WAS_DELETED; |
| |
| /* the original app code cannot be used to recreate state, so we must |
| * store translation info now |
| */ |
| if (!TEST(FRAG_HAS_TRANSLATION_INFO, f->flags)) |
| fragment_record_translation_info(dcontext, f, NULL); |
| |
| STATS_INC(fragments_unlinked_for_deletion); |
| } |
| |
| /* When shared IBT tables are used, update thread-private state |
| * to reflect the current parameter values -- hash mask, table address -- |
| * for the shared ftable. |
| */ |
| static bool |
| update_private_ibt_table_ptrs(dcontext_t *dcontext, ibl_table_t *ftable |
| _IF_DEBUG(fragment_entry_t **orig_table)) |
| { |
| bool table_change = false; |
| |
| if (TEST(FRAG_TABLE_SHARED, ftable->table_flags)) { |
| |
| per_thread_t *pt = (per_thread_t *) dcontext->fragment_field; |
| |
| if (TEST(FRAG_TABLE_TRACE, ftable->table_flags) && |
| ftable->table != pt->trace_ibt[ftable->branch_type].table) { |
| DODEBUG({ |
| if (orig_table != NULL) |
| *orig_table = |
| pt->trace_ibt[ftable->branch_type].table; |
| }); |
| table_change = true; |
| } |
| else if (DYNAMO_OPTION(bb_ibl_targets) && |
| !TEST(FRAG_TABLE_TRACE, ftable->table_flags) && |
| ftable->table != pt->bb_ibt[ftable->branch_type].table) { |
| DODEBUG({ |
| if (orig_table != NULL) |
| *orig_table = |
| pt->bb_ibt[ftable->branch_type].table; |
| }); |
| table_change = true; |
| } |
| if (table_change) { |
| update_private_ptr_to_shared_ibt_table(dcontext, ftable->branch_type, |
| TEST(FRAG_TABLE_TRACE, |
| ftable->table_flags), |
| true, /* adjust old |
| * ref-count */ |
| true /* lock */); |
| DODEBUG({ |
| if (orig_table != NULL) |
| ASSERT(ftable->table != *orig_table); |
| }); |
| } |
| #ifdef DEBUG |
| else if (orig_table != NULL) |
| *orig_table = NULL; |
| #endif |
| } |
| return table_change; |
| } |
| |
| /* Update the thread-private ptrs for the dcontext to point to the |
| * currently "live" shared IBT table for branch_type. |
| * When adjust_ref_count==true, adjust the ref-count for the old table |
| * that the dcontext currently points to. |
| * When lock_table==true, lock the shared table prior to manipulating |
| * it. If this is false, the caller must have locked the table already. |
| * NOTE: If adjust_ref_count=true, lock_table should be true also and |
| * the caller should NOT hold the table lock, since the underlying |
| * routines that manipulate the ref count lock the table. |
| */ |
| static inline void |
| update_private_ptr_to_shared_ibt_table(dcontext_t *dcontext, |
| ibl_branch_type_t branch_type, bool trace, |
| bool adjust_old_ref_count, bool lock_table) |
| { |
| per_thread_t *pt = (per_thread_t *) dcontext->fragment_field; |
| ibl_table_t *sh_table_ptr = trace ? &shared_pt->trace_ibt[branch_type] : |
| &shared_pt->bb_ibt[branch_type]; |
| ibl_table_t *pvt_table_ptr = trace ? &pt->trace_ibt[branch_type] : |
| &pt->bb_ibt[branch_type]; |
| |
| /* Point to the new table. The shared table must be locked prior to |
| * accessing any of its fields. */ |
| if (lock_table) |
| TABLE_RWLOCK(sh_table_ptr, write, lock); |
| ASSERT_OWN_WRITE_LOCK(true, &sh_table_ptr->rwlock); |
| |
| /* We can get here multiple times due to callers being racy */ |
| if (pvt_table_ptr->table == sh_table_ptr->table) { |
| SYSLOG_INTERNAL_WARNING_ONCE("racy private ptr to shared table update"); |
| if (lock_table) |
| TABLE_RWLOCK(sh_table_ptr, write, unlock); |
| return; |
| } |
| |
| /* Decrement the ref-count for any old table that is pointed to. */ |
| if (adjust_old_ref_count) { |
| dec_table_ref_count(dcontext, pvt_table_ptr, false/*can't be live*/); |
| } |
| |
| /* We must hold at least the read lock when writing, else we could grab |
| * an inconsistent mask/lookuptable pair if another thread is in the middle |
| * of resizing the table (case 10405). |
| */ |
| /* Only data for one set of tables is stored in TLS -- for the trace |
| * tables in the default config OR the BB tables in shared BBs |
| * only mode. |
| */ |
| if ((trace || SHARED_BB_ONLY_IB_TARGETS()) && |
| DYNAMO_OPTION(ibl_table_in_tls)) |
| update_lookuptable_tls(dcontext, sh_table_ptr); |
| |
| ASSERT(pvt_table_ptr->table != sh_table_ptr->table); |
| pvt_table_ptr->table = sh_table_ptr->table; |
| pvt_table_ptr->hash_mask = sh_table_ptr->hash_mask; |
| /* We copy the unaligned value over also because it's used for matching |
| * in the dead table list. */ |
| pvt_table_ptr->table_unaligned = sh_table_ptr->table_unaligned; |
| pvt_table_ptr->table_flags = sh_table_ptr->table_flags; |
| sh_table_ptr->ref_count++; |
| ASSERT(sh_table_ptr->ref_count > 0); |
| |
| DODEBUG({ |
| LOG(THREAD, LOG_FRAGMENT|LOG_STATS, 2, |
| "update_table_ptrs %s-%s table: addr "PFX", mask "PIFX"\n", |
| trace ? "trace" : "BB", sh_table_ptr->name, |
| sh_table_ptr->table, sh_table_ptr->hash_mask); |
| if ((trace || SHARED_BB_ONLY_IB_TARGETS()) && |
| DYNAMO_OPTION(ibl_table_in_tls)) { |
| local_state_extended_t *state = |
| (local_state_extended_t *) dcontext->local_state; |
| LOG(THREAD, LOG_FRAGMENT|LOG_STATS, 2, |
| "TLS state %s-%s table: addr "PFX", mask "PIFX"\n", |
| trace ? "trace" : "BB", sh_table_ptr->name, |
| state->table_space.table[branch_type].lookuptable, |
| state->table_space.table[branch_type].hash_mask); |
| } |
| }); |
| #ifdef HASHTABLE_STATISTICS |
| if (INTERNAL_OPTION(hashtable_ibl_entry_stats)) { |
| pvt_table_ptr->entry_stats_to_lookup_table = |
| sh_table_ptr->entry_stats_to_lookup_table; |
| } |
| else |
| pvt_table_ptr->entry_stats_to_lookup_table = 0; |
| #endif |
| if (lock_table) |
| TABLE_RWLOCK(sh_table_ptr, write, unlock); |
| |
| /* We don't need the lock for this, and holding it will have rank order |
| * issues with disassembling in debug builds */ |
| if (PRIVATE_TRACES_ENABLED() || DYNAMO_OPTION(bb_ibl_targets)) |
| update_generated_hashtable_access(dcontext); |
| |
| STATS_INC(num_shared_ibt_table_ptr_resets); |
| } |
| |
| /* When shared IBT tables are used, update thread-private state |
| * to reflect the current parameter values -- hash mask, table address -- |
| * for all tables. |
| */ |
| static bool |
| update_all_private_ibt_table_ptrs(dcontext_t *dcontext, per_thread_t *pt) |
| { |
| bool rc = false; |
| |
| if (SHARED_IBT_TABLES_ENABLED()) { |
| ibl_branch_type_t branch_type; |
| |
| for (branch_type = IBL_BRANCH_TYPE_START; |
| branch_type < IBL_BRANCH_TYPE_END; branch_type++) { |
| if (DYNAMO_OPTION(shared_trace_ibt_tables)) { |
| if (update_private_ibt_table_ptrs(dcontext, |
| &shared_pt->trace_ibt[branch_type] |
| _IF_DEBUG(NULL))) |
| rc = true; |
| } |
| if (DYNAMO_OPTION(shared_bb_ibt_tables)) { |
| if (update_private_ibt_table_ptrs(dcontext, |
| &shared_pt->bb_ibt[branch_type] |
| _IF_DEBUG(NULL))) |
| rc = true; |
| } |
| } |
| } |
| return rc; |
| } |
| |
| /* Prepares for removal of f from ftable (does not delete f) by pointing the |
| * fragment's lookup table entry to an entry point that leads to a cache exit. |
| * This routine is needed for safe removal of a fragment by a thread while |
| * another thread may be about to jump to it via an IBL. The lookuptable is |
| * left in a slightly inconsistent state but one that is accepted by the |
| * consistency check. See the note on hashtable_fragment_check_consistency |
| * in the routine. |
| * |
 * Returns true if the fragment was found and its entry was redirected.
| */ |
| static bool |
| fragment_prepare_for_removal_from_table(dcontext_t *dcontext, fragment_t *f, |
| ibl_table_t *ftable) |
| { |
| uint hindex; |
| fragment_entry_t fe = FRAGENTRY_FROM_FRAGMENT(f); |
| fragment_entry_t *pg; |
| |
| /* We need the write lock since the start_pc is modified (though we |
| * technically may be ok in all scenarios there) and to avoid problems |
| * with parallel prepares (shouldn't count on the bb building lock). |
| * Grab the lock after all private ptrs are updated since that |
| * operation might grab the same lock, if this remove is from a |
| * shared IBT table. |
| */ |
| /* FIXME: why do we need to update here? */ |
| update_private_ibt_table_ptrs(dcontext, ftable _IF_DEBUG(NULL)); |
| TABLE_RWLOCK(ftable, write, lock); |
| pg = hashtable_ibl_lookup_for_removal(fe, ftable, &hindex); |
| if (pg != NULL) { |
| /* Note all IBL routines that could be looking up an entry |
| * in this table have to exit with equivalent register |
| * state. It is possible to enter a private bb IBL |
| * lookup, shared bb IBL lookup or trace bb IBL lookup and |
| * if a delete race is hit then they would all go to the |
| * pending_delete_pc that we'll now supply. They HAVE to |
| * be all equivalent independent of the source fragment for this to work. |
| * |
| * On the other hand we can provide different start_pc |
| * values if we have different tables. We currently don't |
| * take advantage of this but we'll leave the power in place. |
| */ |
| |
| /* FIXME: [perf] we could memoize this value in the table itself */ |
| cache_pc pending_delete_pc = |
| get_target_delete_entry_pc(dcontext, ftable); |
| |
| ASSERT(IBL_ENTRIES_ARE_EQUAL(*pg, fe)); |
| ASSERT(pending_delete_pc != NULL); |
| LOG(THREAD, LOG_FRAGMENT, 3, |
| "fragment_prepare: remove F%d("PFX") from %s[%u] (table addr "PFX"), " |
| "set to "PFX"\n", |
| f->id, f->tag, ftable->name, hindex, ftable->table, |
| pending_delete_pc); |
| |
| /* start_pc_fragment will not match start_pc for the table |
| * consistency checks. However, the hashtable_fragment_check_consistency |
| * routine verifies that either start_pc/start_pc_fragment match OR that |
| * the start_pc_fragment is set to the correct target_delete |
| * entry point. |
| * |
| * We change the tag to FAKE_TAG, which preserves linear probing. |
| * In a thread-shared table, this ensures that the same tag will never |
| * be present in more than one entry in a table (1 real entry & |
| * 1+ target_delete entries). |
| * This isn't needed in a thread-private table but doesn't hurt. |
| */ |
| ftable->table[hindex].start_pc_fragment = pending_delete_pc; |
| ftable->table[hindex].tag_fragment = FAKE_TAG; |
| /* FIXME In a shared table, this means that the entry cannot |
| * be overwritten for a fragment with the same tag. */ |
| ftable->unlinked_entries++; |
| ftable->entries--; |
| TABLE_RWLOCK(ftable, write, unlock); |
| ASSERT(!TEST(FRAG_CANNOT_DELETE, f->flags)); |
| return true; |
| } |
| TABLE_RWLOCK(ftable, write, unlock); |
| return false; |
| } |
| |
| /* Prepares fragment f for removal from all IBL routine targeted tables. |
| * Does not actually remove the entry from the table |
| * as that can only be done through proper cross-thread synchronization. |
| * |
 * Returns true if the fragment was found & prepared for removal.
| */ |
| bool |
| fragment_prepare_for_removal(dcontext_t *dcontext, fragment_t *f) |
| { |
| per_thread_t *pt; |
| bool prepared = false; |
| ibl_branch_type_t branch_type; |
| if (!IS_IBL_TARGET(f->flags)) { |
| /* nothing to do */ |
| return false; |
| } |
| |
| ASSERT(TEST(FRAG_SHARED, f->flags) || dcontext != GLOBAL_DCONTEXT); |
| /* We need a real per_thread_t & context below so make sure we have one. */ |
| if (dcontext == GLOBAL_DCONTEXT) { |
| dcontext = get_thread_private_dcontext(); |
| ASSERT(dcontext != NULL); |
| } |
| pt = GET_PT(dcontext); |
| /* FIXME: as an optimization we could test if IS_IBL_TARGET() is |
| * set before looking it up |
| */ |
| |
| for (branch_type = IBL_BRANCH_TYPE_START; |
| branch_type < IBL_BRANCH_TYPE_END; branch_type++) { |
| per_thread_t *local_pt = pt; |
| /* We put traces into the trace tables and BBs into the BB tables |
| * and sometimes put traces into BB tables also. We never put |
| * BBs into a trace table. |
| */ |
| if (TEST(FRAG_IS_TRACE, f->flags)) { |
| if (DYNAMO_OPTION(shared_trace_ibt_tables)) |
| local_pt = shared_pt; |
| if (fragment_prepare_for_removal_from_table(dcontext, f, |
| &local_pt-> |
| trace_ibt[branch_type])) |
| prepared = true; |
| } |
| if (DYNAMO_OPTION(bb_ibl_targets) && |
| (!TEST(FRAG_IS_TRACE, f->flags) || |
| DYNAMO_OPTION(bb_ibt_table_includes_traces))) { |
| if (DYNAMO_OPTION(shared_bb_ibt_tables)) |
| local_pt = shared_pt; |
| if (fragment_prepare_for_removal_from_table(dcontext, f, |
| &local_pt-> |
| bb_ibt[branch_type])) { |
| #ifdef DEBUG |
| ibl_table_t *ibl_table = GET_IBT_TABLE(pt, f->flags, branch_type); |
| fragment_entry_t current; |
| |
| TABLE_RWLOCK(ibl_table, read, lock); |
| current = hashtable_ibl_lookup(dcontext, (ptr_uint_t)f->tag, ibl_table); |
| ASSERT(IBL_ENTRY_IS_EMPTY(current)); |
| TABLE_RWLOCK(ibl_table, read, unlock); |
| #endif |
| prepared = true; |
| } |
| } |
| } |
| return prepared; |
| } |
| |
| #ifdef DEBUG |
| /* FIXME: hashtable_fragment_reset() needs to walk the tables to get these |
| * stats, but then we'd need to subtract 1 from all smaller counts - |
| * e.g. if an entry is found in 3 tables we can add (1,-1,0) then |
| * we'll find it again and we should add (0,1,-1) and one more time |
| * when we should add (0,0,1). In total all will be accounted for to |
| * (1,0,0) without messing much else. |
| */ |
| static inline void |
| fragment_ibl_stat_account(uint flags, uint ibls_targeted) |
| { |
| if (TEST(FRAG_IS_TRACE, flags)) { |
| switch (ibls_targeted) { |
| case 0: break; /* doesn't have to be a target of any IBL routine */ |
| case 1: STATS_INC(num_traces_in_1_ibl_tables); break; |
| case 2: STATS_INC(num_traces_in_2_ibl_tables); break; |
| case 3: STATS_INC(num_traces_in_3_ibl_tables); break; |
| default: ASSERT_NOT_REACHED(); |
| } |
| } else { |
| switch (ibls_targeted) { |
| case 0: break; /* doesn't have to be a target of any IBL routine */ |
| case 1: STATS_INC(num_bbs_in_1_ibl_tables); break; |
| case 2: STATS_INC(num_bbs_in_2_ibl_tables); break; |
| case 3: STATS_INC(num_bbs_in_3_ibl_tables); break; |
| default: ASSERT_NOT_REACHED(); |
| } |
| } |
| } |
| #endif |
| |
| /* Removes f from any IBT tables it is in. |
 * If f is in a shared table, it is only removed if from_shared is true, in
| * which case dcontext must be GLOBAL_DCONTEXT and we must have |
| * dynamo_all_threads_synched (case 10137). |
| */ |
| void |
| fragment_remove_from_ibt_tables(dcontext_t *dcontext, fragment_t *f, |
| bool from_shared) |
| { |
| bool shared_ibt_table = |
| (!TEST(FRAG_IS_TRACE, f->flags) && DYNAMO_OPTION(shared_bb_ibt_tables)) || |
| (TEST(FRAG_IS_TRACE, f->flags) && DYNAMO_OPTION(shared_trace_ibt_tables)); |
| fragment_entry_t fe = FRAGENTRY_FROM_FRAGMENT(f); |
| |
| ASSERT(!from_shared || !shared_ibt_table || !IS_IBL_TARGET(f->flags) || |
| (dcontext == GLOBAL_DCONTEXT && dynamo_all_threads_synched)); |
| if (((!shared_ibt_table && dcontext != GLOBAL_DCONTEXT) || |
| (from_shared && dcontext == GLOBAL_DCONTEXT && dynamo_all_threads_synched)) && |
| IS_IBL_TARGET(f->flags)) { |
| |
        /* Trace tables should all be private, and any deletion should follow the
         * strict two-step deletion process, so we don't need to be holding nested
         * locks when removing cached entries from the per-type IBL target tables.
         */
| /* FIXME: the stats on ibls_targeted are not quite correct - we need to |
| * gather these independently */ |
| DEBUG_DECLARE(uint ibls_targeted = 0;) |
| ibl_branch_type_t branch_type; |
| per_thread_t *pt = GET_PT(dcontext); |
| |
| ASSERT(TEST(FRAG_IS_TRACE, f->flags) || DYNAMO_OPTION(bb_ibl_targets)); |
| for (branch_type = IBL_BRANCH_TYPE_START; |
| branch_type < IBL_BRANCH_TYPE_END; branch_type++) { |
| /* assuming a single tag can't be both a trace and bb */ |
| ibl_table_t *ibtable = GET_IBT_TABLE(pt, f->flags, branch_type); |
| |
| ASSERT(!TEST(FRAG_TABLE_SHARED, ibtable->table_flags) || |
| dynamo_all_threads_synched); |
| TABLE_RWLOCK(ibtable, write, lock); /* satisfy asserts, even if allsynch */ |
| if (hashtable_ibl_remove(fe, ibtable)) { |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| " removed F%d("PFX") from IBT table %s\n", |
| f->id, f->tag, |
| TEST(FRAG_TABLE_TRACE, ibtable->table_flags) ? |
| ibl_trace_table_type_names[branch_type] : |
| ibl_bb_table_type_names[branch_type]); |
| |
| DOSTATS({ibls_targeted++;}); |
| } |
| TABLE_RWLOCK(ibtable, write, unlock); |
| } |
| DOSTATS({fragment_ibl_stat_account(f->flags, ibls_targeted);}); |
| } |
| } |
| |
| /* Removes ibl entries whose tags are in [start,end) */ |
| static uint |
| fragment_remove_ibl_entries_in_region(dcontext_t *dcontext, app_pc start, app_pc end, |
| uint frag_flags) |
| { |
| uint total_removed = 0; |
| per_thread_t *pt = GET_PT(dcontext); |
| ibl_branch_type_t branch_type; |
| ASSERT(pt != NULL); |
| ASSERT(TEST(FRAG_IS_TRACE, frag_flags) || DYNAMO_OPTION(bb_ibl_targets)); |
| ASSERT(dcontext == get_thread_private_dcontext() || dynamo_all_threads_synched); |
| for (branch_type = IBL_BRANCH_TYPE_START; |
| branch_type < IBL_BRANCH_TYPE_END; branch_type++) { |
| ibl_table_t *ibtable = GET_IBT_TABLE(pt, frag_flags, branch_type); |
| uint removed = 0; |
| TABLE_RWLOCK(ibtable, write, lock); |
| if (ibtable->entries > 0) { |
| removed = hashtable_ibl_range_remove(dcontext, ibtable, |
| (ptr_uint_t)start, (ptr_uint_t)end, NULL); |
| /* Ensure a full remove gets everything */ |
| ASSERT(start != UNIVERSAL_REGION_BASE || end != UNIVERSAL_REGION_END || |
| (ibtable->entries == 0 && |
| is_region_memset_to_char((app_pc)ibtable->table, |
| (ibtable->capacity-1)*sizeof(fragment_entry_t), |
| 0))); |
| } |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| " removed %d entries (%d left) in "PFX"-"PFX" from IBT table %s\n", |
| removed, ibtable->entries, start, end, |
| TEST(FRAG_TABLE_TRACE, ibtable->table_flags) ? |
| ibl_trace_table_type_names[branch_type] : |
| ibl_bb_table_type_names[branch_type]); |
| TABLE_RWLOCK(ibtable, write, unlock); |
| total_removed += removed; |
| } |
| return total_removed; |
| } |
| |
| /* Removes shared (and incidentally private, but if no shared targets |
| * can exist, may remove nothing) ibl entries whose tags are in |
| * [start,end) from all tables associated w/ dcontext. If |
| * dcontext==GLOBAL_DCONTEXT, uses the shared tables, if they exist; |
| * else, uses the private tables, if any. |
| */ |
| uint |
| fragment_remove_all_ibl_in_region(dcontext_t *dcontext, app_pc start, app_pc end) |
| { |
| uint removed = 0; |
| if (DYNAMO_OPTION(bb_ibl_targets) && |
| ((dcontext == GLOBAL_DCONTEXT && DYNAMO_OPTION(shared_bb_ibt_tables)) || |
| (dcontext != GLOBAL_DCONTEXT && !DYNAMO_OPTION(shared_bb_ibt_tables)))) { |
| removed += |
| fragment_remove_ibl_entries_in_region(dcontext, start, end, 0/*bb table*/); |
| } |
| if (DYNAMO_OPTION(shared_traces) && |
| ((dcontext == GLOBAL_DCONTEXT && DYNAMO_OPTION(shared_trace_ibt_tables)) || |
| (dcontext != GLOBAL_DCONTEXT && !DYNAMO_OPTION(shared_trace_ibt_tables)))) { |
| removed += |
| fragment_remove_ibl_entries_in_region(dcontext, start, end, FRAG_IS_TRACE); |
| } |
| return removed; |
| } |
| |
| /* Removes f from any hashtables -- BB, trace, or future -- and IBT tables |
| * it is in, except for shared IBT tables. */ |
| void |
| fragment_remove(dcontext_t *dcontext, fragment_t *f) |
| { |
| per_thread_t *pt = GET_PT(dcontext); |
| fragment_table_t *table = GET_FTABLE(pt, f->flags); |
| |
| ASSERT(TEST(FRAG_SHARED, f->flags) || dcontext != GLOBAL_DCONTEXT); |
| /* For consistency we remove entries from the IBT |
| * tables before we remove them from the trace table. |
| */ |
| fragment_remove_from_ibt_tables(dcontext, f, false/*leave in shared*/); |
| |
| /* We need the write lock since deleting shifts elements around (though we |
| * technically may be ok in all scenarios there) and to avoid problems with |
| * multiple removes at once (shouldn't count on the bb building lock) |
| */ |
| TABLE_RWLOCK(table, write, lock); |
| if (hashtable_fragment_remove(f, table)) { |
| LOG(THREAD, LOG_FRAGMENT, 4, |
| "fragment_remove: removed F%d("PFX") from fcache lookup table\n", |
| f->id, f->tag); |
| TABLE_RWLOCK(table, write, unlock); |
| return; |
| } |
| TABLE_RWLOCK(table, write, unlock); |
| |
| /* ok to not find a trace head used to start a trace -- fine to have deleted |
| * the trace head |
| */ |
| ASSERT(cur_trace_tag(dcontext) == f->tag |
| /* PR 299808: we have invisible temp trace bbs */ |
| IF_CLIENT_INTERFACE(|| TEST(FRAG_TEMP_PRIVATE, f->flags))); |
| } |
| |
/* Removes f from ftable, replacing it in the hashtable with new_f,
 * which has an identical tag.
 * f's next field is left intact so this can be done while the owner is in the
 * fcache.  f is NOT deleted in any other way!
 * To delete it later, the caller must call fragment_delete() w/ remove=false.
 */
| void |
| fragment_replace(dcontext_t *dcontext, fragment_t *f, fragment_t *new_f) |
| { |
| per_thread_t *pt = GET_PT(dcontext); |
| fragment_table_t *table = GET_FTABLE(pt, f->flags); |
| TABLE_RWLOCK(table, write, lock); |
| if (hashtable_fragment_replace(f, new_f, table)) { |
| fragment_entry_t fe = FRAGENTRY_FROM_FRAGMENT(f); |
| fragment_entry_t new_fe = FRAGENTRY_FROM_FRAGMENT(new_f); |
| LOG(THREAD, LOG_FRAGMENT, 4, |
| "removed F%d from fcache lookup table (replaced with F%d) "PFX"->~"PFX","PFX"\n", |
| f->id, new_f->id, f->tag, f->start_pc, new_f->start_pc); |
| /* Need to replace all entries from the IBL tables that may have this entry */ |
| if (IS_IBL_TARGET(f->flags)) { |
| ibl_branch_type_t branch_type; |
| for (branch_type = IBL_BRANCH_TYPE_START; |
| branch_type < IBL_BRANCH_TYPE_END; branch_type++) { |
| ibl_table_t *ibtable = GET_IBT_TABLE(pt, f->flags, branch_type); |
| /* currently we don't have shared ib target tables, |
| * otherwise a write lock would come in the picture here |
| */ |
| ASSERT(!TEST(FRAG_TABLE_SHARED, ibtable->table_flags)); |
| hashtable_ibl_replace(fe, new_fe, ibtable); |
| } |
| } |
| } else |
| ASSERT_NOT_REACHED(); |
| TABLE_RWLOCK(table, write, unlock); |
| |
| /* tell monitor f has disappeared, but do not delete from incoming table |
| * or from fcache, also do not dump to trace file |
| */ |
| monitor_remove_fragment(dcontext, f); |
| } |
| |
| void |
| fragment_shift_fcache_pointers(dcontext_t *dcontext, fragment_t *f, ssize_t shift, |
| cache_pc start, cache_pc end, size_t old_size) |
| { |
| per_thread_t *pt = GET_PT(dcontext); |
| |
| IF_X64(ASSERT_NOT_IMPLEMENTED(false)); /* must re-relativize when copying! */ |
| |
| /* need to shift all stored cache_pcs. |
| * do not need to shift relative pcs pointing to other fragments -- they're |
| * all getting shifted too! |
| * just need to re-pc-relativize jmps to fixed locations, namely |
| * cti's in exit stubs, and call instructions inside fragments. |
| */ |
| |
| LOG(THREAD, LOG_FRAGMENT, 2, "fragment_shift_fcache_pointers: F%d + "SSZFMT"\n", |
| f->id, shift); |
| |
| ASSERT(!TEST(FRAG_IS_FUTURE, f->flags)); /* only in-cache frags */ |
| |
| f->start_pc += shift; |
| |
    /* We should shift the cached lookup entries in all IBL target tables.
     * The order doesn't matter here: either way we'll be briefly inconsistent,
     * since we can't do this from within the cache.
     */
| if (IS_IBL_TARGET(f->flags)) { |
| ibl_branch_type_t branch_type; |
| for (branch_type = IBL_BRANCH_TYPE_START; |
| branch_type < IBL_BRANCH_TYPE_END; branch_type++) { |
            /* Of course, we need to shift only pointers into the cache that is
             * getting shifted!
             */
| ibl_table_t *ibtable = GET_IBT_TABLE(pt, f->flags, branch_type); |
| fragment_entry_t fe = FRAGENTRY_FROM_FRAGMENT(f); |
| fragment_entry_t *pg; |
| uint hindex; |
| TABLE_RWLOCK(ibtable, read, lock); |
| pg = hashtable_ibl_lookup_for_removal(fe, ibtable, &hindex); |
| if (pg != NULL) |
| pg->start_pc_fragment += shift; |
| TABLE_RWLOCK(ibtable, read, unlock); |
            LOG(THREAD, LOG_FRAGMENT, 2,
                "fragment_shift_fcache_pointers: %s ibt %s shifted by "SSZFMT"\n",
                TEST(FRAG_IS_TRACE, f->flags) ? "trace" : "BB", ibtable->name, shift);
| } |
| } |
| |
| linkstubs_shift(dcontext, f, shift); |
| |
| DOLOG(6, LOG_FRAGMENT, { /* print after start_pc updated so get actual code */ |
| LOG(THREAD, LOG_FRAGMENT, 6, "before shifting F%d ("PFX")\n", f->id, f->tag); |
| disassemble_fragment(dcontext, f, stats->loglevel < 3); |
| }); |
| |
| #ifdef X86 |
| if (TEST(FRAG_SELFMOD_SANDBOXED, f->flags)) { |
| /* just re-finalize to update */ |
| finalize_selfmod_sandbox(dcontext, f); |
| } |
| #endif |
| |
| /* inter-cache links must be redone, but all fragment entry pcs must be |
| * fixed up first, so that's done separately |
| */ |
| |
| /* re-do pc-relative targets outside of cache */ |
| shift_ctis_in_fragment(dcontext, f, shift, start, end, old_size); |
| #ifdef CHECK_RETURNS_SSE2 |
| finalize_return_check(dcontext, f); |
| #endif |
| |
| DOLOG(6, LOG_FRAGMENT, { |
| LOG(THREAD, LOG_FRAGMENT, 6, "after shifting F%d ("PFX")\n", f->id, f->tag); |
| disassemble_fragment(dcontext, f, stats->loglevel < 3); |
| }); |
| } |
| |
| /* this routine only copies data structures like bbs and statistics |
| */ |
| void |
| fragment_copy_data_fields(dcontext_t *dcontext, fragment_t *f_src, fragment_t *f_dst) |
| { |
| if ((f_src->flags & FRAG_IS_TRACE) != 0) { |
| trace_only_t *t_src = TRACE_FIELDS(f_src); |
| trace_only_t *t_dst = TRACE_FIELDS(f_dst); |
| ASSERT((f_dst->flags & FRAG_IS_TRACE) != 0); |
| if (t_src->bbs != NULL) { |
| t_dst->bbs = |
| nonpersistent_heap_alloc(dcontext, t_src->num_bbs*sizeof(trace_bb_info_t) |
| HEAPACCT(ACCT_TRACE)); |
| memcpy(t_dst->bbs, t_src->bbs, t_src->num_bbs*sizeof(trace_bb_info_t)); |
| t_dst->num_bbs = t_src->num_bbs; |
| } |
| |
| #ifdef PROFILE_RDTSC |
| t_dst->count = t_src->count; |
| t_dst->total_time = t_src->total_time; |
| #endif |
| |
| #if defined (PROFILE_LINKCOUNT) && defined(SIDELINE_COUNT_STUDY) |
| t_dst->count_old_pre = t_src->count_old_pre; |
| t_dst->count_old_post = t_src->count_old_post; |
| #endif |
| } |
| } |
| |
| #if defined(DEBUG) && defined(INTERNAL) |
| static void |
| dump_lookup_table(dcontext_t *dcontext, ibl_table_t *ftable) |
| { |
| uint i; |
| cache_pc target_delete = get_target_delete_entry_pc(dcontext, ftable); |
| |
| ASSERT(target_delete != NULL); |
| ASSERT(ftable->table != NULL); |
| LOG(THREAD, LOG_FRAGMENT, 1, |
| "%6s %10s %10s -- %s\n", "i", "tag", "target", ftable->name); |
| /* need read lock to traverse the table */ |
| TABLE_RWLOCK(ftable, read, lock); |
| for (i = 0; i < ftable->capacity; i++) { |
| if (ftable->table[i].tag_fragment != 0) { |
| if (ftable->table[i].start_pc_fragment == target_delete) { |
| LOG(THREAD, LOG_FRAGMENT, 1, |
| "%6x "PFX" target_delete\n", |
| i, ftable->table[i].tag_fragment); |
| ASSERT(ftable->table[i].tag_fragment == FAKE_TAG); |
| } |
| else { |
| LOG(THREAD, LOG_FRAGMENT, 1, |
| "%6x "PFX" "PFX"\n", |
| i, ftable->table[i].tag_fragment, |
| ftable->table[i].start_pc_fragment); |
| } |
| } |
| DOCHECK(1, { hashtable_ibl_check_consistency(dcontext, ftable, i); }); |
| } |
| TABLE_RWLOCK(ftable, read, unlock); |
| } |
| #endif |
| |
| #ifdef DEBUG |
/* Used only for debugging: checks whether an IBL routine leaks due to wraparound.
 * Interesting only when not using INTERNAL_OPTION(ibl_sentinel_check).
 */
| static bool |
| is_fragment_index_wraparound(dcontext_t *dcontext, ibl_table_t *ftable, fragment_t *f) |
| { |
| uint hindex = HASH_FUNC((ptr_uint_t)f->tag, ftable); |
| uint found_at_hindex; |
| fragment_entry_t fe = FRAGENTRY_FROM_FRAGMENT(f); |
| fragment_entry_t *pg = hashtable_ibl_lookup_for_removal(fe, ftable, &found_at_hindex); |
| ASSERT(pg != NULL); |
| ASSERT(IBL_ENTRIES_ARE_EQUAL(*pg, fe)); |
| LOG(THREAD, LOG_FRAGMENT, 3, |
| "is_fragment_index_wraparound F%d, tag "PFX", found_at_hindex 0x%x, preferred 0x%x\n", |
| f->id, f->tag, found_at_hindex, hindex); |
| return (found_at_hindex < hindex); /* wraparound */ |
| } |
| #endif /* DEBUG */ |
| |
| static void |
| fragment_add_ibl_target_helper(dcontext_t *dcontext, fragment_t *f, |
| ibl_table_t *ibl_table) |
| { |
| fragment_entry_t current; |
| fragment_entry_t fe = FRAGENTRY_FROM_FRAGMENT(f); |
| |
| /* Never add a BB to a trace table. */ |
| ASSERT(!(!TEST(FRAG_IS_TRACE, f->flags) && |
| TEST(FRAG_TABLE_TRACE, ibl_table->table_flags))); |
| |
| /* adding is a write operation */ |
| TABLE_RWLOCK(ibl_table, write, lock); |
| /* This is the last time the table lock is grabbed before adding the frag so |
| * check here to account for the race in the time between the |
| * FRAG_IS_TRACE_HEAD check in add_ibl_target() and now. We never add trace |
| * heads to an IBT target table. |
| */ |
| if (TEST(FRAG_IS_TRACE_HEAD, f->flags)) { |
| TABLE_RWLOCK(ibl_table, write, unlock); |
| STATS_INC(num_th_bb_ibt_add_race); |
| return; |
| } |
| /* For shared tables, check again in case another thread snuck in |
| * before the preceding lock and added the target. */ |
| if (TEST(FRAG_TABLE_SHARED, ibl_table->table_flags)) { |
| current = hashtable_ibl_lookup(dcontext, (ptr_uint_t)f->tag, ibl_table); |
| if (IBL_ENTRY_IS_EMPTY(current)) |
| hashtable_ibl_add(dcontext, fe, ibl_table); |
| /* We don't ever expect to find a like-tagged fragment. A BB |
| * can be unlinked due to eviction or when it's marked as a trace |
| * head. Eviction (for example, due to cache consistency) |
| * sets start_pc_fragment to FAKE_TAG, so there can't be |
| * a tag match; &unlinked_fragment is returned, and this |
| * applies to traces also. For trace head marking, FAKE_TAG |
| * is also set so &unlinked_fragment is returned. |
| * |
| * If we didn't set FAKE_TAG for an unlinked entry, then it |
| * could be clobbered with a new fragment w/the same tag. |
| * In a shared table, unlinked entries cannot be clobbered |
| * except by fragments w/the same tags, so this could help |
| * limit the length of collision chains. |
| */ |
| } |
| else { |
| hashtable_ibl_add(dcontext, fe, ibl_table); |
| } |
| TABLE_RWLOCK(ibl_table, write, unlock); |
| DOSTATS({ |
| if (!TEST(FRAG_IS_TRACE, f->flags)) |
| STATS_INC(num_bbs_ibl_targets); |
        /* We assume that traces can be added to both trace and BB IBT tables but
         * never only to BB tables.  We count only traces added to trace tables
| * so that we don't double increment. |
| */ |
| else if (TEST(FRAG_IS_TRACE, f->flags) && |
| TEST(FRAG_TABLE_TRACE, ibl_table->table_flags)) |
| STATS_INC(num_traces_ibl_targets); |
| }); |
| |
    /* We log the current exit to help estimate the indirect branch fan-out,
     * i.e., function fan-in for returns.  Note that other IBL hits to the
     * same target will not have exits associated.
     */
| LOG(THREAD, LOG_FRAGMENT, 2, |
| "fragment_add_ibl_target added F%d("PFX"), branch %d, to %s, on exit from "PFX"\n", |
| f->id, f->tag, |
| ibl_table->branch_type, ibl_table->name, |
| LINKSTUB_FAKE(dcontext->last_exit) ? 0 : |
| EXIT_CTI_PC(dcontext->last_fragment, dcontext->last_exit) |
| ); |
| DOLOG(4, LOG_FRAGMENT, { |
| dump_lookuptable_tls(dcontext); |
| hashtable_ibl_dump_table(dcontext, ibl_table); |
| dump_lookup_table(dcontext, ibl_table); |
| }); |
| DODEBUG({ |
| if (TEST(FRAG_SHARED, f->flags) && !TEST(FRAG_IS_TRACE, f->flags)) |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| "add_ibl_target: shared BB F%d("PFX") added\n", f->id, |
| f->tag); |
| }); |
| } |
| |
/* Looks up the IBL-targeted fragment for tag and, if one exists and is
 * eligible, adds it to the per-branch-type IBT table(s).  Returns the
 * fragment, or NULL if no eligible fragment exists.
 */
| fragment_t * |
| fragment_add_ibl_target(dcontext_t *dcontext, app_pc tag, |
| ibl_branch_type_t branch_type) |
| { |
| per_thread_t *pt = (per_thread_t *) dcontext->fragment_field; |
| fragment_t *f = NULL; |
| fragment_t wrapper; |
| |
| if (SHARED_BB_ONLY_IB_TARGETS()) { |
| f = fragment_lookup_bb(dcontext, tag); |
| if (f == NULL) { |
| f = fragment_coarse_lookup_wrapper(dcontext, tag, &wrapper); |
| if (f != NULL) { |
| #if defined(RETURN_AFTER_CALL) || defined(RCT_IND_BRANCH) |
| if (TEST(COARSE_FILL_IBL_MASK(branch_type), |
| DYNAMO_OPTION(coarse_fill_ibl))) { |
| /* On-demand per-type ibl filling from the persisted RAC/RCT |
| * table. We limit to the first thread to ask for it by |
| * clearing the coarse_info_t pending_table fields. |
| */ |
| /* FIXME: combine w/ the coarse lookup to do this once only */ |
| coarse_info_t *coarse = get_fragment_coarse_info(f); |
| ASSERT(coarse != NULL); |
| if (coarse->persisted && |
| exists_coarse_ibl_pending_table(dcontext, coarse, branch_type)) { |
| bool in_persisted_ibl = false; |
| mutex_lock(&coarse->lock); |
| if (exists_coarse_ibl_pending_table(dcontext, |
| coarse, branch_type)) { |
| ibl_table_t *ibl_table = |
| GET_IBT_TABLE(pt, f->flags, branch_type); |
| coarse_persisted_fill_ibl(dcontext, coarse, branch_type); |
| TABLE_RWLOCK(ibl_table, read, lock); |
| if (!IBL_ENTRY_IS_EMPTY(hashtable_ibl_lookup(dcontext, |
| (ptr_uint_t)tag, ibl_table))) |
| in_persisted_ibl = true; |
| TABLE_RWLOCK(ibl_table, read, unlock); |
| if (in_persisted_ibl) { |
| mutex_unlock(&coarse->lock); |
| return f; |
| } |
| } |
| mutex_unlock(&coarse->lock); |
| } |
| } |
| #endif /* defined(RETURN_AFTER_CALL) || defined(RCT_IND_BRANCH) */ |
| } |
| } |
| } else { |
| f = fragment_lookup_trace(dcontext, tag); |
| if (f == NULL && DYNAMO_OPTION(bb_ibl_targets)) { |
| /* Populate with bb's that are not trace heads */ |
| f = fragment_lookup_bb(dcontext, tag); |
            /* We don't add trace heads OR when a trace is targeting a BB. In the
| * latter case, the BB will shortly be marked as a trace head and |
| * removed from the IBT table so we don't needlessly add it. |
| */ |
| if (f != NULL && |
| (TEST(FRAG_IS_TRACE_HEAD, f->flags) || |
| TEST(FRAG_IS_TRACE, dcontext->last_fragment->flags))) { |
| /* FIXME: should change the logic if trace headness becomes a private property */ |
| f = NULL; /* ignore fragment */ |
| STATS_INC(num_ib_th_target); /* counted in num_ibt_cold_misses */ |
| } |
| } |
| } |
| |
| LOG(THREAD, LOG_FRAGMENT, 3, |
| "fragment_add_ibl_target tag "PFX", branch %d, F%d %s\n", |
| tag, branch_type, f != NULL ? f->id : 0, |
| (f != NULL && TEST(FRAG_IS_TRACE, f->flags)) ? "existing trace" : ""); |
| |
| /* a valid IBT fragment exists */ |
| if (f != NULL) { |
| ibl_table_t *ibl_table = GET_IBT_TABLE(pt, f->flags, branch_type); |
| DEBUG_DECLARE(fragment_entry_t *orig_lookuptable = NULL;) |
| fragment_entry_t current; |
| |
| /* Make sure this thread's local ptrs & state is current in case a |
| * shared table resize occurred while it was in the cache. We update |
| * only during an IBL miss, since that's the first time that |
| * accessing the old table inflicted a cost (a context switch). |
| */ |
| /* NOTE We could be more aggressive and update the private ptrs for |
| * all tables, not just the one being added to, by calling |
| * update_all_private_ibt_table_ptrs(). We could be even more |
| * aggressive by updating all ptrs on every cache exit in |
| * enter_couldbelinking() but that could also prove to be more |
| * expensive by invoking the update logic when an IBL miss didn't |
| * occur. However, more frequent updates could lead to old tables |
| * being freed earlier. We can revisit this if we see old tables |
| * piling up and not being freed in a timely manner. |
| */ |
| update_private_ibt_table_ptrs(dcontext, ibl_table |
| _IF_DEBUG(&orig_lookuptable)); |
| |
| /* We can't place a private fragment into a thread-shared table. |
| * Nothing prevents a sandboxed or ignore syscalls frag from being |
| * the target of an IB. This is covered by case 5836. |
| * |
| * We don't need to re-check in the add_ibl_target_helper because |
| * the shared/private property applies to all IBT tables -- either |
| * all are shared or none are. |
| */ |
| if (TEST(FRAG_TABLE_SHARED, ibl_table->table_flags) && |
| !TEST(FRAG_SHARED, f->flags)) { |
| STATS_INC(num_ibt_shared_private_conflict); |
| return f; |
| } |
| |
| ASSERT(TEST(FRAG_IS_TRACE, f->flags) == |
| TEST(FRAG_TABLE_TRACE, ibl_table->table_flags)); |
| /* We can't assert that an IBL target isn't a trace head due to a race |
| * between trace head marking and adding to a table. See the comments |
| * in fragment_add_to_hashtable(). |
| */ |
| TABLE_RWLOCK(ibl_table, read, lock); |
| current = hashtable_ibl_lookup(dcontext, (ptr_uint_t)tag, ibl_table); |
| TABLE_RWLOCK(ibl_table, read, unlock); |
| /* Now that we set the fragment_t* for any unlinked entry to |
| * &unlinked_fragment -- regardless of why it was unlinked -- and also |
| * set the lookup table tag to FAKE_TAG, we should never find a fragment |
| * with the same tag and should never have an unlinked marker returned |
| * here. |
| */ |
| ASSERT(!IBL_ENTRY_IS_INVALID(current)); |
| if (IBL_ENTRY_IS_EMPTY(current)) { |
| DOLOG(4, LOG_FRAGMENT, { |
| dump_lookuptable_tls(dcontext); |
| hashtable_ibl_dump_table(dcontext, ibl_table); |
| dump_lookup_table(dcontext, ibl_table); |
| }); |
| fragment_add_ibl_target_helper(dcontext, f, ibl_table); |
| /* When using BB2BB IBL w/trace building, we add trace targets |
| * to the BB table. (We always add a trace target to the trace |
| * table.) We fool the helper routine into using the BB |
| * table by passing in a non-trace value for the flags argument. |
| */ |
| if (TEST(FRAG_IS_TRACE, f->flags) && DYNAMO_OPTION(bb_ibl_targets) && |
| DYNAMO_OPTION(bb_ibt_table_includes_traces)) { |
| ibl_table_t *ibl_table_too = |
| GET_IBT_TABLE(pt, f->flags & ~FRAG_IS_TRACE, branch_type); |
| |
| ASSERT(ibl_table_too != NULL); |
| ASSERT(!TEST(FRAG_TABLE_TRACE, ibl_table_too->table_flags)); |
| /* Make sure this thread's local ptrs & state is up to |
| * date in case a resize occurred while it was in the cache. */ |
| update_private_ibt_table_ptrs(dcontext, ibl_table_too |
| _IF_DEBUG(NULL)); |
| fragment_add_ibl_target_helper(dcontext, f, ibl_table_too); |
| } |
| } |
| else { |
| DEBUG_DECLARE(const char *reason;) |
| #ifdef DEBUG |
| if (is_building_trace(dcontext)) { |
| reason = "trace building"; |
| STATS_INC(num_ibt_exit_trace_building); |
| } else if (TEST(FRAG_WAS_DELETED, dcontext->last_fragment->flags)) { |
| reason = "src unlinked (frag deleted)"; |
| STATS_INC(num_ibt_exit_src_unlinked_frag_deleted); |
| } else if (!TEST(LINK_LINKED, dcontext->last_exit->flags) && |
| TESTALL(FRAG_SHARED | FRAG_IS_TRACE_HEAD, |
| dcontext->last_fragment->flags) && |
| fragment_lookup_type(dcontext, |
| dcontext->last_fragment->tag, |
| LOOKUP_TRACE|LOOKUP_SHARED) != NULL) { |
| /* Another thread unlinked src as part of replacing it with |
| * a new trace while this thread was in there (see case 5634 |
| * for details) */ |
| reason = "src unlinked (shadowed)"; |
| STATS_INC(num_ibt_exit_src_unlinked_shadowed); |
| } else if (!INTERNAL_OPTION(ibl_sentinel_check) && |
| is_fragment_index_wraparound(dcontext, ibl_table, f)) { |
| reason = "sentinel"; |
| STATS_INC(num_ibt_leaks_likely_sentinel); |
| } else if (TEST(FRAG_SELFMOD_SANDBOXED, dcontext->last_fragment->flags)) { |
| reason = "src sandboxed"; |
| STATS_INC(num_ibt_exit_src_sandboxed); |
| } else if (TEST(FRAG_TABLE_SHARED, ibl_table->table_flags) && |
| orig_lookuptable != ibl_table->table) { |
| /* A table resize could cause a miss when the target is |
| * in the new table. */ |
| reason = "shared IBT table resize"; |
| STATS_INC(num_ibt_exit_shared_table_resize); |
| } else if (DYNAMO_OPTION(bb_ibl_targets) && |
| IS_SHARED_SYSCALLS_LINKSTUB(dcontext->last_exit) && |
| !DYNAMO_OPTION(disable_traces) && |
| !TEST(FRAG_IS_TRACE, f->flags)) { |
| reason = "shared syscall exit cannot target BBs"; |
| STATS_INC(num_ibt_exit_src_trace_shared_syscall); |
| } else if (DYNAMO_OPTION(bb_ibl_targets) && |
| TEST(FRAG_IS_TRACE, f->flags) && |
| !DYNAMO_OPTION(bb_ibt_table_includes_traces)) { |
| reason = "BBs do not target traces"; |
| STATS_INC(num_ibt_exit_src_trace_shared_syscall); |
| } else if (!INTERNAL_OPTION(link_ibl)) { |
| reason = "-no_link_ibl prevents ibl"; |
| STATS_INC(num_ibt_exit_nolink); |
| } else { |
| reason = "BAD leak?"; |
| DOLOG(3, LOG_FRAGMENT, { |
| hashtable_ibl_dump_table(dcontext, ibl_table); |
| hashtable_ibl_study(dcontext, ibl_table, 0/*table consistent*/); |
| }); |
| STATS_INC(num_ibt_exit_unknown); |
| ASSERT_CURIOSITY_ONCE(false && "fragment_add_ibl_target unknown reason"); |
| } |
| /* nothing to do, just sanity checking */ |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| "fragment_add_ibl_target tag "PFX", F%d already added - %s\n", |
| tag, f->id, reason); |
| #endif |
| } |
| } else { |
| STATS_INC(num_ibt_cold_misses); |
| } |
| #ifdef HASHTABLE_STATISTICS |
| if (INTERNAL_OPTION(stay_on_trace_stats)) { |
| /* best effort: adjust for 32bit counter overflow occasionally |
| * we'll get a hashtable leak only when not INTERNAL_OPTION(ibl_sentinel_check) |
| */ |
| check_stay_on_trace_stats_overflow(dcontext, branch_type); |
| } |
| #endif /* HASHTABLE_STATISTICS */ |
| DOLOG(4, LOG_FRAGMENT, { |
| dump_lookuptable_tls(dcontext); |
| }); |
| return f; |
| } |
| |
| /**********************************************************************/ |
| /* FUTURE FRAGMENTS */ |
| |
/* create a new future fragment and return it
 */
| static future_fragment_t * |
| fragment_create_future(dcontext_t *dcontext, app_pc tag, uint flags) |
| { |
| dcontext_t *alloc_dc = FRAGMENT_ALLOC_DC(dcontext, flags); |
| future_fragment_t *fut = (future_fragment_t*) |
| nonpersistent_heap_alloc(alloc_dc, sizeof(future_fragment_t) |
| HEAPACCT(ACCT_FRAG_FUTURE)); |
| ASSERT(!NEED_SHARED_LOCK(flags) || self_owns_recursive_lock(&change_linking_lock)); |
| LOG(THREAD, LOG_FRAGMENT, 4, "Created future fragment "PFX" w/ flags 0x%08x\n", |
| tag, flags|FRAG_FAKE|FRAG_IS_FUTURE); |
| STATS_INC(num_future_fragments); |
| DOSTATS({ |
| if (TEST(FRAG_SHARED, flags)) |
| STATS_INC(num_shared_future_fragments); |
| }); |
| fut->tag = tag; |
| fut->flags = flags | FRAG_FAKE | FRAG_IS_FUTURE; |
| fut->incoming_stubs = NULL; |
| return fut; |
| } |
| |
| static void |
| fragment_free_future(dcontext_t *dcontext, future_fragment_t *fut) |
| { |
| dcontext_t *alloc_dc = FRAGMENT_ALLOC_DC(dcontext, fut->flags); |
| LOG(THREAD, LOG_FRAGMENT, 4, "Freeing future fragment "PFX"\n", fut->tag); |
| ASSERT(fut->incoming_stubs == NULL); |
| nonpersistent_heap_free(alloc_dc, fut, sizeof(future_fragment_t) |
| HEAPACCT(ACCT_FRAG_FUTURE)); |
| } |
| |
| future_fragment_t * |
| fragment_create_and_add_future(dcontext_t *dcontext, app_pc tag, uint flags) |
| { |
| per_thread_t *pt = GET_PT(dcontext); |
| future_fragment_t *fut = fragment_create_future(dcontext, tag, flags); |
| fragment_table_t *futtable = GET_FTABLE(pt, fut->flags); |
| ASSERT(!NEED_SHARED_LOCK(flags) || self_owns_recursive_lock(&change_linking_lock)); |
| /* adding to the table is a write operation */ |
| TABLE_RWLOCK(futtable, write, lock); |
| fragment_add_to_hashtable(dcontext, (fragment_t *)fut, futtable); |
| TABLE_RWLOCK(futtable, write, unlock); |
| return fut; |
| } |
| |
| void |
| fragment_delete_future(dcontext_t *dcontext, future_fragment_t *fut) |
| { |
| per_thread_t *pt = GET_PT(dcontext); |
| fragment_table_t *futtable = GET_FTABLE(pt, fut->flags); |
| ASSERT(!NEED_SHARED_LOCK(fut->flags) || |
| self_owns_recursive_lock(&change_linking_lock)); |
| /* removing from the table is a write operation */ |
| TABLE_RWLOCK(futtable, write, lock); |
| hashtable_fragment_remove((fragment_t *)fut, futtable); |
| TABLE_RWLOCK(futtable, write, unlock); |
| fragment_free_future(dcontext, fut); |
| } |
| |
| /* We do not want to remove futures from a flushed region if they have |
| * incoming links (i#609). |
| */ |
| static bool |
| fragment_delete_future_filter(fragment_t *f) |
| { |
| future_fragment_t *fut = (future_fragment_t *) f; |
| ASSERT(TEST(FRAG_IS_FUTURE, f->flags)); |
| return (fut->incoming_stubs == NULL); |
| } |
| |
| static uint |
| fragment_delete_futures_in_region(dcontext_t *dcontext, app_pc start, app_pc end) |
| { |
| per_thread_t *pt = GET_PT(dcontext); |
| uint flags = FRAG_IS_FUTURE | (dcontext == GLOBAL_DCONTEXT ? FRAG_SHARED : 0); |
| fragment_table_t *futtable = GET_FTABLE(pt, flags); |
| uint removed; |
| /* Higher-level lock needed since we do lookup+add w/o holding table lock between */ |
| ASSERT(!NEED_SHARED_LOCK(flags) || self_owns_recursive_lock(&change_linking_lock)); |
| TABLE_RWLOCK(futtable, write, lock); |
| removed = hashtable_fragment_range_remove(dcontext, futtable, |
| (ptr_uint_t)start, (ptr_uint_t)end, |
| fragment_delete_future_filter); |
| TABLE_RWLOCK(futtable, write, unlock); |
| return removed; |
| } |
| |
| future_fragment_t * |
| fragment_lookup_future(dcontext_t *dcontext, app_pc tag) |
| { |
    /* The default is to look up the shared future, since a private one only
     * sometimes exists, and callers usually only care about trace heads, for
     * which the shared table is always used.
     */
| uint flags = SHARED_FRAGMENTS_ENABLED() ? FRAG_SHARED : 0; |
| per_thread_t *pt = GET_PT(dcontext); |
| fragment_table_t *futtable = GET_FTABLE(pt, FRAG_IS_FUTURE | flags); |
| fragment_t *f; |
| TABLE_RWLOCK(futtable, read, lock); |
| f = hashtable_fragment_lookup(dcontext, (ptr_uint_t)tag, futtable); |
| TABLE_RWLOCK(futtable, read, unlock); |
| if (f != &null_fragment) |
| return (future_fragment_t *) f; |
| return NULL; |
| } |
| |
| future_fragment_t * |
| fragment_lookup_private_future(dcontext_t *dcontext, app_pc tag) |
| { |
| per_thread_t *pt = (per_thread_t *) dcontext->fragment_field; |
| fragment_table_t *futtable = GET_FTABLE(pt, FRAG_IS_FUTURE); |
| fragment_t *f = hashtable_fragment_lookup(dcontext, (ptr_uint_t)tag, futtable); |
| if (f != &null_fragment) |
| return (future_fragment_t *) f; |
| return NULL; |
| } |
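| |
| /* An illustrative sketch (not from the original sources) of how the future |
| * API above is typically combined: a caller that might race with other |
| * adders of shared futures holds the change_linking_lock and does a |
| * lookup-then-create, matching the ASSERT in fragment_create_and_add_future(): |
| * |
| *     acquire_recursive_lock(&change_linking_lock); |
| *     fut = fragment_lookup_future(dcontext, tag); |
| *     if (fut == NULL) |
| *         fut = fragment_create_and_add_future(dcontext, tag, flags); |
| *     release_recursive_lock(&change_linking_lock); |
| */ |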
| |
| /* END FUTURE FRAGMENTS |
| **********************************************************************/ |
| |
| #if defined(RETURN_AFTER_CALL) || defined(RCT_IND_BRANCH) |
| /* FIXME: move to rct.c when we move the whole app_pc table there */ |
| |
| #define STATS_RCT_ADD(which, stat, val) DOSTATS({ \ |
| if ((which) == RCT_RAC) \ |
| STATS_ADD(rac_##stat, val); \ |
| else \ |
| STATS_ADD(rct_##stat, val); \ |
| }) |
| |
| static inline bool |
| rct_is_global_table(rct_module_table_t *permod) |
| { |
| return (permod == &rac_non_module_table || |
| IF_UNIX_ELSE(permod == &rct_global_table, false)); |
| } |
| |
| static inline rct_module_table_t * |
| rct_get_table(app_pc tag, rct_type_t which) |
| { |
| rct_module_table_t *permod = os_module_get_rct_htable(tag, which); |
| if (permod == NULL) { /* not a module */ |
| if (which == RCT_RAC) |
| permod = &rac_non_module_table; |
| } |
| return permod; |
| } |
| |
| /* returns NULL if not found */ |
| static app_pc |
| rct_table_lookup_internal(dcontext_t *dcontext, app_pc tag, |
| rct_module_table_t *permod) |
| { |
| app_pc actag = NULL; |
| ASSERT(os_get_module_info_locked()); |
| if (permod != NULL) { |
| /* Check persisted table first as it's likely to be larger and |
| * it needs no read lock |
| */ |
| if (permod->persisted_table != NULL) { |
| actag = hashtable_app_pc_rlookup(dcontext, (ptr_uint_t)tag, |
| permod->persisted_table); |
| } |
| if (actag == NULL && permod->live_table != NULL) { |
| actag = hashtable_app_pc_rlookup(dcontext, (ptr_uint_t)tag, |
| permod->live_table); |
| } |
| } |
| return actag; |
| } |
| |
| /* returns NULL if not found */ |
| static app_pc |
| rct_table_lookup(dcontext_t *dcontext, app_pc tag, rct_type_t which) |
| { |
| app_pc actag = NULL; |
| rct_module_table_t *permod; |
| ASSERT(which >= 0 && which < RCT_NUM_TYPES); |
| os_get_module_info_lock(); |
| permod = rct_get_table(tag, which); |
| actag = rct_table_lookup_internal(dcontext, tag, permod); |
| os_get_module_info_unlock(); |
| return actag; |
| } |
| |
| /* Caller must hold the higher-level lock. |
| * Returns whether added a new entry or not. |
| */ |
| static bool |
| rct_table_add(dcontext_t *dcontext, app_pc tag, rct_type_t which) |
| { |
| rct_module_table_t *permod; |
| /* we use a higher-level lock to synchronize the lookup + add |
| * combination with other simultaneous adds as well as with removals |
| */ |
| /* FIXME We could use just the table lock for the lookup+add. This is cleaner |
| * than using another lock during the entire routine and acquiring & releasing |
| * the table lock in read mode for the lookup and then acquiring & releasing |
| * it again in write mode for the add. Also, any writes to the table outside |
| * of this routine would be blocked (as is desired). The down side is that |
| * reads would be blocked during the entire operation. |
| * The #ifdef DEBUG lookup would need to be moved to after the table lock |
| * is released to avoid a rank order violation (all table locks have the |
| * same rank). That's not problematic since it's only stat code. |
| */ |
| /* If we no longer hold this high-level lock for adds+removes we need |
| * to hold the new add/remove lock across persist_size->persist |
| */ |
| ASSERT_OWN_MUTEX(true, (which == RCT_RAC ? &after_call_lock : &rct_module_lock)); |
| os_get_module_info_lock(); |
| permod = rct_get_table(tag, which); |
| /* Xref case 9717, on a partial image mapping we may try to add locations |
| * (specifically the entry point) that are outside of any module. Technically this |
| * is also possible on a full mapping since we've seen entry points redirected |
| * (and there's nothing requiring that they be re-directed to another dll or, if |
| * at dr init, that we've already processed that target module; xref case 10693). */ |
| ASSERT_CURIOSITY(permod != NULL || EXEMPT_TEST("win32.partial_map.exe")); |
| if (permod == NULL || rct_table_lookup_internal(dcontext, tag, permod) != NULL) { |
| os_get_module_info_unlock(); |
| return false; |
| } |
| if (permod->live_table == NULL) { |
| /* lazily initialized */ |
| if (rct_is_global_table(permod)) |
| SELF_UNPROTECT_DATASEC(DATASEC_RARELY_PROT); |
| permod->live_table = HEAP_TYPE_ALLOC(GLOBAL_DCONTEXT, app_pc_table_t, |
| ACCT_AFTER_CALL, PROTECTED); |
| if (rct_is_global_table(permod)) { |
| /* For global tables we would have to move to heap, or |
| * else unprot every time, to maintain min and max: but |
| * the min-max optimization isn't going to help global |
| * tables so we just don't bother. |
| */ |
| permod->live_min = NULL; |
| permod->live_max = (app_pc) POINTER_MAX; |
| SELF_PROTECT_DATASEC(DATASEC_RARELY_PROT); |
| } |
| hashtable_app_pc_init(GLOBAL_DCONTEXT, permod->live_table, |
| which == RCT_RAC ? INIT_HTABLE_SIZE_AFTER_CALL : |
| INIT_HTABLE_SIZE_RCT_IBT, |
| which == RCT_RAC ? DYNAMO_OPTION(shared_after_call_load) : |
| DYNAMO_OPTION(global_rct_ind_br_load), |
| (hash_function_t)INTERNAL_OPTION(alt_hash_func), |
| 0 /* hash_mask_offset */, |
| (SHARED_FRAGMENTS_ENABLED() ? |
| HASHTABLE_ENTRY_SHARED : 0) |
| | HASHTABLE_SHARED | HASHTABLE_PERSISTENT |
| /* I'm seeing a number of high-ave-collision |
| * cases on both rac and rct; there's no easy |
| * way to estimate final size, so going to |
| * relax a little as not perf-critical */ |
| | HASHTABLE_RELAX_CLUSTER_CHECKS |
| _IF_DEBUG(which == RCT_RAC ? "after_call_targets" : |
| "rct_ind_targets")); |
| STATS_RCT_ADD(which, live_tables, 1); |
| } |
| ASSERT(permod->live_table != NULL); |
| /* adding is a write operation */ |
| TABLE_RWLOCK(permod->live_table, write, lock); |
| hashtable_app_pc_add(dcontext, tag, permod->live_table); |
| TABLE_RWLOCK(permod->live_table, write, unlock); |
| /* case 7628: used for persistence optimization: but watch overhead */ |
| if (!rct_is_global_table(permod)) { |
| /* See comments above */ |
| if (permod->live_min == NULL || tag < permod->live_min) |
| permod->live_min = tag; |
| if (tag > permod->live_max) |
| permod->live_max = tag; |
| } |
| os_get_module_info_unlock(); |
| STATS_RCT_ADD(which, live_entries, 1); |
| DOSTATS({ |
| if (permod == &rac_non_module_table) |
| STATS_INC(rac_non_module_entries); |
| }); |
| return true; |
| } |
| |
| static void |
| rct_table_flush_entry(dcontext_t *dcontext, app_pc tag, rct_type_t which) |
| { |
| rct_module_table_t *permod; |
| /* need higher level lock to properly synchronize with lookup+add */ |
| ASSERT_OWN_MUTEX(true, (which == RCT_RAC ? &after_call_lock : &rct_module_lock)); |
| os_get_module_info_lock(); |
| permod = rct_get_table(tag, which); |
| ASSERT(permod != NULL); |
| /* We should have removed any persist info before calling this routine */ |
| ASSERT(permod->persisted_table == NULL); |
| ASSERT(permod->live_table != NULL); |
| if (permod->live_table != NULL) { |
| /* removing is a write operation */ |
| TABLE_RWLOCK(permod->live_table, write, lock); |
| hashtable_app_pc_remove(tag, permod->live_table); |
| TABLE_RWLOCK(permod->live_table, write, unlock); |
| } |
| os_get_module_info_unlock(); |
| } |
| |
| /* Invalidates all after call or indirect branch targets from given |
| * range [text_start,text_end) which must be either completely |
| * contained in a single module or not touch any modules. |
| * Assuming any existing fragments that were added to IBL tables will |
| * be flushed independently: this routine only flushes the policy |
| * information. |
| * Note this needs to be called on app_memory_deallocation() for RAC |
| * (potentially from DGC), and rct_process_module_mmap() for other RCT |
| * entries which should be only in modules. |
| * |
| * Returns entries flushed. |
| */ |
| static uint |
| rct_table_invalidate_range(dcontext_t *dcontext, rct_type_t which, |
| app_pc text_start, app_pc text_end) |
| { |
| uint entries_removed = 0; |
| rct_module_table_t *permod; |
| /* need higher level lock to properly synchronize with lookup+add */ |
| ASSERT_OWN_MUTEX(true, (which == RCT_RAC ? &after_call_lock : &rct_module_lock)); |
| ASSERT(text_start < text_end); |
| |
| if (DYNAMO_OPTION(rct_sticky)) { |
| /* case 5329 - leaving for bug-compatibility with previous releases */ |
| /* trade-off is spurious RCT violations vs memory leak */ |
| return 0; |
| } |
| |
| /* We only support removing from within a single module or not touching |
| * any modules */ |
| ASSERT(get_module_base(text_start) == get_module_base(text_end)); |
| |
| os_get_module_info_lock(); |
| permod = rct_get_table(text_start, which); |
| ASSERT(permod != NULL); |
| /* We should have removed any persist info before calling this routine */ |
| ASSERT(permod->persisted_table == NULL); |
| ASSERT(permod->live_table != NULL); |
| if (permod != NULL && permod->live_table != NULL) { |
| TABLE_RWLOCK(permod->live_table, write, lock); |
| entries_removed = |
| hashtable_app_pc_range_remove(dcontext, permod->live_table, |
| (ptr_uint_t)text_start, (ptr_uint_t)text_end, |
| NULL); |
| |
| DOCHECK(1, { |
| uint second_pass = |
| hashtable_app_pc_range_remove(dcontext, permod->live_table, |
| (ptr_uint_t)text_start, |
| (ptr_uint_t)text_end, NULL); |
| ASSERT(second_pass == 0 && "nothing should be missed"); |
| /* simplest sanity check that hashtable_app_pc_range_remove() works */ |
| }); |
| TABLE_RWLOCK(permod->live_table, write, unlock); |
| } |
| os_get_module_info_unlock(); |
| return entries_removed; |
| } |
| |
| static void |
| rct_table_free_internal(dcontext_t *dcontext, app_pc_table_t *table) |
| { |
| hashtable_app_pc_free(dcontext, table); |
| ASSERT(TEST(HASHTABLE_PERSISTENT, table->table_flags)); |
| HEAP_TYPE_FREE(dcontext, table, app_pc_table_t, ACCT_AFTER_CALL, PROTECTED); |
| } |
| |
| void |
| rct_table_free(dcontext_t *dcontext, app_pc_table_t *table, bool free_data) |
| { |
| DODEBUG({ |
| DOLOG(1, LOG_FRAGMENT|LOG_STATS, { |
| hashtable_app_pc_load_statistics(dcontext, table); |
| }); |
| hashtable_app_pc_study(dcontext, table, 0/*table consistent*/); |
| }); |
| if (!free_data) { |
| /* We don't need the free_data param anymore */ |
| ASSERT(table->table_unaligned == NULL); /* don't try to free, part of mmap */ |
| } |
| rct_table_free_internal(GLOBAL_DCONTEXT, table); |
| } |
| |
| app_pc_table_t * |
| rct_table_copy(dcontext_t *dcontext, app_pc_table_t *src) |
| { |
| if (src == NULL) |
| return NULL; |
| else |
| return hashtable_app_pc_copy(GLOBAL_DCONTEXT, src); |
| } |
| |
| app_pc_table_t * |
| rct_table_merge(dcontext_t *dcontext, app_pc_table_t *src1, app_pc_table_t *src2) |
| { |
| if (src1 == NULL) { |
| if (src2 == NULL) |
| return NULL; |
| return hashtable_app_pc_copy(GLOBAL_DCONTEXT, src2); |
| } else if (src2 == NULL) |
| return hashtable_app_pc_copy(GLOBAL_DCONTEXT, src1); |
| else |
| return hashtable_app_pc_merge(GLOBAL_DCONTEXT, src1, src2); |
| } |
| |
| /* Up to caller to synchronize access to table. */ |
| uint |
| rct_table_persist_size(dcontext_t *dcontext, app_pc_table_t *table) |
| { |
| /* Don't persist zero-entry tables */ |
| if (table == NULL || table->entries == 0) |
| return 0; |
| else |
| return hashtable_app_pc_persist_size(dcontext, table); |
| } |
| |
| /* Up to caller to synchronize access to table. |
| * Returns true iff all writes succeeded. |
| */ |
| bool |
| rct_table_persist(dcontext_t *dcontext, app_pc_table_t *table, file_t fd) |
| { |
| bool success = true; |
| ASSERT(fd != INVALID_FILE); |
| ASSERT(table != NULL); /* caller shouldn't call us o/w */ |
| if (table != NULL) |
| success = hashtable_app_pc_persist(dcontext, table, fd); |
| return success; |
| } |
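| |
| /* An illustrative sketch (not from the original sources) of the persist |
| * sequence the two routines above support; the caller is the pcache writer |
| * and, per the comments in rct_table_add(), it must synchronize across the |
| * persist_size -> persist pair: |
| * |
| *     uint sz = rct_table_persist_size(dcontext, table); |
| *     if (sz > 0) { |
| *         ... reserve sz bytes in the persisted-cache file ... |
| *         success = rct_table_persist(dcontext, table, fd); |
| *     } |
| * |
| * On load, rct_table_resurrect() rebuilds an app_pc_table_t from the mapped |
| * bytes and rct_module_table_set() installs it as the persisted_table. |
| */ |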
| |
| app_pc_table_t * |
| rct_table_resurrect(dcontext_t *dcontext, byte *mapped_table, rct_type_t which) |
| { |
| return hashtable_app_pc_resurrect(GLOBAL_DCONTEXT, mapped_table |
| _IF_DEBUG(which == RCT_RAC ? "after_call_targets" : |
| "rct_ind_targets")); |
| } |
| |
| void |
| rct_module_table_free(dcontext_t *dcontext, rct_module_table_t *permod, app_pc modpc) |
| { |
| ASSERT(os_get_module_info_locked()); |
| if (permod->live_table != NULL) { |
| rct_table_free(GLOBAL_DCONTEXT, permod->live_table, true); |
| permod->live_table = NULL; |
| } |
| if (permod->persisted_table != NULL) { |
| /* persisted table: table data is from disk, but header is on heap */ |
| rct_table_free(GLOBAL_DCONTEXT, permod->persisted_table, false); |
| permod->persisted_table = NULL; |
| /* coarse_info_t has a duplicated pointer to the persisted table, |
| * but it should always be flushed before we get here |
| */ |
| ASSERT(get_executable_area_coarse_info(modpc) == NULL); |
| } |
| } |
| |
| void |
| rct_module_table_persisted_invalidate(dcontext_t *dcontext, app_pc modpc) |
| { |
| rct_module_table_t *permod; |
| uint i; |
| os_get_module_info_lock(); |
| for (i = 0; i < RCT_NUM_TYPES; i++) { |
| permod = rct_get_table(modpc, i); |
| ASSERT(permod != NULL); |
| if (permod != NULL && permod->persisted_table != NULL) { |
| /* If the persisted table contains entries beyond what we will discover |
| * when we re-build its cache we must transfer those to the live table |
| * now. At first I was only keeping entire-module RCT entries, but we |
| * must keep all RAC and RCT entries since we may not see the triggering |
| * code before we hit the check point (we may reset at a ret so we won't see |
| * the call before the ret check; same for Borland SEH). The only time we |
| * can discard them is on a module unload. |
| */ |
| /* Optimization: don't transfer if about to unload. This assumes |
| * there will be no persistence of a later coarse unit in a different |
| * region of the same module, which case 9651 primary_for_module |
| * currently ensures! (We already have read lock; ok to grab again.) |
| */ |
| if (!os_module_get_flag(modpc, MODULE_BEING_UNLOADED) && !dynamo_exited) { |
| /* FIXME case 10362: we could leave the file mapped in and |
| * use the persisted RCT table independently of the cache |
| */ |
| /* Merging will remove any dups, though today we never re-load |
| * pcaches so there shouldn't be any |
| */ |
| app_pc_table_t *merged = |
| /* all modinfo RCT tables are on global heap */ |
| rct_table_merge(GLOBAL_DCONTEXT, permod->live_table, |
| permod->persisted_table); |
| if (permod->live_table != NULL) |
| rct_table_free(GLOBAL_DCONTEXT, permod->live_table, true); |
| permod->live_table = merged; |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| "rct_module_table_persisted_invalidate "PFX": not unload, so " |
| "moving persisted %d entries to live table\n", |
| modpc, permod->persisted_table->entries); |
| # ifdef WINDOWS |
| /* We leave the MODULE_RCT_LOADED flag */ |
| # endif |
| STATS_INC(rct_persisted_outlast_cache); |
| } |
| /* we rely on coarse_unit_reset_free() freeing the persisted table struct */ |
| permod->persisted_table = NULL; |
| } |
| } |
| os_get_module_info_unlock(); |
| } |
| |
| /* Produces a new hashtable that contains all entries in the live and persisted |
| * tables for the module containing modpc that are within [limit_start, limit_end) |
| */ |
| app_pc_table_t * |
| rct_module_table_copy(dcontext_t *dcontext, app_pc modpc, rct_type_t which, |
| app_pc limit_start, app_pc limit_end) |
| { |
| app_pc_table_t *merged = NULL; |
| rct_module_table_t *permod; |
| mutex_t *lock = (which == RCT_RAC) ? &after_call_lock : &rct_module_lock; |
| mutex_lock(lock); |
| if (which == RCT_RAC) { |
| ASSERT(DYNAMO_OPTION(ret_after_call)); |
| if (!DYNAMO_OPTION(ret_after_call)) { |
| /* do not leak the lock on the early-return path */ |
| mutex_unlock(lock); |
| return NULL; |
| } |
| } else { |
| ASSERT(TEST(OPTION_ENABLED, DYNAMO_OPTION(rct_ind_call)) || |
| TEST(OPTION_ENABLED, DYNAMO_OPTION(rct_ind_jump))); |
| if (!TEST(OPTION_ENABLED, DYNAMO_OPTION(rct_ind_call)) && |
| !TEST(OPTION_ENABLED, DYNAMO_OPTION(rct_ind_jump))) { |
| /* do not leak the lock on the early-return path */ |
| mutex_unlock(lock); |
| return NULL; |
| } |
| } |
| os_get_module_info_lock(); |
| permod = rct_get_table(modpc, which); |
| ASSERT(permod != NULL); |
| if (permod != NULL) { |
| /* FIXME: we could pass the limit range down to hashtable_app_pc_{copy,merge} |
| * for more efficiency and to avoid over-sizing the table, but this |
| * should be rare w/ -persist_rct_entire and single-+x-section modules |
| */ |
| merged = rct_table_merge(dcontext, permod->live_table, permod->persisted_table); |
| if (merged != NULL) { |
| DEBUG_DECLARE(uint removed = 0;) |
| TABLE_RWLOCK(merged, write, lock); |
| if (limit_start > permod->live_min) { |
| DEBUG_DECLARE(removed +=) |
| hashtable_app_pc_range_remove(dcontext, merged, |
| (ptr_uint_t)permod->live_min, |
| (ptr_uint_t)limit_start, NULL); |
| } |
| if (limit_end <= permod->live_max) { |
| DEBUG_DECLARE(removed +=) |
| hashtable_app_pc_range_remove(dcontext, merged, |
| (ptr_uint_t)limit_end, |
| (ptr_uint_t)permod->live_max+1, NULL); |
| } |
| TABLE_RWLOCK(merged, write, unlock); |
| STATS_RCT_ADD(which, module_persist_out_of_range, removed); |
| } |
| } |
| os_get_module_info_unlock(); |
| mutex_unlock(lock); |
| return merged; |
| } |
| |
| /* The caller keeps a duplicate pointer to the persisted table we install |
| * here in the loaded coarse_info_t, so we must be careful to do a |
| * coordinated free of the duplicated pointer. |
| */ |
| bool |
| rct_module_table_set(dcontext_t *dcontext, app_pc modpc, app_pc_table_t *table, |
| rct_type_t which) |
| { |
| rct_module_table_t *permod; |
| bool used = false; |
| mutex_t *lock = (which == RCT_RAC) ? &after_call_lock : &rct_module_lock; |
| mutex_lock(lock); |
| os_get_module_info_lock(); |
| permod = rct_get_table(modpc, which); |
| ASSERT(permod != NULL); |
| ASSERT(permod->persisted_table == NULL); /* can't resurrect twice */ |
| ASSERT(table != NULL); |
| /* Case 9834: avoid double-add from earlier entire-module resurrect */ |
| ASSERT(which == RCT_RAC || !os_module_get_flag(modpc, MODULE_RCT_LOADED)); |
| /* FIXME case 8648: we're loosening security by allowing ret to target any |
| * after-call executed in any prior run of this app or whatever |
| * app is producing, instead of just this run. |
| */ |
| if (permod != NULL && permod->persisted_table == NULL) { |
| used = true; |
| /* There could be dups in persisted table that are already in |
| * live table (particularly from unloading and re-loading a pcache, |
| * though that never happens today). Everything should work fine, |
| * and if we do unload the pcache prior to module unload the merge |
| * into the live entries will remove the dups. |
| */ |
| permod->persisted_table = table; |
| ASSERT(permod->persisted_table->entries > 0); |
| /* FIXME: for case 9639 if we had the ibl table set up (say, for shared |
| * ibl tables) and stored the cache pc (though I guess coarse htable is |
| * set up) we could fill the ibl table here as well (but then we'd |
| * have to abort for hotp conflicts earlier in coarse_unit_load()). |
| * Instead we delay and do it in rac_persisted_fill_ibl(). |
| */ |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| "rct_module_table_resurrect: added %d %s entries\n", |
| permod->persisted_table->entries, which == RCT_RAC ? "RAC" : "RCT"); |
| STATS_RCT_ADD(which, persisted_tables, 1); |
| STATS_RCT_ADD(which, persisted_entries, permod->persisted_table->entries); |
| } |
| os_get_module_info_unlock(); |
| mutex_unlock(lock); |
| return used; |
| } |
| |
| bool |
| rct_module_persisted_table_exists(dcontext_t *dcontext, app_pc modpc, |
| rct_type_t which) |
| { |
| bool exists = false; |
| rct_module_table_t *permod; |
| os_get_module_info_lock(); |
| permod = rct_get_table(modpc, which); |
| exists = (permod != NULL && permod->persisted_table != NULL); |
| os_get_module_info_unlock(); |
| return exists; |
| } |
| |
| uint |
| rct_module_live_entries(dcontext_t *dcontext, app_pc modpc, rct_type_t which) |
| { |
| uint entries = 0; |
| rct_module_table_t *permod; |
| os_get_module_info_lock(); |
| permod = rct_get_table(modpc, which); |
| if (permod != NULL && permod->live_table != NULL) |
| entries = permod->live_table->entries; |
| os_get_module_info_unlock(); |
| return entries; |
| } |
| |
| static void |
| coarse_persisted_fill_ibl_helper(dcontext_t *dcontext, ibl_table_t *ibl_table, |
| coarse_info_t *info, app_pc_table_t *ptable, |
| ibl_branch_type_t branch_type) |
| { |
| uint i; |
| fragment_t wrapper; |
| app_pc tag; |
| cache_pc body_pc; |
| DEBUG_DECLARE(uint added = 0;) |
| ASSERT(ptable != NULL); |
| if (ptable == NULL) |
| return; |
| ASSERT(os_get_module_info_locked()); |
| |
| /* Make sure this thread's local ptrs & state are up to |
| * date in case a resize occurred while it was in the cache. */ |
| update_private_ibt_table_ptrs(dcontext, ibl_table _IF_DEBUG(NULL)); |
| |
| /* Avoid hash collision asserts while adding by sizing up front; |
| * FIXME: we may over-size for INDJMP table |
| */ |
| TABLE_RWLOCK(ibl_table, write, lock); |
| hashtable_ibl_check_size(dcontext, ibl_table, 0, ptable->entries); |
| TABLE_RWLOCK(ibl_table, write, unlock); |
| |
| /* FIXME: we should hold ptable's read lock but it's lower ranked |
| * than the fragment table's lock, so we rely on os module lock |
| */ |
| for (i = 0; i < ptable->capacity; i++) { |
| tag = ptable->table[i]; |
| if (APP_PC_ENTRY_IS_REAL(tag)) { |
| /* FIXME: should we persist the cache pcs to save time here? That won't |
| * be ideal if we ever use the mmapped table directly (for per-module |
| * tables: case 9672). We could support both tag-only and tag-cache |
| * pairs by having the 1st entry be a flags word. |
| */ |
| fragment_coarse_lookup_in_unit(dcontext, info, tag, NULL, &body_pc); |
| /* may not be present, given no checks in rct_entries_in_region() */ |
| if (body_pc != NULL && |
| /* We can have the same entry in both the RAC and RCT tables, and we |
| * use both tables to fill the INDJMP table, so skip tags already added |
| */ |
| (branch_type != IBL_INDJMP || |
| IBL_ENTRY_IS_EMPTY(hashtable_ibl_lookup(dcontext, (ptr_uint_t)tag, |
| ibl_table)))) { |
| fragment_coarse_wrapper(&wrapper, tag, body_pc); |
| fragment_add_ibl_target_helper(dcontext, &wrapper, ibl_table); |
| DOSTATS({ added++; }); |
| } |
| } |
| } |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| "coarse_persisted_fill_ibl %s: added %d of %d entries\n", |
| get_branch_type_name(branch_type), added, ptable->entries); |
| STATS_ADD(perscache_ibl_prefill, added); |
| } |
| |
| /* Case 9639: fill ibl table from persisted RAC/RCT table entries */ |
| static void |
| coarse_persisted_fill_ibl(dcontext_t *dcontext, coarse_info_t *info, |
| ibl_branch_type_t branch_type) |
| { |
| per_thread_t *pt = GET_PT(dcontext); |
| ibl_table_t *ibl_table; |
| rct_module_table_t *permod; |
| |
| /* Caller must hold info lock */ |
| ASSERT_OWN_MUTEX(true, &info->lock); |
| ASSERT(exists_coarse_ibl_pending_table(dcontext, info, branch_type)); |
| ASSERT(TEST(COARSE_FILL_IBL_MASK(branch_type), |
| DYNAMO_OPTION(coarse_fill_ibl))); |
| if (!exists_coarse_ibl_pending_table(dcontext, info, branch_type)) |
| return; |
| |
| os_get_module_info_lock(); |
| ibl_table = GET_IBT_TABLE(pt, FRAG_SHARED|FRAG_COARSE_GRAIN, branch_type); |
| if (branch_type == IBL_RETURN || branch_type == IBL_INDJMP) { |
| permod = rct_get_table(info->base_pc, RCT_RAC); |
| ASSERT(permod != NULL && permod->persisted_table != NULL); |
| if (permod != NULL && permod->persisted_table != NULL) { |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| "coarse_persisted_fill_ibl %s: adding RAC %d entries\n", |
| get_branch_type_name(branch_type), permod->persisted_table->entries); |
| coarse_persisted_fill_ibl_helper(dcontext, ibl_table, info, |
| permod->persisted_table, branch_type); |
| } |
| } |
| if (branch_type == IBL_INDCALL || branch_type == IBL_INDJMP) { |
| permod = rct_get_table(info->base_pc, RCT_RCT); |
| ASSERT(permod != NULL && permod->persisted_table != NULL); |
| if (permod != NULL && permod->persisted_table != NULL) { |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| "coarse_persisted_fill_ibl %s: adding RCT %d entries\n", |
| get_branch_type_name(branch_type), permod->persisted_table->entries); |
| coarse_persisted_fill_ibl_helper(dcontext, ibl_table, info, |
| permod->persisted_table, branch_type); |
| } |
| } |
| os_get_module_info_unlock(); |
| /* We only fill for the 1st thread (if using per-thread ibl |
| * tables). We'd need per-thread flags to do otherwise, and the |
| * goal is only to help startup performance. |
| * FIXME case 9639: later threads may do startup work in various apps, |
| * and we may want a better solution here. |
| */ |
| info->ibl_pending_used |= COARSE_FILL_IBL_MASK(branch_type); |
| } |
| |
| #endif /* defined(RETURN_AFTER_CALL) || defined(RCT_IND_BRANCH) */ |
| |
| #ifdef RETURN_AFTER_CALL |
| |
| /* returns NULL if not found */ |
| app_pc |
| fragment_after_call_lookup(dcontext_t *dcontext, app_pc tag) |
| { |
| return rct_table_lookup(dcontext, tag, RCT_RAC); |
| } |
| |
| void |
| fragment_add_after_call(dcontext_t *dcontext, app_pc tag) |
| { |
| mutex_lock(&after_call_lock); |
| if (!rct_table_add(dcontext, tag, RCT_RAC)) |
| STATS_INC(num_existing_after_call); |
| else |
| STATS_INC(num_future_after_call); |
| mutex_unlock(&after_call_lock); |
| } |
| |
| /* flushing a fragment invalidates the after call entry */ |
| void |
| fragment_flush_after_call(dcontext_t *dcontext, app_pc tag) |
| { |
| mutex_lock(&after_call_lock); |
| rct_table_flush_entry(dcontext, tag, RCT_RAC); |
| mutex_unlock(&after_call_lock); |
| STATS_INC(num_future_after_call_removed); |
| STATS_DEC(num_future_after_call); |
| } |
| |
| /* see comments in rct_table_invalidate_range() */ |
| uint |
| invalidate_after_call_target_range(dcontext_t *dcontext, |
| app_pc text_start, app_pc text_end) |
| { |
| uint entries_removed; |
| mutex_lock(&after_call_lock); |
| entries_removed = |
| rct_table_invalidate_range(dcontext, RCT_RAC, text_start, text_end); |
| mutex_unlock(&after_call_lock); |
| |
| STATS_ADD(num_future_after_call_removed, entries_removed); |
| STATS_SUB(num_future_after_call, entries_removed); |
| |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| "invalidate_rct_target_range "PFX"-"PFX": removed %d entries\n", |
| text_start, text_end, entries_removed); |
| |
| return entries_removed; |
| } |
| |
| #endif /* RETURN_AFTER_CALL */ |
| |
| /***********************************************************************/ |
| #ifdef RCT_IND_BRANCH |
| /* |
| * RCT indirect branch policy bookkeeping. Mostly a set of wrappers |
| * around the basic hashtable functionality. |
| * |
| * FIXME: all of these routines should be moved to rct.c after we move |
| * the hashtable primitives to fragment.h as static inlines |
| */ |
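| |
| /* An illustrative sketch (not from the original sources) of how the wrappers |
| * below are used; callers serialize adds and invalidations via rct_module_lock, |
| * as the ASSERT_OWN_MUTEX checks in each routine require: |
| * |
| *     mutex_lock(&rct_module_lock); |
| *     if (rct_add_valid_ind_branch_target(dcontext, target_pc)) |
| *         ... newly recorded valid indirect branch target ... |
| *     mutex_unlock(&rct_module_lock); |
| */ |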
| |
| /* returns NULL if not found */ |
| app_pc |
| rct_ind_branch_target_lookup(dcontext_t *dcontext, app_pc tag) |
| { |
| return rct_table_lookup(dcontext, tag, RCT_RCT); |
| } |
| |
| /* returns true if a new entry for target was added, |
| * or false if target was already known |
| */ |
| /* Note - entries are expected to be within MEM_IMAGE */ |
| bool |
| rct_add_valid_ind_branch_target(dcontext_t *dcontext, app_pc tag) |
| { |
| ASSERT_OWN_MUTEX(true, &rct_module_lock); |
| DOLOG(2, LOG_FRAGMENT, { |
| /* FIXME: would be nice to add a heavyweight check that the target is |
| * really within a PE IMAGE code section via is_in_code_section() |
| */ |
| }); |
| if (!rct_table_add(dcontext, tag, RCT_RCT)) |
| return false; |
| else { |
| STATS_INC(rct_ind_branch_entries); |
| return true; /* new entry */ |
| } |
| } |
| |
| /* invalidate an indirect branch target and free any associated memory */ |
| /* FIXME: note that this is currently not used and |
| * invalidate_ind_branch_target_range() will be the likely method to |
| * use for most cases when a whole module range is invalidated. |
| */ |
| void |
| rct_flush_ind_branch_target_entry(dcontext_t *dcontext, app_pc tag) |
| { |
| ASSERT_OWN_MUTEX(true, &rct_module_lock); /* synch with adding */ |
| rct_table_flush_entry(dcontext, tag, RCT_RCT); |
| STATS_DEC(rct_ind_branch_entries); |
| STATS_INC(rct_ind_branch_entries_removed); |
| } |
| |
| /* see comments in rct_table_invalidate_range() */ |
| uint |
| invalidate_ind_branch_target_range(dcontext_t *dcontext, |
| app_pc text_start, app_pc text_end) |
| { |
| uint entries_removed; |
| ASSERT_OWN_MUTEX(true, &rct_module_lock); /* synch with adding */ |
| |
| entries_removed = |
| rct_table_invalidate_range(dcontext, RCT_RCT, text_start, text_end); |
| STATS_ADD(rct_ind_branch_entries_removed, entries_removed); |
| STATS_SUB(rct_ind_branch_entries, entries_removed); |
| |
| return entries_removed; |
| } |
| #endif /* RCT_IND_BRANCH */ |
| |
| |
| /****************************************************************************/ |
| /* CACHE CONSISTENCY */ |
| |
| /* Handle exits from the cache from our self-modifying code sandboxing |
| * instrumentation. |
| */ |
| void |
| fragment_self_write(dcontext_t *dcontext) |
| { |
| ASSERT(!is_self_couldbelinking()); |
| /* need to delete just this fragment, then start interpreting |
| * at the instr after the self-write instruction |
| */ |
| dcontext->next_tag = EXIT_TARGET_TAG(dcontext, dcontext->last_fragment, |
| dcontext->last_exit); |
| LOG(THREAD, LOG_ALL, 2, "Sandboxing exit from fragment "PFX" @"PFX"\n", |
| dcontext->last_fragment->tag, |
| EXIT_CTI_PC(dcontext->last_fragment, dcontext->last_exit)); |
| LOG(THREAD, LOG_ALL, 2, "\tset next_tag to "PFX"\n", dcontext->next_tag); |
| /* We come in here both for actual selfmod and for exec count thresholds, |
| * to avoid needing separate LINK_ flags. |
| */ |
| if (DYNAMO_OPTION(sandbox2ro_threshold) > 0) { |
| if (vm_area_selfmod_check_clear_exec_count(dcontext, dcontext->last_fragment)) { |
| /* vm_area_* deleted this fragment by flushing so nothing more to do */ |
| return; |
| } |
| } |
| LOG(THREAD, LOG_ALL, 1, "WARNING: fragment "PFX" @"PFX" overwrote its own code\n", |
| dcontext->last_fragment->tag, |
| EXIT_CTI_PC(dcontext->last_fragment, dcontext->last_exit)); |
| STATS_INC(num_self_writes); |
| if (TEST(FRAG_WAS_DELETED, dcontext->last_fragment->flags)) { |
| /* Case 8177: case 3559 unionized fragment_t.in_xlate, so we cannot delete a |
| * fragment that has already been unlinked in the first stage of a flush. |
| * The flush queue check, which comes after this (b/c we want to be |
| * nolinking), will delete. |
| */ |
| ASSERT(((per_thread_t *) dcontext->fragment_field)->flush_queue_nonempty); |
| STATS_INC(num_self_writes_after_flushes); |
| } else { |
| #ifdef PROGRAM_SHEPHERDING |
| /* we can't call fragment_delete if vm_area_t deletes it by flushing */ |
| if (!vm_area_fragment_self_write(dcontext, dcontext->last_fragment->tag)) |
| #endif |
| { |
| fragment_delete(dcontext, dcontext->last_fragment, FRAGDEL_ALL); |
| STATS_INC(num_fragments_deleted_selfmod); |
| } |
| } |
| } |
| |
| /* If coarse_grain is set, just the min_pc and max_pc of f are used; |
| * otherwise a full walk of the original code is done to see whether |
| * any piece of the fragment really does overlap. |
| * Returns in bb_tag the tag of the bb that actually overlaps (i.e., finds the |
| * component bb of a trace that does the overlapping). |
| */ |
| bool |
| fragment_overlaps(dcontext_t *dcontext, fragment_t *f, |
| byte *region_start, byte *region_end, bool coarse_grain, |
| overlap_info_t *info_res, app_pc *bb_tag) |
| { |
| overlap_info_t info; |
| info.overlap = false; |
| if ((f->flags & FRAG_IS_TRACE) != 0) { |
| uint i; |
| trace_only_t *t = TRACE_FIELDS(f); |
| /* look through all blocks making up the trace */ |
| ASSERT(t->bbs != NULL); |
| /* trace should have at least one bb */ |
| ASSERT(t->num_bbs > 0); |
| for (i=0; i<t->num_bbs; i++) { |
| if (app_bb_overlaps(dcontext, t->bbs[i].tag, f->flags, |
| region_start, region_end, &info)) { |
| if (bb_tag != NULL) |
| *bb_tag = t->bbs[i].tag; |
| break; |
| } |
| } |
| } else { |
| app_bb_overlaps(dcontext, f->tag, f->flags, |
| region_start, region_end, &info); |
| if (info.overlap && bb_tag != NULL) |
| *bb_tag = f->tag; |
| } |
| if (info_res != NULL) |
| *info_res = info; |
| return info.overlap; |
| } |
| |
| #ifdef DEBUG |
| void |
| study_all_hashtables(dcontext_t *dcontext) |
| { |
| per_thread_t *pt = (per_thread_t *) dcontext->fragment_field; |
| ibl_branch_type_t branch_type; |
| |
| for (branch_type = IBL_BRANCH_TYPE_START; |
| branch_type < IBL_BRANCH_TYPE_END; branch_type++) { |
| if (!DYNAMO_OPTION(disable_traces)) { |
| per_thread_t *ibl_pt = pt; |
| if (DYNAMO_OPTION(shared_trace_ibt_tables)) |
| ibl_pt = shared_pt; |
| hashtable_ibl_study(dcontext, &ibl_pt->trace_ibt[branch_type], |
| 0/*table consistent*/); |
| } |
| if (DYNAMO_OPTION(bb_ibl_targets)) { |
| per_thread_t *ibl_pt = pt; |
| if (DYNAMO_OPTION(shared_bb_ibt_tables)) |
| ibl_pt = shared_pt; |
| hashtable_ibl_study(dcontext, &ibl_pt->bb_ibt[branch_type], |
| 0/*table consistent*/); |
| } |
| } |
| if (PRIVATE_TRACES_ENABLED()) |
| hashtable_fragment_study(dcontext, &pt->trace, 0/*table consistent*/); |
| hashtable_fragment_study(dcontext, &pt->bb, 0/*table consistent*/); |
| hashtable_fragment_study(dcontext, &pt->future, 0/*table consistent*/); |
| if (DYNAMO_OPTION(shared_bbs)) |
| hashtable_fragment_study(dcontext, shared_bb, 0/*table consistent*/); |
| if (DYNAMO_OPTION(shared_traces)) |
| hashtable_fragment_study(dcontext, shared_trace, 0/*table consistent*/); |
| if (SHARED_FRAGMENTS_ENABLED()) |
| hashtable_fragment_study(dcontext, shared_future, 0/*table consistent*/); |
| # ifdef RETURN_AFTER_CALL |
| if (dynamo_options.ret_after_call && rac_non_module_table.live_table != NULL) { |
| hashtable_app_pc_study(dcontext, rac_non_module_table.live_table, |
| 0/*table consistent*/); |
| } |
| # endif |
| # if defined(RCT_IND_BRANCH) && defined(UNIX) |
| if ((TEST(OPTION_ENABLED, DYNAMO_OPTION(rct_ind_call)) || |
| TEST(OPTION_ENABLED, DYNAMO_OPTION(rct_ind_jump))) && |
| rct_global_table.live_table != NULL) { |
| hashtable_app_pc_study(dcontext, rct_global_table.live_table, |
| 0/*table consistent*/); |
| } |
| # endif /* RCT_IND_BRANCH */ |
| # if defined(WIN32) && (defined(RETURN_AFTER_CALL) || defined(RCT_IND_BRANCH)) |
| { |
| module_iterator_t *mi = module_iterator_start(); |
| uint i; |
| rct_module_table_t *permod; |
| while (module_iterator_hasnext(mi)) { |
| module_area_t *data = module_iterator_next(mi); |
| for (i = 0; i < RCT_NUM_TYPES; i++) { |
| permod = os_module_get_rct_htable(data->start, i); |
| ASSERT(permod != NULL); |
| if (permod->persisted_table != NULL) { |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| "%s persisted hashtable for %s "PFX"-"PFX"\n", |
| i == RCT_RAC ? "RAC" : "RCT", |
| GET_MODULE_NAME(&data->names), data->start, data->end); |
| hashtable_app_pc_study(dcontext, permod->persisted_table, |
| 0/*table consistent*/); |
| } |
| if (permod->live_table != NULL) { |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| "%s live hashtable for %s "PFX"-"PFX"\n", |
| i == RCT_RAC ? "RAC" : "RCT", |
| GET_MODULE_NAME(&data->names), data->start, data->end); |
| hashtable_app_pc_study(dcontext, permod->live_table, |
| 0/*table consistent*/); |
| } |
| } |
| } |
| module_iterator_stop(mi); |
| } |
| # endif /* defined(RETURN_AFTER_CALL) || defined(RCT_IND_BRANCH) */ |
| } |
| #endif /* DEBUG */ |
| |
| /**************************************************************************** |
| * FLUSHING |
| * |
| * Two-stage freeing of fragments via immediate unlinking followed by lazy |
| * deletion. |
| */ |
| |
| uint |
| get_flushtime_last_update(dcontext_t *dcontext) |
| { |
| per_thread_t *pt = (per_thread_t *) dcontext->fragment_field; |
| return pt->flushtime_last_update; |
| } |
| |
| void |
| set_flushtime_last_update(dcontext_t *dcontext, uint val) |
| { |
| per_thread_t *pt = (per_thread_t *) dcontext->fragment_field; |
| pt->flushtime_last_update = val; |
| } |
| |
| void |
| set_at_syscall(dcontext_t *dcontext, bool val) |
| { |
| ASSERT(dcontext != GLOBAL_DCONTEXT); |
| dcontext->upcontext_ptr->at_syscall = val; |
| } |
| |
| bool |
| get_at_syscall(dcontext_t *dcontext) |
| { |
| ASSERT(dcontext != GLOBAL_DCONTEXT); |
| return dcontext->upcontext_ptr->at_syscall; |
| } |
| |
| /* Assumes caller takes care of synchronization. |
| * Returns false iff was_I_flushed ends up being deleted right now from |
| * a private cache OR was_I_flushed has been flushed from a shared cache |
| * and is pending final deletion. |
| */ |
| static bool |
| check_flush_queue(dcontext_t *dcontext, fragment_t *was_I_flushed) |
| { |
| per_thread_t *pt = (per_thread_t *) dcontext->fragment_field; |
| bool not_flushed = true; |
| ASSERT_OWN_MUTEX(true, &pt->linking_lock); |
| /* first check private queue and act on pending deletions */ |
| if (pt->flush_queue_nonempty) { |
| bool local_prot = local_heap_protected(dcontext); |
| if (local_prot) |
| SELF_PROTECT_LOCAL(dcontext, WRITABLE); |
| /* remove local vm areas on t->Q queue and all frags in their lists */ |
| not_flushed = not_flushed && |
| vm_area_flush_fragments(dcontext, was_I_flushed); |
| pt->flush_queue_nonempty = false; |
| LOG(THREAD, LOG_FRAGMENT, 2, "Hashtable state after flushing the queue:\n"); |
| DOLOG(2, LOG_FRAGMENT, { |
| study_all_hashtables(dcontext); |
| }); |
| if (local_prot) |
| SELF_PROTECT_LOCAL(dcontext, READONLY); |
| } |
| /* now check shared queue to dec ref counts */ |
| if (DYNAMO_OPTION(shared_deletion) && |
| /* No lock needed: any racy incs to global are in safe direction, and our inc |
| * is atomic so we shouldn't see any partial-word-updated values here. This |
| * check is our shared deletion algorithm's only perf hit when there's no |
| * actual shared flushing. |
| */ |
| pt->flushtime_last_update < flushtime_global) { |
| /* dec ref count on any pending shared areas */ |
| not_flushed = not_flushed && |
| vm_area_check_shared_pending(dcontext, was_I_flushed); |
| /* Remove unlinked markers if called for. |
| * FIXME If a thread's flushtime is updated due to shared syscall sync, |
| * its tables won't be rehashed here -- the thread's flushtime will be |
| * equal to the global flushtime so the 'if' isn't entered. We have |
| * multiple options as to other points for rehashing -- a table add, a |
| * table delete, any entry into DR. For now, we've chosen a table add |
| * (see check_table_size()). |
| */ |
| if (SHARED_IB_TARGETS() && |
| (INTERNAL_OPTION(rehash_unlinked_threshold) < 100 || |
| INTERNAL_OPTION(rehash_unlinked_always))) { |
| |
| ibl_branch_type_t branch_type; |
| |
| for (branch_type = IBL_BRANCH_TYPE_START; |
| branch_type < IBL_BRANCH_TYPE_END; branch_type++) { |
| |
| ibl_table_t *table = &pt->bb_ibt[branch_type]; |
| |
| if (table->unlinked_entries > 0 && |
| (INTERNAL_OPTION(rehash_unlinked_threshold) < |
| (100 * table->unlinked_entries / |
| (table->unlinked_entries + table->entries)) || |
| INTERNAL_OPTION(rehash_unlinked_always))) { |
| STATS_INC(num_ibt_table_rehashes); |
| LOG(THREAD, LOG_FRAGMENT, 1, |
| "Rehash table %s: linked %u, unlinked %u\n", |
| table->name, table->entries, table->unlinked_entries); |
| hashtable_ibl_unlinked_remove(dcontext, table); |
| } |
| } |
| } |
| } |
| |
| /* FIXME This is the ideal location for inserting refcounting logic |
| * for freeing a resized shared IBT table, as is done for shared |
| * deletion above. |
| */ |
| |
| return not_flushed; |
| } |
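| |
| /* Worked example (assumed option values, for illustration only) of the rehash |
| * threshold test in check_flush_queue(): with -rehash_unlinked_threshold 25 and |
| * a bb IBT table holding 70 linked and 30 unlinked entries, the unlinked |
| * fraction is 100*30/(30+70) = 30, which exceeds 25, so the unlinked markers |
| * are removed via hashtable_ibl_unlinked_remove(). |
| */ |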
| |
| /* Note that an all-threads-synch flush does NOT set the self-flushing flag, |
| * so use is_self_allsynch_flushing() instead. |
| */ |
| bool |
| is_self_flushing() |
| { |
| /* race condition w/ flusher being updated -- but since only testing vs self, |
| * if flusher update is atomic, should be safe |
| */ |
| return (get_thread_private_dcontext() == flusher); |
| } |
| |
| bool |
| is_self_allsynch_flushing() |
| { |
| /* race condition w/ allsynch_flusher being updated -- but since only testing |
| * vs self, if flusher update is atomic, should be safe |
| */ |
| return (allsynch_flusher != NULL && |
| get_thread_private_dcontext() == allsynch_flusher); |
| } |
| |
| /* N.B.: only accurate if called on self (else a race condition) */ |
| bool |
| is_self_couldbelinking() |
| { |
| dcontext_t *dcontext = get_thread_private_dcontext(); |
| /* if no dcontext yet then can't be couldbelinking */ |
| return (dcontext != NULL && |
| !RUNNING_WITHOUT_CODE_CACHE() /*case 7966: has no pt*/ && |
| is_couldbelinking(dcontext)); |
| } |
| |
| /* N.B.: can only call if target thread is self, suspended, or waiting for flush */ |
| bool |
| is_couldbelinking(dcontext_t *dcontext) |
| { |
| per_thread_t *pt = (per_thread_t *) dcontext->fragment_field; |
| /* the lock is only needed for self writing or another thread reading |
| * but if different thread we require thread is suspended or waiting for |
| * flush so is ok */ |
| /* FIXME : add an assert that the thread that owns the dcontext is either |
| * the caller, at the flush sync wait, or is suspended by thread_synch |
| * routines */ |
| return (!RUNNING_WITHOUT_CODE_CACHE() /*case 7966: has no pt*/ && |
| pt != NULL/*PR 536058: no pt*/ && |
| pt->could_be_linking); |
| } |
| |
| static void |
| wait_for_flusher_nolinking(dcontext_t *dcontext) |
| { |
| /* FIXME: can have livelock w/ these types of synch loops, |
| * any way to work into deadlock-avoidance? |
| */ |
| per_thread_t *pt = (per_thread_t *) dcontext->fragment_field; |
| ASSERT(!pt->could_be_linking); |
| while (pt->wait_for_unlink) { |
| LOG(THREAD, LOG_DISPATCH|LOG_THREADS, 2, |
| "Thread "TIDFMT" waiting for flush (flusher is %d @flushtime %d)\n", |
| /* safe to deref flusher since flusher is waiting for our signal */ |
| dcontext->owning_thread, flusher->owning_thread, flushtime_global); |
| mutex_unlock(&pt->linking_lock); |
| STATS_INC(num_wait_flush); |
| wait_for_event(pt->finished_all_unlink); |
| LOG(THREAD, LOG_DISPATCH|LOG_THREADS, 2, |
| "Thread "TIDFMT" resuming after flush\n", dcontext->owning_thread); |
| mutex_lock(&pt->linking_lock); |
| } |
| } |
| |
| static void |
| wait_for_flusher_linking(dcontext_t *dcontext) |
| { |
| /* FIXME: can have livelock w/ these types of synch loops, |
| * any way to work into deadlock-avoidance? |
| */ |
| per_thread_t *pt = (per_thread_t *) dcontext->fragment_field; |
| ASSERT(pt->could_be_linking); |
| while (pt->wait_for_unlink) { |
| LOG(THREAD, LOG_DISPATCH|LOG_THREADS, 2, |
| "Thread "TIDFMT" waiting for flush (flusher is %d @flushtime %d)\n", |
| /* safe to deref flusher since flusher is waiting for our signal */ |
| dcontext->owning_thread, flusher->owning_thread, flushtime_global); |
| mutex_unlock(&pt->linking_lock); |
| signal_event(pt->waiting_for_unlink); |
| STATS_INC(num_wait_flush); |
| wait_for_event(pt->finished_with_unlink); |
| LOG(THREAD, LOG_DISPATCH|LOG_THREADS, 2, |
| "Thread "TIDFMT" resuming after flush\n", dcontext->owning_thread); |
| mutex_lock(&pt->linking_lock); |
| } |
| } |
| |
| #ifdef DEBUG |
| static void |
| check_safe_for_flush_synch(dcontext_t *dcontext) |
| { |
| /* We cannot hold any locks at synch points that wait for flushers, as we |
| * could prevent forward progress of a couldbelinking thread that the |
| * flusher will wait for. |
| */ |
| /* FIXME: will fail w/ -single_thread_in_DR, along w/ the other similar |
| * asserts for flushing and cache entering |
| */ |
| # ifdef DEADLOCK_AVOIDANCE |
| ASSERT(thread_owns_no_locks(dcontext) || |
| /* if thread exits while a trace is in progress (case 8055) we're |
| * holding thread_initexit_lock, which prevents any flusher from |
| * proceeding and hitting a deadlock point, so we should be safe |
| */ |
| thread_owns_one_lock(dcontext, &thread_initexit_lock) || |
| /* it is safe to be the all-thread-syncher and hit a flush synch |
| * point, as no flusher can be active (all threads should be suspended |
| * except this thread) |
| */ |
| thread_owns_two_locks(dcontext, &thread_initexit_lock, |
| &all_threads_synch_lock)); |
| # endif /* DEADLOCK_AVOIDANCE */ |
| } |
| #endif /* DEBUG */ |
| |
| #ifdef CLIENT_INTERFACE |
| static void |
| process_client_flush_requests(dcontext_t *dcontext, dcontext_t *alloc_dcontext, |
| client_flush_req_t *req, bool flush) |
| { |
| client_flush_req_t *iter = req; |
| while (iter != NULL) { |
| client_flush_req_t *next = iter->next; |
| if (flush) { |
| /* Note that we don't free futures from a potentially linked-to region b/c we |
| * don't have lazy linking (xref case 2236) */ |
| /* FIXME - if there's more than one of these it would be nice to batch them, |
| * especially for the synch-all ones. */ |
| if (iter->flush_callback != NULL) { |
| /* FIXME - for implementation simplicity we do a synch-all flush so |
| * that we can inform the client right away, it might be nice to use |
| * the more performant regular flush when possible. */ |
| flush_fragments_from_region(dcontext, iter->start, iter->size, |
| true/*force synchall*/); |
| (*iter->flush_callback)(iter->flush_id); |
| } else { |
| /* do a regular flush */ |
| flush_fragments_from_region(dcontext, iter->start, iter->size, |
| false/*don't force synchall*/); |
| } |
| } |
| HEAP_TYPE_FREE(alloc_dcontext, iter, client_flush_req_t, ACCT_CLIENT, |
| UNPROTECTED); |
| iter = next; |
| } |
| } |
| #endif |
| |
| /* Returns false iff was_I_flushed ends up being deleted |
| * if cache_transition is true, assumes entering the cache now. |
| */ |
| bool |
| enter_nolinking(dcontext_t *dcontext, fragment_t *was_I_flushed, bool cache_transition) |
| { |
| per_thread_t *pt = (per_thread_t *) dcontext->fragment_field; |
| bool not_flushed = true; |
| |
| /*case 7966: has no pt, no flushing either */ |
| if (RUNNING_WITHOUT_CODE_CACHE()) |
| return true; |
| |
| DOCHECK(1, { check_safe_for_flush_synch(dcontext); }); |
| |
| /* FIXME: once we have this working correctly, come up with scheme |
| * that avoids synch in common case |
| */ |
| mutex_lock(&pt->linking_lock); |
| ASSERT(pt->could_be_linking); |
| |
| wait_for_flusher_linking(dcontext); |
| not_flushed = not_flushed && check_flush_queue(dcontext, was_I_flushed); |
| pt->could_be_linking = false; |
| mutex_unlock(&pt->linking_lock); |
| |
| if (!cache_transition) |
| return not_flushed; |
| |
| /* now we act on pending actions that can only be done while nolinking |
| * FIXME: optimization if add more triggers here: use a single |
| * master trigger as a first test to avoid testing all |
| * conditionals every time |
| */ |
| |
| if (reset_pending != 0) { |
| mutex_lock(&reset_pending_lock); |
| if (reset_pending != 0) { |
| uint target = reset_pending; |
| reset_pending = 0; |
| /* fcache_reset_all_caches_proactively() will unlock */ |
| fcache_reset_all_caches_proactively(target); |
| LOG(THREAD, LOG_DISPATCH, 2, |
| "Just reset all caches, next_tag is "PFX"\n", dcontext->next_tag); |
| /* fragment is gone for sure, so return false */ |
| return false; |
| } |
| mutex_unlock(&reset_pending_lock); |
| } |
| |
| /* FIXME: perf opt: add a global flag we can check w/o making a call, |
| * or at least inline the call |
| */ |
| if (fcache_is_flush_pending(dcontext)) { |
| not_flushed = not_flushed && |
| fcache_flush_pending_units(dcontext, was_I_flushed); |
| } |
| |
| #ifdef UNIX |
| /* i#61/PR 211530: nudges on Linux do not use separate threads */ |
| while (dcontext->nudge_pending != NULL) { |
| /* handle_nudge may not return, so we can't call it w/ inconsistent state */ |
| pending_nudge_t local = *dcontext->nudge_pending; |
| heap_free(dcontext, dcontext->nudge_pending, sizeof(local) HEAPACCT(ACCT_OTHER)); |
| dcontext->nudge_pending = local.next; |
| if (dcontext->interrupted_for_nudge != NULL) { |
| fragment_t *f = dcontext->interrupted_for_nudge; |
| LOG(THREAD, LOG_ASYNCH, 3, "\tre-linking outgoing for interrupted F%d\n", |
| f->id); |
| SHARED_FLAGS_RECURSIVE_LOCK(f->flags, acquire, change_linking_lock); |
| link_fragment_outgoing(dcontext, f, false); |
| SHARED_FLAGS_RECURSIVE_LOCK(f->flags, release, change_linking_lock); |
| if (TEST(FRAG_HAS_SYSCALL, f->flags)) { |
| mangle_syscall_code(dcontext, f, EXIT_CTI_PC(f, dcontext->last_exit), |
| true/*skip exit cti*/); |
| } |
| dcontext->interrupted_for_nudge = NULL; |
| } |
| handle_nudge(dcontext, &local.arg); |
| /* we may have done a reset, so do not enter cache now */ |
| return false; |
| } |
| #endif |
| |
| #ifdef CLIENT_INTERFACE |
| /* Handle flush requests queued via dr_flush_fragments()/dr_delay_flush_region() */ |
| /* thread private list */ |
| process_client_flush_requests(dcontext, dcontext, dcontext->client_data->flush_list, |
| true/*flush*/); |
| dcontext->client_data->flush_list = NULL; |
| /* global list */ |
| if (client_flush_requests != NULL) { /* avoid acquiring lock every cxt switch */ |
| client_flush_req_t *req; |
| mutex_lock(&client_flush_request_lock); |
| req = client_flush_requests; |
| client_flush_requests = NULL; |
| mutex_unlock(&client_flush_request_lock); |
| /* NOTE - we must release the lock before doing the flush. */ |
| process_client_flush_requests(dcontext, GLOBAL_DCONTEXT, req, true/*flush*/); |
| /* FIXME - this is an ugly, yet effective, hack. The problem is that there is |
| * currently no good way to tell whether we flushed was_I_flushed. Since it could |
| * be gone by now we pretend that it was flushed if we did any flushing at all. |
| * Dispatch should re-find the fragment if it wasn't flushed. */ |
| if (req != NULL) |
| not_flushed = false; |
| } |
| #endif |
| |
| return not_flushed; |
| } |
| |
| /* Returns false iff was_I_flushed ends up being deleted */ |
| bool |
| enter_couldbelinking(dcontext_t *dcontext, fragment_t *was_I_flushed, bool cache_transition) |
| { |
| per_thread_t *pt = (per_thread_t *) dcontext->fragment_field; |
| bool not_flushed; |
| |
| /*case 7966: has no pt, no flushing either */ |
| if (RUNNING_WITHOUT_CODE_CACHE()) |
| return true; |
| |
| DOCHECK(1, { check_safe_for_flush_synch(dcontext); }); |
| |
| mutex_lock(&pt->linking_lock); |
| ASSERT(!pt->could_be_linking); |
| /* ensure not still marked at_syscall */ |
| ASSERT(!DYNAMO_OPTION(syscalls_synch_flush) || !get_at_syscall(dcontext)); |
| |
| /* for thread-shared flush and thread-private flush+execareas atomicity, |
| * to avoid non-properly-nested locks (could have flusher hold |
| * pt->linking_lock for the entire flush) we need an additional |
| * synch point here for shared flushing to synch w/ all threads. |
| * I suppose switching from non-nested locks to this loop isn't |
| * nec. helping deadlock avoidance -- can still hang -- but this |
| * should be better performance-wise (only a few writes and a |
| * conditional in the common case here, no extra locks) |
| */ |
| pt->soon_to_be_linking = true; |
| wait_for_flusher_nolinking(dcontext); |
| pt->soon_to_be_linking = false; |
| |
| pt->could_be_linking = true; |
| not_flushed = check_flush_queue(dcontext, was_I_flushed); |
| mutex_unlock(&pt->linking_lock); |
| |
| return not_flushed; |
| } |
| |
| /* NOTE - this routine may be called more than once for the same exiting thread, |
| * xref case 8047. Any once-only reference counting, cleanup, etc. should be done in |
| * fragment_thread_exit(). This routine is just a stripped-down version of |
| * enter_nolinking() to keep an exiting thread from deadlocking with flushing. */ |
| void |
| enter_threadexit(dcontext_t *dcontext) |
| { |
| per_thread_t *pt = (per_thread_t *) dcontext->fragment_field; |
| |
| /*case 7966: has no pt, no flushing either */ |
| if (RUNNING_WITHOUT_CODE_CACHE() || pt == NULL/*PR 536058: no pt*/) |
| return; |
| |
| mutex_lock(&pt->linking_lock); |
| /* must dec ref count on shared regions before we die */ |
| check_flush_queue(dcontext, NULL); |
| pt->could_be_linking = false; |
| if (pt->wait_for_unlink) { |
| /* make sure don't get into deadlock w/ flusher */ |
| pt->about_to_exit = true; /* let flusher know can ignore us */ |
| signal_event(pt->waiting_for_unlink); /* wake flusher up */ |
| } |
| mutex_unlock(&pt->linking_lock); |
| } |
| |
| /* caller must hold shared_cache_flush_lock */ |
| void |
| increment_global_flushtime() |
| { |
| ASSERT_OWN_MUTEX(true, &shared_cache_flush_lock); |
| /* reset will turn flushtime_global back to 0, so we schedule one |
| * when we're approaching overflow |
| */ |
| if (flushtime_global == UINT_MAX/2) { |
| ASSERT_NOT_TESTED(); /* FIXME: add -stress_flushtime_global_max */ |
| SYSLOG_INTERNAL_WARNING("flushtime_global approaching UINT_MAX, resetting"); |
| schedule_reset(RESET_ALL); |
| } |
| ASSERT(flushtime_global < UINT_MAX); |
| |
| /* compiler should 4-byte-align so no cache line crossing |
| * (asserted in fragment_init()) |
| */ |
| flushtime_global++; |
| LOG(GLOBAL, LOG_VMAREAS, 2, "new flush timestamp: %u\n", |
| flushtime_global); |
| } |
| |
| /* The master flusher routines are split into 3: |
| * stage1) flush_fragments_synch_unlink_priv |
| * stage2) flush_fragments_unlink_shared |
| * stage3) flush_fragments_end_synch |
| * which MUST be used together. They are split to allow the caller to |
| * perform custom operations at certain synchronization points in the |
| * middle of flushing without using callback routines. The stages and |
| * points are: |
| * |
| * stage1: head synch, priv flushee unlink |
| * here caller can produce a custom list of fragments to flush while |
| * all threads are fully synched |
| * stage2: shared flushee unlink |
| * |
| * between stage2 and stage3, a region flush performs additional actions: |
| * region flush: exec area lock |
| * here caller can do custom exec area manipulations |
| * region flush: exec area unlock |
| * |
| * stage3: tail synch |
| * |
| * When doing a region flush with no custom region removals (the default is |
| * removing [base,base+size)), use the routine flush_fragments_and_remove_region(). |
| * When doing a region flush with custom removals, use |
| * flush_fragments_in_region_start() and flush_fragments_in_region_finish(). |
| * When doing a flush with no region removals at all use flush_fragments_from_region(). |
| * |
| * The thread requesting a flush must be !couldbelinking and must not |
| * be holding any locks that are ever grabbed while a thread is |
| * couldbelinking (pretty much all locks), as it must wait for other |
| * threads who are couldbelinking. The thread_initexit_lock is held |
| * from stage 1 to stage 3 (if there are fragments to flush), and |
| * region flushing also holds the executable_areas lock between stage |
| * 2 and stage 3. After stage 1 no thread is couldbelinking() until |
| * the synchronization release in stage 3. |
| * |
| * The general delayed-deletion flushing strategy involves first |
| * freezing the threads via the thread_initexit_lock. |
| * It then walks the thread list and marks all vm areas that overlap |
| * base...base+size as to-be-deleted, along with unlinking all fragments in those |
| * vm areas and unlinking shared_syscall for that thread. |
| * Fragments in the target area are not actually deleted until their owning thread |
| * checks its pending deletion queue, which it does prior to entering the fcache. |
| * Also flushes shared fragments, in a similar manner, with deletion delayed |
| * until all threads are out of the shared cache. |
| */ |
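| |
| /* An illustrative sketch (not from the original sources) of the stage ordering |
| * described above; parameter lists are elided since they belong to the stage |
| * routines themselves: |
| * |
| *     flush_fragments_synch_unlink_priv(...);   stage 1: synch + private unlink |
| *     ... optionally build a custom list of fragments to flush ... |
| *     flush_fragments_unlink_shared(...);       stage 2: shared unlink |
| *     ... for a region flush: custom executable-area manipulations ... |
| *     flush_fragments_end_synch(...);           stage 3: tail synch |
| * |
| * The wrappers flush_fragments_and_remove_region(), |
| * flush_fragments_in_region_start()/flush_fragments_in_region_finish(), and |
| * flush_fragments_from_region() bundle these stages for the common cases. |
| */ |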
| |
| /* Variables shared between the 3 flush stage routines |
| * flush_fragments_synch_unlink_priv(), |
| * flush_fragments_unlink_shared(), and flush_fragments_end_synch. |
| * As there can only be one flush at a time, we only need one copy, |
| * protected by the thread_initexit_lock. |
| */ |
| /* Not persistent across code cache execution, so unprotected */ |
| DECLARE_NEVERPROT_VAR(static thread_record_t **flush_threads, NULL); |
| DECLARE_NEVERPROT_VAR(static int flush_num_threads, 0); |
| DECLARE_NEVERPROT_VAR(static int pending_delete_threads, 0); |
| DECLARE_NEVERPROT_VAR(static int shared_flushed, 0); |
| DECLARE_NEVERPROT_VAR(static bool flush_synchall, false); |
| #ifdef DEBUG |
| DECLARE_NEVERPROT_VAR(static int num_flushed, 0); |
| DECLARE_NEVERPROT_VAR(static int flush_last_stage, 0); |
| #endif |
| |
| static void |
| flush_fragments_free_futures(app_pc base, size_t size) |
| { |
| int i; |
| dcontext_t *tgt_dcontext; |
| ASSERT_OWN_MUTEX(true, &thread_initexit_lock); |
| ASSERT((allsynch_flusher == NULL && flusher == get_thread_private_dcontext()) || |
| (flusher == NULL && allsynch_flusher == get_thread_private_dcontext())); |
| ASSERT(flush_num_threads > 0); |
| ASSERT(flush_threads != NULL); |
| if (DYNAMO_OPTION(free_unmapped_futures) && !RUNNING_WITHOUT_CODE_CACHE()) { |
| /* We need to free the futures after all fragments have been unlinked, |
| * as unlinking will create new futures |
| */ |
| acquire_recursive_lock(&change_linking_lock); |
| for (i=0; i<flush_num_threads; i++) { |
| tgt_dcontext = flush_threads[i]->dcontext; |
| if (tgt_dcontext != NULL) { |
| fragment_delete_futures_in_region(tgt_dcontext, base, base + size); |
| thcounter_range_remove(tgt_dcontext, base, base + size); |
| } |
| } |
| if (SHARED_FRAGMENTS_ENABLED()) |
| fragment_delete_futures_in_region(GLOBAL_DCONTEXT, base, base + size); |
| release_recursive_lock(&change_linking_lock); |
| } /* else we leak them */ |
| } |
| |
| /* This routine begins a flush that requires full thread synch: currently, |
| * it is used for flushing coarse-grain units and for dr_flush_region() |
| */ |
| static void |
| flush_fragments_synchall_start(dcontext_t *ignored, app_pc base, size_t size, |
| bool exec_invalid) |
| { |
| dcontext_t *my_dcontext = get_thread_private_dcontext(); |
| app_pc exec_start = NULL, exec_end = NULL; |
| bool all_synched = true; |
| int i; |
| const thread_synch_state_t desired_state = |
| THREAD_SYNCH_SUSPENDED_VALID_MCONTEXT_OR_NO_XFER; |
| DEBUG_DECLARE(bool ok;) |
| KSTART(synchall_flush); |
| LOG(GLOBAL, LOG_FRAGMENT, 2, |
| "\nflush_fragments_synchall_start: thread "TIDFMT" suspending all threads\n", |
| get_thread_id()); |
| |
| STATS_INC(flush_synchall); |
| /* suspend all DR-controlled threads at safe locations */ |
| DEBUG_DECLARE(ok =) |
| synch_with_all_threads(desired_state, &flush_threads, &flush_num_threads, |
| THREAD_SYNCH_NO_LOCKS_NO_XFER, |
| /* if we fail to suspend a thread (e.g., for |
| * privilege reasons), ignore it since presumably |
| * the failed thread is some injected thread not |
| * running to-be-flushed code, so we continue |
| * with the flush! |
| */ |
| THREAD_SYNCH_SUSPEND_FAILURE_IGNORE); |
| ASSERT(ok); |
| /* now we own the thread_initexit_lock */ |
| ASSERT(OWN_MUTEX(&all_threads_synch_lock) && OWN_MUTEX(&thread_initexit_lock)); |
| |
| /* We do NOT set flusher as is_self_flushing() is all about |
| * couldbelinking, while our synch is of a different nature. The |
| * trace_abort() is_self_flushing() check is the only worrisome |
| * one: FIXME. |
| */ |
| ASSERT(flusher == NULL); |
| ASSERT(allsynch_flusher == NULL); |
| allsynch_flusher = my_dcontext; |
| flush_synchall = true; |
| ASSERT(flush_last_stage == 0); |
| DODEBUG({ flush_last_stage = 1; }); |
| |
| LOG(GLOBAL, LOG_FRAGMENT, 2, |
| "flush_fragments_synchall_start: walking the threads\n"); |
| /* We rely on coarse fragments not touching more than one vmarea region |
| * for our ibl invalidation. It's |
| * ok to invalidate more than we need to, so we don't care if there are |
| * multiple coarse units within this range. We just need the exec areas |
| * bounds that overlap the flush region. |
| */ |
| if (!executable_area_overlap_bounds(base, base+size, &exec_start, &exec_end, |
| 0, true/*doesn't matter w/ 0*/)) { |
| /* caller checks for overlap but lock let go so can get here; go ahead |
| * and do synch per flushing contract. |
| */ |
| exec_start = base; |
| exec_end = base+size; |
| } |
| LOG(GLOBAL, LOG_FRAGMENT, 2, |
| "flush_fragments_synchall_start: from "PFX"-"PFX" => coarse "PFX"-"PFX"\n", |
| base, base+size, exec_start, exec_end); |
| |
| /* FIXME: share some of this code that I duplicated from reset */ |
| for (i = 0; i < flush_num_threads; i++) { |
| dcontext_t *dcontext = flush_threads[i]->dcontext; |
| if (dcontext != NULL) { /* include my_dcontext here */ |
| DEBUG_DECLARE(uint removed;) |
| LOG(GLOBAL, LOG_FRAGMENT, 2, |
| "\tconsidering thread #%d "TIDFMT"\n", i, flush_threads[i]->id); |
| if (dcontext != my_dcontext) { |
| /* must translate BEFORE freeing any memory! */ |
| if (!thread_synch_successful(flush_threads[i])) { |
| /* FIXME case 9480: if we do get here for low-privilege handles |
| * or exceeding our synch try count, best to not move the thread |
| * as we won't get a clean translation. Chances are it is |
| * not in the region being flushed. |
| */ |
| SYSLOG_INTERNAL_ERROR_ONCE("failed to synch with thread during " |
| "synchall flush"); |
| LOG(THREAD, LOG_FRAGMENT|LOG_SYNCH, 2, |
| "failed to synch with thread #%d\n", i); |
| STATS_INC(flush_synchall_fail); |
| all_synched = false; |
| } else if (is_thread_currently_native(flush_threads[i])) { |
| /* native_exec's regain-control point is in our DLL, |
| * and lost-control threads are truly native, so no |
| * state to worry about except for hooks -- and we're |
| * not freeing the interception buffer. |
| */ |
| LOG(GLOBAL, LOG_FRAGMENT, 2, |
| "\tcurrently native so no translation needed\n"); |
| } else if (thread_synch_state_no_xfer(dcontext)) { |
| /* Case 6821: do not translate other synch-all-thread users. |
| * They have no fragment state, so leave alone. |
| * All flush call points are in syscalls or from nudges. |
| * Xref case 8901 on ensuring nudges don't try to return |
| * to the code cache. |
| */ |
| LOG(GLOBAL, LOG_FRAGMENT, 2, |
| "\tat THREAD_SYNCH_NO_LOCKS_NO_XFER so no translation needed\n"); |
| STATS_INC(flush_synchall_races); |
| } else { |
| translate_from_synchall_to_dispatch(flush_threads[i], desired_state); |
| } |
| } |
| if (dcontext == my_dcontext || thread_synch_successful(flush_threads[i])) { |
| last_exit_deleted(dcontext); |
| /* case 7394: need to abort other threads' trace building |
| * since the reset xfer to dispatch will disrupt it. |
| * also, with PR 299808, we now have thread-shared |
| * "undeletable" trace-temp fragments, so we need to abort |
| * all traces. |
| */ |
| if (is_building_trace(dcontext)) { |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| "\tsquashing trace of thread #%d\n", i); |
| trace_abort(dcontext); |
| } |
| } |
| /* Since coarse fragments never cross coarse/non-coarse executable region |
| * bounds, we can bound their bodies by taking |
| * executable_area_distinct_bounds(). This lets us remove by walking the |
| * ibl tables and looking only at tags, rather than walking the htable of |
| * each coarse unit. FIXME: not clear this is a perf win: I arbitrarily |
| * picked it under assumption that ibl tables are relatively small. It |
| * would be a clearer win if we could do the fine fragments this way |
| * also, but fine fragments are not constrained and could be missed using |
| * only a tag-based range remove. |
| */ |
| DEBUG_DECLARE(removed =) |
| fragment_remove_all_ibl_in_region(dcontext, exec_start, exec_end); |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| "\tremoved %d ibl entries in "PFX"-"PFX"\n", |
| removed, exec_start, exec_end); |
| /* Free any fine private fragments in the region */ |
| vm_area_allsynch_flush_fragments(dcontext, dcontext, base, base+size, |
| exec_invalid, all_synched/*ignored*/); |
| if (!SHARED_IBT_TABLES_ENABLED() && SHARED_FRAGMENTS_ENABLED()) { |
| /* Remove shared fine fragments from private ibl tables */ |
| vm_area_allsynch_flush_fragments(dcontext, GLOBAL_DCONTEXT, |
| base, base+size, |
| exec_invalid, all_synched/*ignored*/); |
| } |
| } |
| } |
| /* Remove shared coarse fragments from ibl tables before freeing any */ |
| if (SHARED_IBT_TABLES_ENABLED() && SHARED_FRAGMENTS_ENABLED()) |
| fragment_remove_all_ibl_in_region(GLOBAL_DCONTEXT, exec_start, exec_end); |
| /* Free coarse units and shared fine fragments, as well as removing shared fine |
| * entries in any shared ibl tables |
| */ |
| if (SHARED_FRAGMENTS_ENABLED()) { |
| vm_area_allsynch_flush_fragments(GLOBAL_DCONTEXT, GLOBAL_DCONTEXT, |
| base, base+size, exec_invalid, all_synched); |
| } |
| } |
| |
| static void |
| flush_fragments_synchall_end(dcontext_t *ignored) |
| { |
| thread_record_t **temp_threads = flush_threads; |
| DEBUG_DECLARE(dcontext_t *my_dcontext = get_thread_private_dcontext();) |
| LOG(GLOBAL, LOG_FRAGMENT, 2, |
| "flush_fragments_synchall_end: resuming all threads\n"); |
| |
| /* We need to clear this before we release the locks. We use a temp var |
| * so we can use end_synch_with_all_threads. |
| */ |
| flush_threads = NULL; |
| ASSERT(flusher == NULL); |
| flush_synchall = false; |
| ASSERT(dynamo_all_threads_synched); |
| ASSERT(allsynch_flusher == my_dcontext); |
| allsynch_flusher = NULL; |
| end_synch_with_all_threads(temp_threads, flush_num_threads, true/*resume*/); |
| KSTOP(synchall_flush); |
| } |
| |
| /* This routine begins a flush of the group of fragments in the memory |
| * region [base, base+size) by synchronizing with each thread and unlinking |
| * all private fragments in the region. |
| * |
| * The exec_invalid parameter must be set to indicate whether the |
| * executable area is being invalidated as well or this is just a capacity |
| * flush (or a flush to change instrumentation). |
| * |
| * If size==0 then no unlinking occurs; however, the full synch is |
| * performed (so the caller should check size if no action is desired on |
| * zero size). |
| * |
| * If size>0 and there is no executable area overlap, then no synch is |
| * performed and false is returned. The caller must acquire the executable |
| * areas lock and re-check the overlap if exec area manipulation is to be |
| * performed. Returns true otherwise. |
| */ |
| bool |
| flush_fragments_synch_unlink_priv(dcontext_t *dcontext, app_pc base, size_t size, |
| /* WARNING: case 8572: the caller owning this lock |
| * is incompatible w/ suspend-the-world flushing! |
| */ |
| bool own_initexit_lock, bool exec_invalid, |
| bool force_synchall _IF_DGCDIAG(app_pc written_pc)) |
| { |
| dcontext_t *tgt_dcontext; |
| per_thread_t *tgt_pt; |
| int i; |
| |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| "FLUSH STAGE 1: synch_unlink_priv(thread "TIDFMT" flushtime %d): "PFX"-"PFX"\n", |
| dcontext->owning_thread, flushtime_global, base, base + size); |
| /* Case 9750: to specify a region of size 0, do not pass in NULL as the base! |
| * Use EMPTY_REGION_{BASE,SIZE} instead. |
| */ |
| ASSERT(base != NULL || size != 0); |
| |
| /* our flushing design requires that flushers are NOT couldbelinking |
| * and are not holding any locks |
| */ |
| ASSERT(!is_self_couldbelinking()); |
| #if defined(DEADLOCK_AVOIDANCE) && defined(DEBUG) |
| if (own_initexit_lock) { |
| /* We can get here while holding the all_threads_synch_lock |
| * for detach (& prob. TerminateProcess(0) with other threads |
| * still active) on XP and 2003 via os_thread_stack_exit() |
| * (for other thread exit) if we have executed off that stack. |
| * Can be seen with Araktest detach on violation using the stack |
| * attack button. */ |
| ASSERT(thread_owns_first_or_both_locks_only(dcontext, |
| &thread_initexit_lock, |
| &all_threads_synch_lock)); |
| } else { |
| ASSERT_OWN_NO_LOCKS(); |
| } |
| #endif |
| ASSERT(dcontext == get_thread_private_dcontext()); |
| |
| /* quick check for overlap first by using read lock and avoiding |
| * thread_initexit_lock: |
| * if no overlap, must hold lock through removal of region |
| * if overlap, ok to release for a bit |
| */ |
| if (size > 0 && !executable_vm_area_executed_from(base, base+size)) { |
| /* Only a curiosity since we can have a race (not holding exec areas lock) */ |
| ASSERT_CURIOSITY((!SHARED_FRAGMENTS_ENABLED() || |
| !thread_vm_area_overlap(GLOBAL_DCONTEXT, base, base+size)) && |
| !thread_vm_area_overlap(dcontext, base, base+size)); |
| return false; |
| } |
| /* Only a curiosity since we can have a race (not holding exec areas lock) */ |
| ASSERT_CURIOSITY(size == 0 || |
| executable_vm_area_overlap(base, base+size, false/*no lock*/)); |
| |
| STATS_INC(num_flushes); |
| |
| if (force_synchall || |
| (size > 0 && executable_vm_area_coarse_overlap(base, base+size))) { |
| /* Coarse units do not support individual unlinking (they do prior to |
| * freezing, but we ignore that) and instead require all-thread-synch |
| * in order to flush. For that we cannot be already holding |
| * thread_initexit_lock! FIXME case 8572: the only caller who does hold it |
| * now is os_thread_stack_exit(). For now relying on that stack not |
| * overlapping w/ any coarse regions. |
| */ |
| ASSERT(!own_initexit_lock); |
| /* The synchall will flush fine as well as coarse so we'll be done */ |
| flush_fragments_synchall_start(dcontext, base, size, exec_invalid); |
| return true; |
| } |
| |
| /* Take a snapshot of the threads in the system. |
| * Grab the thread lock to prevent threads from being created or |
| * exited for the duration of this routine |
| * FIXME -- is that wise? could be a relatively long time! |
| * It's unlikely a new thread will run code in a region being |
| * unmapped...but we would like to prevent threads from exiting |
| * while we're messing with their data. |
| * FIXME: need to special-case every instance where thread_initexit_lock |
| * is grabbed, to avoid deadlocks! we already do for a thread exiting. |
| */ |
| if (!own_initexit_lock) |
| mutex_lock(&thread_initexit_lock); |
| ASSERT_OWN_MUTEX(true, &thread_initexit_lock); |
| flusher = dcontext; |
| get_list_of_threads(&flush_threads, &flush_num_threads); |
| |
| ASSERT(flush_last_stage == 0); |
| DODEBUG({ flush_last_stage = 1; }); |
| |
| /* FIXME: we can optimize this even more to not grab thread_initexit_lock */ |
| if (RUNNING_WITHOUT_CODE_CACHE()) /* case 7966: nothing to flush, ever */ |
| return true; |
| |
| /* Set the ref count of threads who may be using a deleted fragment. We |
| * include ourselves in the ref count as we could be invoked with a cache |
| * return point before any synch (as done w/ hotpatches) and should NOT |
| * consider ourselves done w/ all cache fragments right now. We assume we |
| * will soon hit a real synch point to dec the count for us, and don't try |
| * to specially handle the single-threaded case here. |
| */ |
| pending_delete_threads = flush_num_threads; |
| |
| DODEBUG({ num_flushed = 0; }); |
| |
| #ifdef WINDOWS |
| /* Make sure exit fcache if currently inside syscall. For thread-shared |
| * shared_syscall, we re-link after the shared fragments have all been |
| * unlinked and removed from the ibl tables. All we lose is the bounded time |
| * on a thread checking its private flush queue, which in a thread-shared |
| * configuration doesn't seem so bad. For thread-private this will work as |
| * well, with the same time bound lost, but we're not as worried about memory |
| * usage of thread-private configurations. |
| */ |
| if (DYNAMO_OPTION(shared_syscalls) && IS_SHARED_SYSCALL_THREAD_SHARED) |
| unlink_shared_syscall(GLOBAL_DCONTEXT); |
| #endif |
| |
| /* i#849: unlink while we clear out ibt */ |
| if (!special_ibl_xfer_is_thread_private()) |
| unlink_special_ibl_xfer(GLOBAL_DCONTEXT); |
| |
| for (i=0; i<flush_num_threads; i++) { |
| tgt_dcontext = flush_threads[i]->dcontext; |
| tgt_pt = (per_thread_t *) tgt_dcontext->fragment_field; |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| " considering thread #%d/%d = "TIDFMT"\n", i+1, flush_num_threads, |
| flush_threads[i]->id); |
| ASSERT(is_thread_known(tgt_dcontext->owning_thread)); |
| |
| /* can't do anything, even check if thread has any vm areas overlapping flush |
| * region, until sure thread is in fcache or somewhere that won't change |
| * vm areas or linking state |
| */ |
| mutex_lock(&tgt_pt->linking_lock); |
| /* Must explicitly check for self and avoid synch then, o/w will lock up |
| * if ever called from a could_be_linking location (currently only |
| * happens w/ app syscalls) |
| */ |
| if (tgt_dcontext != dcontext && tgt_pt->could_be_linking) { |
| /* remember we have a global lock, thread_initexit_lock, so two threads |
| * cannot be here at the same time! |
| */ |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| "\twaiting for thread "TIDFMT"\n", tgt_dcontext->owning_thread); |
| tgt_pt->wait_for_unlink = true; |
| mutex_unlock(&tgt_pt->linking_lock); |
| wait_for_event(tgt_pt->waiting_for_unlink); |
| mutex_lock(&tgt_pt->linking_lock); |
| tgt_pt->wait_for_unlink = false; |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| "\tdone waiting for thread "TIDFMT"\n", tgt_dcontext->owning_thread); |
| } else { |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| "\tthread "TIDFMT" synch not required\n", tgt_dcontext->owning_thread); |
| } |
| |
| /* it is now safe to access link, vm, and trace info in tgt_dcontext |
| * => FIXME: rename, since not just accessing linking info? |
| * FIXME: if includes vm access, are syscalls now bad? |
| * what about handle_modified_code, considered nolinking since straight |
| * from cache via fault handler? reading should be ok, but exec area splits |
| * and whatnot? |
| */ |
| |
| if (tgt_pt->about_to_exit) { |
| /* thread is about to exit, it's waiting for us to give up |
| * thread_initexit_lock -- we don't need to flush it |
| */ |
| goto next_thread; |
| } |
| |
| /* if a trace-in-progress crosses this region, must squash the trace |
| * (all traces are essentially frozen now since threads stop in dispatch) |
| */ |
| if (size > 0 /* else, no region to cross */ && |
| is_building_trace(tgt_dcontext)) { |
| void *trace_vmlist = cur_trace_vmlist(tgt_dcontext); |
| if (trace_vmlist != NULL && |
| vm_list_overlaps(tgt_dcontext, trace_vmlist, base, base+size)) { |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| "\tsquashing trace of thread "TIDFMT"\n", tgt_dcontext->owning_thread); |
| trace_abort(tgt_dcontext); |
| } |
| } |
| |
| #ifdef WINDOWS |
| /* Make sure exit fcache if currently inside syscall. If thread has no |
| * vm area overlap, will be re-linked down below before going to the |
| * next thread, unless we have shared fragments, in which case we force |
| * the thread to check_flush_queue() on its next synch point. It will |
| * re-link in vm_area_flush_fragments(). |
| */ |
| if (DYNAMO_OPTION(shared_syscalls) && !IS_SHARED_SYSCALL_THREAD_SHARED) |
| unlink_shared_syscall(tgt_dcontext); |
| #endif |
| /* i#849: unlink while we clear out ibt */ |
| if (special_ibl_xfer_is_thread_private()) |
| unlink_special_ibl_xfer(tgt_dcontext); |
| |
| /* Optimization for shared deletion strategy: perform flush work |
| * for a thread waiting at a system call on behalf of that thread |
| * (which we do before the flush tail synch, as if we did it now we |
| * would need to inc flushtime_global up front, and then we'd have synch |
| * issues trying to prevent a thread we hadn't synched with from checking |
| * the pending list before we put flushed fragments on it). |
| * We do count these threads in the ref count as we check the pending |
| * queue on their behalf after adding the newly flushed fragments to |
| * the queue, so the ref count gets decremented right away. |
| * |
| * We must do this AFTER unlinking shared_syscall's post-syscall ibl, to |
| * avoid races -- the thread will hit a real synch point before accessing |
| * any fragments or link info. |
| * Do this BEFORE checking whether fragments in region to catch all threads. |
| */ |
| ASSERT(!tgt_pt->at_syscall_at_flush); |
| if (DYNAMO_OPTION(syscalls_synch_flush) && get_at_syscall(tgt_dcontext)) { |
| /* we have to know exactly which threads were at_syscall here when |
| * we get to post-flush, so we cache in this special bool |
| */ |
| DEBUG_DECLARE(bool tables_updated;) |
| tgt_pt->at_syscall_at_flush = true; |
| #ifdef DEBUG |
| tables_updated = |
| #endif |
| update_all_private_ibt_table_ptrs(tgt_dcontext, tgt_pt); |
| STATS_INC(num_shared_flush_atsyscall); |
| DODEBUG({ |
| if (tables_updated) |
| STATS_INC(num_shared_tables_updated_atsyscall); |
| }); |
| } |
| |
| /* don't need to go any further if thread has no frags in region */ |
| if (size == 0 || !thread_vm_area_overlap(tgt_dcontext, base, base+size)) { |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| "\tthread "TIDFMT" has no fragments in region to flush\n", |
| tgt_dcontext->owning_thread); |
| #ifdef WINDOWS |
| /* restore, since won't be restored in vm_area_flush_fragments */ |
| if (DYNAMO_OPTION(shared_syscalls)) { |
| if (SHARED_FRAGMENTS_ENABLED()) { |
| /* we cannot re-link shared_syscall here as that would allow |
| * the target thread to enter to-be-flushed fragments prior |
| * to their being unlinked and removed from ibl tables -- so |
| * we force this thread to re-link in check_flush_queue. |
| * we could re-link after unlink/removal of fragments, |
| * if it's worth optimizing == case 6194/PR 210655. |
| */ |
| tgt_pt->flush_queue_nonempty = true; |
| STATS_INC(num_flushq_relink_syscall); |
| } else if (!IS_SHARED_SYSCALL_THREAD_SHARED) { |
| /* no shared fragments, and no private ones being flushed, |
| * so this thread is all set |
| */ |
| link_shared_syscall(tgt_dcontext); |
| } |
| } |
| #endif |
| if (special_ibl_xfer_is_thread_private()) { |
| if (SHARED_FRAGMENTS_ENABLED()) { |
| /* see shared_syscall relink comment: we have to delay the relink */ |
| tgt_pt->flush_queue_nonempty = true; |
| STATS_INC(num_flushq_relink_special_ibl_xfer); |
| } else |
| link_special_ibl_xfer(dcontext); |
| } |
| goto next_thread; |
| } |
| |
| LOG(THREAD, LOG_FRAGMENT, 2, "\tflushing fragments for thread "TIDFMT"\n", |
| flush_threads[i]->id); |
| DOLOG(2, LOG_FRAGMENT, { |
| if (tgt_dcontext != dcontext) { |
| LOG(tgt_dcontext->logfile, LOG_FRAGMENT, 2, |
| "thread "TIDFMT" is flushing our fragments\n", |
| dcontext->owning_thread); |
| } |
| }); |
| |
| if (size > 0) { |
| /* unlink all frags in overlapping regions, and mark regions for deletion */ |
| tgt_pt->flush_queue_nonempty = true; |
| #ifdef DEBUG |
| num_flushed += |
| #endif |
| vm_area_unlink_fragments(tgt_dcontext, base, base + size, 0 |
| _IF_DGCDIAG(written_pc)); |
| } |
| |
| next_thread: |
| /* for thread-shared, we CANNOT let any thread become could_be_linking, for normal |
| * flushing synch -- for thread-private, we can, but we CANNOT let any thread |
| * that we've already synched with for flushing go and change the exec areas |
| * vector! the simplest solution is to have thread-private, like thread-shared, |
| * stop all threads at a cache exit point. |
| * FIXME: optimize thread-private by allowing already-synched threads to |
| * continue but not grab executable_areas lock, while letting |
| * to-be-synched threads grab the lock (otherwise could wait forever) |
| */ |
| /* Since we must let go of lock for proper nesting, we use a new |
| * synch to stop threads at cache exit, since we need them all |
| * out of DR for duration of shared flush. |
| */ |
| if (tgt_dcontext != dcontext && !tgt_pt->could_be_linking) |
| tgt_pt->wait_for_unlink = true; /* stop at cache exit */ |
| mutex_unlock(&tgt_pt->linking_lock); |
| } |
| |
| return true; |
| } |
| |
| /* This routine continues a flush of one of two groups of fragments: |
| * 1) if list!=NULL, the list of shared fragments beginning at list and |
| * chained by next_vmarea (we assume that private fragments are |
| * easily deleted by the owning thread and do not require a flush). |
| * Caller should call vm_area_remove_fragment() for each target fragment, |
| * building a custom list chained by next_vmarea, and pass that list to this |
| * routine. |
| * 2) otherwise, all fragments in the memory region [base, base+size). |
| * |
| * This routine MUST be called after flush_fragments_synch_unlink_priv(), and |
| * must be followed with flush_fragments_end_synch(). |
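| * |
| * A minimal sketch of the list-based (case 1) protocol from the caller's side; |
| * illustrative only: where the candidate fragments come from is an assumption |
| * (the real list-based caller is the shared-cache-unit flush code), and the |
| * boolean arguments below are just one plausible combination. |
| * |
| *   fragment_t *custom_list = NULL; |
| *   flush_fragments_synch_unlink_priv(dcontext, EMPTY_REGION_BASE, |
| *                                     EMPTY_REGION_SIZE, false, false, false |
| *                                     _IF_DGCDIAG(NULL)); |
| *   for each shared fragment f chosen for deletion { |
| *       vm_area_remove_fragment(dcontext, f); |
| *       f->next_vmarea = custom_list;   (chain in any order) |
| *       custom_list = f; |
| *   } |
| *   flush_fragments_unlink_shared(dcontext, EMPTY_REGION_BASE, EMPTY_REGION_SIZE, |
| *                                 custom_list _IF_DGCDIAG(NULL)); |
| *   flush_fragments_end_synch(dcontext, false); |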
| */ |
| void |
| flush_fragments_unlink_shared(dcontext_t *dcontext, app_pc base, size_t size, |
| fragment_t *list _IF_DGCDIAG(app_pc written_pc)) |
| { |
| /* we would assert that size > 0 || list != NULL but we have to pass |
| * in 0 and NULL for unit flushing when no live fragments are in the unit |
| */ |
| |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| "FLUSH STAGE 2: unlink_shared(thread "TIDFMT"): flusher is "TIDFMT"\n", |
| dcontext->owning_thread, (flusher == NULL) ? -1 : flusher->owning_thread); |
| ASSERT_OWN_MUTEX(true, &thread_initexit_lock); |
| ASSERT(flush_threads != NULL); |
| ASSERT(flush_num_threads > 0); |
| ASSERT(flush_last_stage == 1); |
| DODEBUG({ flush_last_stage = 2; }); |
| |
| if (RUNNING_WITHOUT_CODE_CACHE()) /* case 7966: nothing to flush, ever */ |
| return; |
| if (flush_synchall) /* no more flush work to do */ |
| return; |
| |
| if (SHARED_FRAGMENTS_ENABLED()) { |
| /* Flushing shared fragments: the strategy is again immediate |
| * unlinking (plus hashtable removal and vm area list removal) |
| * with delayed deletion. Unlinking is atomic, and hashtable |
| * removal is safe for now since there are no shared bbs in ibls from |
| * the cache. But deletion needs to ensure that every thread that may |
| * hold a pointer to a shared fragment or may be inside a shared |
| * fragment is ok with the fragment being deleted. We use a reference count |
| * for each flushed region to ensure that every thread has reached a synch |
| * point before we actually free the fragments in the region. |
| * |
| * We do have pathological cases where a single thread at a wait syscall or |
| * an infinite loop in the cache will prevent all freeing. One solution |
| * to the syscall problem is to store a flag saying that a thread is at |
| * the shared_syscall routine. That routine is private and unlinked so |
| * we know the thread will hit a synch routine if it exits, so no race. |
| * We then wouldn't bother to inc the ref count for that thread. |
| * |
| * Our fallback proactive deletion is to do a full reset when the size |
| * of the pending deletion list gets too long, though we could be more |
| * efficient by only resetting the pending list, and by only suspending |
| * those threads that haven't dec-ed a ref count in a particular region. |
| */ |
| LOG(THREAD, LOG_FRAGMENT, 2, " flushing shared fragments\n"); |
| if (DYNAMO_OPTION(shared_deletion)) { |
| /* We use shared_cache_flush_lock to make atomic the increment of |
| * flushtime_global and the adding of pending deletion fragments with |
| * that flushtime, wrt other threads checking the pending list. |
| */ |
| mutex_lock(&shared_cache_flush_lock); |
| } |
| /* Increment flush count for shared deletion algorithm and for list-based |
| * flushing (such as for shared cache units). We could wait |
| * and only do this if we actually flush shared fragments, but it's |
| * simpler when done in this routine, and nearly every flush does flush |
| * shared fragments. |
| */ |
| increment_global_flushtime(); |
| /* Both vm_area_unlink_fragments and unlink_fragments_for_deletion call |
| * back to flush_invalidate_ibl_shared_target to remove shared |
| * fragments from private/shared ibl tables |
| */ |
| if (list == NULL) { |
| shared_flushed = |
| vm_area_unlink_fragments(GLOBAL_DCONTEXT, base, base + size, |
| pending_delete_threads _IF_DGCDIAG(written_pc)); |
| } else { |
| shared_flushed = unlink_fragments_for_deletion(GLOBAL_DCONTEXT, list, |
| pending_delete_threads); |
| } |
| if (DYNAMO_OPTION(shared_deletion)) |
| mutex_unlock(&shared_cache_flush_lock); |
| |
| DODEBUG({ |
| num_flushed += shared_flushed; |
| if (shared_flushed > 0) |
| STATS_INC(num_shared_flushes); |
| }); |
| } |
| |
| #ifdef WINDOWS |
| /* Re-link thread-shared shared_syscall */ |
| if (DYNAMO_OPTION(shared_syscalls) && IS_SHARED_SYSCALL_THREAD_SHARED) |
| link_shared_syscall(GLOBAL_DCONTEXT); |
| #endif |
| |
| if (!special_ibl_xfer_is_thread_private()) |
| link_special_ibl_xfer(GLOBAL_DCONTEXT); |
| |
| STATS_ADD(num_flushed_fragments, num_flushed); |
| DODEBUG({ |
| if (num_flushed > 0) { |
| LOG(THREAD, LOG_FRAGMENT, 1, "Flushed %5d fragments from "PFX"-"PFX"\n", |
| num_flushed, base, ((char *)base)+size); |
| } else { |
| STATS_INC(num_empty_flushes); |
| LOG(THREAD, LOG_FRAGMENT, 2, "Flushed 0 fragments from "PFX"-"PFX"\n", |
| base, ((char *)base)+size); |
| } |
| }); |
| } |
| |
| /* Invalidates (does not remove) shared fragment f from the private/shared |
| * ibl tables. Can only be called in flush stage 2. |
| */ |
| void |
| flush_invalidate_ibl_shared_target(dcontext_t *dcontext, fragment_t *f) |
| { |
| ASSERT(is_self_flushing()); |
| ASSERT(!flush_synchall); |
| ASSERT_OWN_MUTEX(true, &thread_initexit_lock); |
| ASSERT(flush_threads != NULL); |
| ASSERT(flush_num_threads > 0); |
| ASSERT(flush_last_stage == 2); |
| ASSERT(TEST(FRAG_SHARED, f->flags)); |
| /* case 7966: has no pt, no flushing either */ |
| if (RUNNING_WITHOUT_CODE_CACHE()) { |
| ASSERT_NOT_REACHED(); /* shouldn't get here */ |
| return; |
| } |
| if (!SHARED_IB_TARGETS()) |
| return; |
| if (SHARED_IBT_TABLES_ENABLED()) { |
| /* Remove from shared ibl tables. |
| * dcontext need not be GLOBAL_DCONTEXT. |
| */ |
| fragment_prepare_for_removal(dcontext, f); |
| } else { |
| /* We can't tell afterward which entries to remove from the private |
| * ibl tables, as we no longer keep fragment_t* pointers (we used to |
| * look for FRAG_WAS_DELETED) and we can't do a range remove (tags |
| * don't map regularly to regions). So we must invalidate each |
| * fragment as we process it. It's ok to walk the thread list |
| * here since we're post-synch for all threads. |
| */ |
| int i; |
| for (i=0; i<flush_num_threads; i++) { |
| fragment_prepare_for_removal(flush_threads[i]->dcontext, f); |
| } |
| } |
| } |
| |
| /* Must ONLY be called as the third part of flushing (after |
| * flush_fragments_synch_unlink_priv() and flush_fragments_unlink_shared()). |
| * Uses the static shared variables flush_threads and flush_num_threads. |
| */ |
| void |
| flush_fragments_end_synch(dcontext_t *dcontext, bool keep_initexit_lock) |
| { |
| dcontext_t *tgt_dcontext; |
| per_thread_t *tgt_pt; |
| int i; |
| |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| "FLUSH STAGE 3: end_synch(thread "TIDFMT"): flusher is "TIDFMT"\n", |
| dcontext->owning_thread, (flusher == NULL) ? -1 : flusher->owning_thread); |
| |
| if (!is_self_flushing() && !flush_synchall/* doesn't set flusher */) { |
| LOG(THREAD, LOG_FRAGMENT, 2, "\tnothing was flushed\n"); |
| ASSERT_DO_NOT_OWN_MUTEX(!keep_initexit_lock, &thread_initexit_lock); |
| ASSERT_OWN_MUTEX(keep_initexit_lock, &thread_initexit_lock); |
| return; |
| } |
| |
| ASSERT_OWN_MUTEX(true, &thread_initexit_lock); |
| |
| ASSERT(flush_threads != NULL); |
| ASSERT(flush_num_threads > 0); |
| ASSERT(flush_last_stage == 2); |
| DODEBUG({ flush_last_stage = 0; }); |
| |
| if (flush_synchall) { |
| flush_fragments_synchall_end(dcontext); |
| return; |
| } |
| |
| /* now can let all threads at DR synch point go |
| * FIXME: if implement thread-private optimization above, this would turn into |
| * re-setting exec areas lock to treat all threads uniformly |
| */ |
| for (i=flush_num_threads-1; i>=0; i--) { |
| /* case 7966: has no pt, no flushing either */ |
| if (RUNNING_WITHOUT_CODE_CACHE()) |
| continue; |
| |
| tgt_dcontext = flush_threads[i]->dcontext; |
| tgt_pt = (per_thread_t *) tgt_dcontext->fragment_field; |
| /* re-acquire lock */ |
| mutex_lock(&tgt_pt->linking_lock); |
| |
| /* Optimization for shared deletion strategy: perform flush work |
| * for a thread waiting at a system call, as we didn't add it to the |
| * ref count in the pre-flush synch. |
| * We assume that shared_syscall is still unlinked at this point and |
| * will not be relinked until we let the thread go. |
| */ |
| if (DYNAMO_OPTION(syscalls_synch_flush) && tgt_pt->at_syscall_at_flush) { |
| /* Act on behalf of the thread as though it's at a synch point, but |
| * only wrt shared fragments (we don't free private fragments here, |
| * though we could -- should we? may make flush time while holding |
| * lock take too long? FIXME) |
| * Currently this works w/ syscalls from dispatch, and w/ |
| * -shared_syscalls by using unprotected storage (thus a slight hole |
| * but very hard to exploit for security purposes: can get stale code |
| * executed, but we already have that window, or crash us). |
| * FIXME: Does not work w/ -ignore_syscalls, but those are private |
| * for now. |
| */ |
| DEBUG_DECLARE(uint pre_flushtime = flushtime_global;) |
| vm_area_check_shared_pending(tgt_dcontext, NULL); |
| /* lazy deletion may inc flushtime_global, so may have a higher |
| * value than our cached one, but should never be lower |
| */ |
| ASSERT(tgt_pt->flushtime_last_update >= pre_flushtime); |
| tgt_pt->at_syscall_at_flush = false; |
| } |
| |
| if (tgt_dcontext != dcontext) { |
| if (tgt_pt->could_be_linking) { |
| signal_event(tgt_pt->finished_with_unlink); |
| } else { |
| /* we don't need to wait on a !could_be_linking thread |
| * so we use this bool to tell whether we should signal |
| * the event. |
| * FIXME: really we want a pulse that wakes ALL waiting |
| * threads and then resets the event! |
| */ |
| tgt_pt->wait_for_unlink = false; |
| if (tgt_pt->soon_to_be_linking) |
| signal_event(tgt_pt->finished_all_unlink); |
| } |
| } |
| mutex_unlock(&tgt_pt->linking_lock); |
| } |
| |
| /* thread init/exit can proceed now */ |
| flusher = NULL; |
| global_heap_free(flush_threads, flush_num_threads*sizeof(thread_record_t*) |
| HEAPACCT(ACCT_THREAD_MGT)); |
| flush_threads = NULL; |
| if (!keep_initexit_lock) |
| mutex_unlock(&thread_initexit_lock); |
| } |
| |
| /* This routine performs flush stages 1 and 2 (synch_unlink_priv() |
| * and unlink_shared()) and then returns after grabbing the |
| * executable_areas lock so that removal of this area from the global list |
| * is atomic w/ the flush and local removals, before letting the threads go |
| * -- we return while holding both the thread_initexit_lock and the exec |
| * areas lock |
| * |
| * FIXME: this only helps thread-shared: for thread-private, should let |
| * already-synched threads continue but not grab executable_areas lock, while |
| * letting to-be-synched threads grab the lock. |
| * |
| * returning while holding locks isn't ideal -- but other choices are worse: |
| * 1) grab exec areas lock and then let go of initexit lock -- bad nesting |
| * 2) do all work here, which has to include: |
| * A) remove region (nearly all uses) == flush_fragments_and_remove_region |
| * B) change region from rw to r (splitting if nec) (vmareas.c app rw->r) |
| * C) restore writability and change to selfmod (splitting if nec) |
| * (vmareas.c handle_modified_code) |
| * D) remove region and then look up a certain pc and return whether |
| * flushing overlapped w/ it (vmareas.c handle_modified_code) |
| */ |
| void |
| flush_fragments_in_region_start(dcontext_t *dcontext, app_pc base, size_t size, |
| bool own_initexit_lock, bool free_futures, |
| bool exec_invalid, bool force_synchall |
| _IF_DGCDIAG(app_pc written_pc)) |
| { |
| KSTART(flush_region); |
| while (true) { |
| if (flush_fragments_synch_unlink_priv(dcontext, base, size, own_initexit_lock, |
| exec_invalid, force_synchall |
| _IF_DGCDIAG(written_pc))) { |
| break; |
| } else { |
| /* grab lock and then re-check overlap */ |
| executable_areas_lock(); |
| if (!executable_vm_area_executed_from(base, base+size)) { |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| "\tregion not executable, so no fragments to flush\n"); |
| /* caller will release lock! */ |
| STATS_INC(num_noncode_flushes); |
| return; |
| } |
| /* unlock and try again */ |
| executable_areas_unlock(); |
| } |
| } |
| |
| flush_fragments_unlink_shared(dcontext, base, size, NULL |
| _IF_DGCDIAG(written_pc)); |
| |
| /* We need to free the futures after all fragments have been unlinked */ |
| if (free_futures) { |
| flush_fragments_free_futures(base, size); |
| } |
| |
| executable_areas_lock(); |
| } |
| |
| /* must ONLY be called as the second half of flush_fragments_in_region_start(). |
| * uses the shared variables flush_threads and flush_num_threads |
| */ |
| void |
| flush_fragments_in_region_finish(dcontext_t *dcontext, bool keep_initexit_lock) |
| { |
| /* done w/ exec areas lock; also free any non-executed coarse units */ |
| free_nonexec_coarse_and_unlock(); |
| |
| flush_fragments_end_synch(dcontext, keep_initexit_lock); |
| KSTOP(flush_region); |
| } |
| |
| /* flush and remove region from exec list, atomically */ |
| void |
| flush_fragments_and_remove_region(dcontext_t *dcontext, app_pc base, size_t size, |
| bool own_initexit_lock, bool free_futures) |
| { |
| flush_fragments_in_region_start(dcontext, base, size, own_initexit_lock, |
| free_futures, true/*exec invalid*/, |
| false/*don't force synchall*/ _IF_DGCDIAG(NULL)); |
| /* ok to call on a non-exec region, so we don't need to test the return value; |
| * both flush routines will return quickly if nothing to flush/was flushed |
| */ |
| remove_executable_region(base, size, true/*have lock*/); |
| flush_fragments_in_region_finish(dcontext, own_initexit_lock); |
| |
| /* verify initexit lock is in the right state */ |
| ASSERT_OWN_MUTEX(own_initexit_lock, &thread_initexit_lock); |
| ASSERT_DO_NOT_OWN_MUTEX(!own_initexit_lock, &thread_initexit_lock); |
| } |
| |
| /* Flushes fragments from the region without any changes to the exec list. |
| * Does not free futures and caller can't be holding the initexit lock. |
| * FIXME - add argument parameters (free futures etc.) as needed. */ |
| void |
| flush_fragments_from_region(dcontext_t *dcontext, app_pc base, size_t size, |
| bool force_synchall) |
| { |
| /* we pass false to flush_fragments_in_region_start() below for owning the initexit |
| * lock */ |
| ASSERT_DO_NOT_OWN_MUTEX(true, &thread_initexit_lock); |
| |
| /* ok to call on a non-exec region, so we don't need to test the return value; |
| * both flush routines will return quickly if nothing to flush/was flushed */ |
| flush_fragments_in_region_start(dcontext, base, size, false /*don't own initexit*/, |
| false/*don't free futures*/, false/*exec valid*/, |
| force_synchall _IF_DGCDIAG(NULL)); |
| flush_fragments_in_region_finish(dcontext, false); |
| } |
| |
| /* Invalidate all fragments in all caches. Currently executed |
| * fragments may be alive until they reach an exit. |
| * |
| * Note that this does not necessarily free memory immediately, the way |
| * fcache_reset_all_caches_proactively() does. |
| */ |
| void |
| invalidate_code_cache() |
| { |
| dcontext_t *dcontext = get_thread_private_dcontext(); |
| LOG(GLOBAL, LOG_FRAGMENT, 2, "invalidate_code_cache()\n"); |
| flush_fragments_in_region_start(dcontext, UNIVERSAL_REGION_BASE, |
| UNIVERSAL_REGION_SIZE, |
| false, true /* remove futures */, |
| false /*exec valid*/, false /*don't force synchall*/ |
| _IF_DGCDIAG(NULL)); |
| flush_fragments_in_region_finish(dcontext, false); |
| } |
| |
| /* Flushes all areas stored in the vector toflush. |
| * Synchronization of toflush is up to caller, but as locks cannot be |
| * held when flushing, toflush must be thread-private. |
| * Currently only used for pcache hotp interop (case 9970). |
| */ |
| void |
| flush_vmvector_regions(dcontext_t *dcontext, vm_area_vector_t *toflush, |
| bool free_futures, bool exec_invalid) |
| { |
| vmvector_iterator_t vmvi; |
| app_pc start, end; |
| ASSERT(toflush != NULL && !TEST(VECTOR_SHARED, toflush->flags)); |
| ASSERT(!RUNNING_WITHOUT_CODE_CACHE()); |
| ASSERT(DYNAMO_OPTION(coarse_units) && DYNAMO_OPTION(use_persisted) |
| IF_HOTP(&& DYNAMO_OPTION(hot_patching))); |
| if (vmvector_empty(toflush)) |
| return; |
| vmvector_iterator_start(toflush, &vmvi); |
| while (vmvector_iterator_hasnext(&vmvi)) { |
| vmvector_iterator_next(&vmvi, &start, &end); |
| /* FIXME case 10086: optimization: batch the flushes together so we only |
| * synch once. Currently this is rarely used, and with few areas in the |
| * vector, so we don't bother. To batch them we'd need to assume the |
| * worst and do a synchall up front, which could be more costly than 2 |
| * or 3 separate flushes that do not involve coarse code. |
| */ |
| ASSERT_OWN_NO_LOCKS(); |
| flush_fragments_in_region_start(dcontext, start, end - start, |
| false/*no lock*/, free_futures, exec_invalid, |
| false/*don't force synchall*/ _IF_DGCDIAG(NULL)); |
| flush_fragments_in_region_finish(dcontext, false/*no lock*/); |
| STATS_INC(num_flush_vmvector); |
| } |
| vmvector_iterator_stop(&vmvi); |
| } |
| |
| /****************************************************************************/ |
| #if defined(INTERNAL) || defined(CLIENT_INTERFACE) |
| |
| void |
| fragment_output(dcontext_t *dcontext, fragment_t *f) |
| { |
| ASSERT(!TEST(FRAG_SHARED, f->flags) || |
| self_owns_recursive_lock(&change_linking_lock)); |
| if (SHOULD_OUTPUT_FRAGMENT(f->flags)) { |
| per_thread_t *pt = (dcontext == GLOBAL_DCONTEXT) ? shared_pt : |
| (per_thread_t *) dcontext->fragment_field; |
| output_trace(dcontext, pt, f, |
| IF_DEBUG_ELSE(GLOBAL_STAT(num_fragments), |
| GLOBAL_STAT(num_traces)+GLOBAL_STAT(num_bbs))+1); |
| } |
| } |
| |
| void |
| init_trace_file(per_thread_t *pt) |
| { |
| if (INTERNAL_OPTION(tracedump_binary)) { |
| /* first 4 bytes in binary file gives size of linkcounts |
| * 0 if no linkcounts |
| */ |
| tracedump_file_header_t hdr = |
| {CURRENT_API_VERSION, IF_X64_ELSE(true, false), 0 }; |
| #ifdef PROFILE_LINKCOUNT |
| if (dynamo_options.profile_counts) { |
| hdr.linkcount_size = sizeof(linkcount_type_t); |
| ASSERT_NOT_IMPLEMENTED(false == DYNAMO_OPTION(inline_trace_ibl)); |
| /* Cannot use PROFILE_LINKCOUNT with inline_trace_ibl */ |
| } |
| #endif |
| os_write(pt->tracefile, &hdr, sizeof(hdr)); |
| } |
| } |
| |
| void |
| exit_trace_file(per_thread_t *pt) |
| { |
| #ifdef PROFILE_LINKCOUNT |
| if (dynamo_options.tracedump_threshold > 0) { |
| if (INTERNAL_OPTION(tracedump_binary)) { |
| os_write(pt->tracefile, &pt->tracedump_num_below_threshold, |
| sizeof(pt->tracedump_num_below_threshold)); |
| os_write(pt->tracefile, &pt->tracedump_count_below_threshold, |
| sizeof(pt->tracedump_count_below_threshold)); |
| } else { |
| print_file(pt->tracefile, "\nTraces below dump threshold of %d: %d\n", |
| dynamo_options.tracedump_threshold, pt->tracedump_num_below_threshold); |
| print_file(pt->tracefile, "Total count below dump threshold: " |
| LINKCOUNT_FORMAT_STRING"\n", pt->tracedump_count_below_threshold); |
| } |
| } |
| #endif |
| close_log_file(pt->tracefile); |
| } |
| |
| /* Binary trace dump is used to save time and space. |
| * The format is in fragment.h. |
| * FIXME: add general symbol table support to disassembly? |
| * We'd dump num_targets, then fcache_enter, ibl, trace_cache_incr addrs? |
| * Reader would then add exit stubs & other traces to table? |
| * But links will target cache pcs... |
| */ |
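| |
| /* On-disk layout produced by output_trace_binary() below, as a reader would |
| * consume it (a summary of the writes below; the structs and exact field names |
| * are in fragment.h): |
| * |
| *   tracedump_file_header_t      once per file (its linkcount size field is 0 |
| *                                unless PROFILE_LINKCOUNT is in use) |
| *   then, per trace: |
| *     tracedump_trace_header_t   (bb count is 0 unless -tracedump_origins) |
| *     per bb, only with -tracedump_origins: |
| *       app_pc  tag |
| *       int     code size |
| *       byte    code[code size] |
| *     per exit stub: |
| *       STUB_DATA_FIXED_SIZE bytes of tracedump_stub_data_t |
| *       (plus a linkcount under PROFILE_LINKCOUNT, plus the separate-stub code |
| *        bytes when LINK_SEPARATE_STUB is set) |
| *     byte      body[size]       the code-cache copy of the trace body |
| */ |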
| |
| #define TRACEBUF_SIZE 2048 |
| |
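| /* Flushes the buffered bytes to the enclosing pt->tracefile and resets p to the |
| * start of buf whenever appending sz more bytes would run past the end of the |
| * TRACEBUF_SIZE-byte buffer. |
| */ |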
| #define TRACEBUF_MAKE_ROOM(p, buf, sz) do { \ |
| if (p+sz >= &buf[TRACEBUF_SIZE]) { \ |
| os_write(pt->tracefile, buf, (p - buf)); \ |
| p = buf; \ |
| } \ |
| } while (0) |
| |
| static void |
| output_trace_binary(dcontext_t *dcontext, per_thread_t *pt, fragment_t *f, |
| stats_int_t trace_num) |
| { |
| /* FIXME: |
| * We do not support PROFILE_RDTSC or various small fields |
| */ |
| /* FIXME: should allocate buffer elsewhere */ |
| byte buf[TRACEBUF_SIZE]; |
| byte *p = buf; |
| trace_only_t *t = TRACE_FIELDS(f); |
| linkstub_t *l; |
| tracedump_trace_header_t hdr = { |
| (int) trace_num, f->tag, f->start_pc, f->prefix_size, 0, f->size, |
| (INTERNAL_OPTION(tracedump_origins) ? t->num_bbs : 0), |
| IF_X64_ELSE(!TEST(FRAG_32_BIT, f->flags), false), |
| }; |
| tracedump_stub_data_t stub; |
| /* Should we widen the identifier? */ |
| IF_X64(ASSERT(CHECK_TRUNCATE_TYPE_int(trace_num))); |
| |
| for (l = FRAGMENT_EXIT_STUBS(f); l != NULL; l = LINKSTUB_NEXT_EXIT(l)) |
| hdr.num_exits++; |
| |
| TRACEBUF_MAKE_ROOM(p, buf, sizeof(hdr)); |
| *((tracedump_trace_header_t *)p) = hdr; |
| p += sizeof(hdr); |
| |
| if (INTERNAL_OPTION(tracedump_origins)) { |
| uint i; |
| for (i=0; i<t->num_bbs; i++) { |
| instr_t *inst; |
| instrlist_t *ilist; |
| int size = 0; |
| |
| TRACEBUF_MAKE_ROOM(p, buf, sizeof(app_pc)); |
| *((app_pc *)p) = t->bbs[i].tag; |
| p += sizeof(app_pc); |
| |
| /* we assume that the target is readable, since we dump prior |
| * to unloading of modules on flush events |
| */ |
| ilist = build_app_bb_ilist(dcontext, t->bbs[i].tag, INVALID_FILE); |
| for (inst = instrlist_first(ilist); inst; inst = instr_get_next(inst)) { |
| size += instr_length(dcontext, inst); |
| } |
| |
| TRACEBUF_MAKE_ROOM(p, buf, sizeof(int)); |
| *((int *)p) = size; |
| p += sizeof(int); |
| |
| for (inst = instrlist_first(ilist); inst; inst = instr_get_next(inst)) { |
| TRACEBUF_MAKE_ROOM(p, buf, instr_length(dcontext, inst)); |
| /* PR 302353: we can't use instr_encode() as it will |
| * try to re-relativize rip-rel instrs, which may fail |
| */ |
| ASSERT(instr_get_raw_bits(inst) != NULL); |
| memcpy(p, instr_get_raw_bits(inst), instr_length(dcontext, inst)); |
| p += instr_length(dcontext, inst); |
| } |
| /* free the instrlist_t elements */ |
| instrlist_clear_and_destroy(dcontext, ilist); |
| } |
| } |
| |
| ASSERT(SEPARATE_STUB_MAX_SIZE == DIRECT_EXIT_STUB_SIZE(0)); |
| |
| for (l = FRAGMENT_EXIT_STUBS(f); l != NULL; l = LINKSTUB_NEXT_EXIT(l)) { |
| cache_pc stub_pc = EXIT_STUB_PC(dcontext, f, l); |
| stub.cti_offs = l->cti_offset; |
| stub.stub_pc = stub_pc; |
| stub.target = EXIT_TARGET_TAG(dcontext, f, l); |
| stub.linked = TEST(LINK_LINKED, l->flags); |
| stub.stub_size = EXIT_HAS_STUB(l->flags, f->flags) ? |
| DIRECT_EXIT_STUB_SIZE(f->flags) : |
| 0 /* no stub needed: -no_indirect_stubs */; |
| ASSERT(DIRECT_EXIT_STUB_SIZE(f->flags) <= SEPARATE_STUB_MAX_SIZE); |
| |
| TRACEBUF_MAKE_ROOM(p, buf, STUB_DATA_FIXED_SIZE); |
| memcpy(p, &stub, STUB_DATA_FIXED_SIZE); |
| p += STUB_DATA_FIXED_SIZE; |
| |
| #ifdef PROFILE_LINKCOUNT |
| if (dynamo_options.profile_counts) { |
| *((linkcount_type_t *)p) = l->count; |
| p += sizeof(linkcount_type_t); |
| } |
| #endif |
| if (TEST(LINK_SEPARATE_STUB, l->flags) && stub_pc != NULL) { |
| TRACEBUF_MAKE_ROOM(p, buf, DIRECT_EXIT_STUB_SIZE(f->flags)); |
| ASSERT(stub_pc < f->start_pc || stub_pc >= f->start_pc+f->size); |
| memcpy(p, stub_pc, DIRECT_EXIT_STUB_SIZE(f->flags)); |
| p += DIRECT_EXIT_STUB_SIZE(f->flags); |
| } else { /* ensure client's method of identifying separate stubs works */ |
| ASSERT(stub_pc == NULL /* no stub at all */ || |
| (stub_pc >= f->start_pc && stub_pc < f->start_pc+f->size)); |
| } |
| } |
| |
| if (f->size >= TRACEBUF_SIZE) { |
| os_write(pt->tracefile, buf, (p - buf)); |
| p = buf; |
| os_write(pt->tracefile, f->start_pc, f->size); |
| } else { |
| /* single syscall for all but largest traces */ |
| TRACEBUF_MAKE_ROOM(p, buf, f->size); |
| memcpy(p, f->start_pc, f->size); |
| p += f->size; |
| os_write(pt->tracefile, buf, (p - buf)); |
| p = buf; |
| } |
| } |
| |
| /* Output the contents of the specified trace. |
| * Does full disassembly of every instruction. |
| * If deleted_at != -1, it is taken to be the fragment id that |
| * caused the flushing of this fragment from the cache |
| * If f is shared, pt argument must be shared_pt, and caller must hold |
| * the change_linking_lock. |
| */ |
| static void |
| output_trace(dcontext_t *dcontext, per_thread_t *pt, fragment_t *f, |
| stats_int_t deleted_at) |
| { |
| trace_only_t *t = TRACE_FIELDS(f); |
| #ifdef WINDOWS |
| char buf[MAXIMUM_PATH]; |
| #endif |
| stats_int_t trace_num; |
| bool locked_vmareas = false, ok; |
| dr_isa_mode_t old_mode; |
| ASSERT(SHOULD_OUTPUT_FRAGMENT(f->flags)); |
| ASSERT(TEST(FRAG_IS_TRACE, f->flags)); |
| ASSERT(!TEST(FRAG_SELFMOD_SANDBOXED, f->flags)); /* no support for selfmod */ |
| /* already been output? caller should check this flag, just like trace flag */ |
| ASSERT(!TEST(FRAG_TRACE_OUTPUT, f->flags)); |
| ASSERT(!TEST(FRAG_SHARED, f->flags) || |
| self_owns_recursive_lock(&change_linking_lock)); |
| f->flags |= FRAG_TRACE_OUTPUT; |
| |
| #ifdef PROFILE_LINKCOUNT |
| if (dynamo_options.tracedump_threshold > 0) { |
| linkcount_type_t count = get_total_linkcount(f); |
| if (count < (linkcount_type_t) dynamo_options.tracedump_threshold) { |
| pt->tracedump_num_below_threshold++; |
| pt->tracedump_count_below_threshold += count; |
| return; |
| } |
| } |
| #endif |
| |
| LOG(THREAD, LOG_FRAGMENT, 4, "output_trace: F%d("PFX")\n", f->id, f->tag); |
| /* Recreate in same mode as original fragment */ |
| ok = dr_set_isa_mode(dcontext, FRAG_ISA_MODE(f->flags), &old_mode); |
| ASSERT(ok); |
| |
| /* xref 8131/8202: if dynamo_resetting we don't need to grab the tracedump |
| * mutex to ensure we're the only writer, and grabbing it here on the reset |
| * path can lead to a rank-order violation. */ |
| if (!dynamo_resetting) { |
| /* We must grab shared_vm_areas lock first to avoid rank order (i#1157) */ |
| locked_vmareas = acquire_vm_areas_lock_if_not_already(dcontext, FRAG_SHARED); |
| mutex_lock(&tracedump_mutex); |
| } |
| trace_num = tcount; |
| tcount++; |
| if (!TEST(FRAG_SHARED, f->flags)) { |
| /* No lock is needed because we use thread-private files. |
| * If dumping traces for a different thread (dynamo_other_thread_exit |
| * for ex.) caller is responsible for the necessary synchronization. */ |
| ASSERT(pt != shared_pt); |
| if (!dynamo_resetting) { |
| mutex_unlock(&tracedump_mutex); |
| if (locked_vmareas) { |
| locked_vmareas = false; |
| release_vm_areas_lock(dcontext, FRAG_SHARED); |
| } |
| } |
| } else { |
| ASSERT(pt == shared_pt); |
| } |
| |
| /* binary dump requested? */ |
| if (INTERNAL_OPTION(tracedump_binary)) { |
| output_trace_binary(dcontext, pt, f, trace_num); |
| goto output_trace_done; |
| } |
| |
| /* just origins => just bb tags in text */ |
| if (INTERNAL_OPTION(tracedump_origins) && |
| !INTERNAL_OPTION(tracedump_text)) { |
| uint i; |
| print_file(pt->tracefile, "Trace %d\n", tcount); |
| #ifdef DEBUG |
| print_file(pt->tracefile, "Fragment %d\n", f->id); |
| #endif |
| for (i = 0; i < t->num_bbs; i++) { |
| print_file(pt->tracefile, "\tbb %d = "PFX"\n", i, t->bbs[i].tag); |
| } |
| print_file(pt->tracefile, "\n"); |
| goto output_trace_done; |
| } |
| |
| /* full text dump */ |
| |
| print_file(pt->tracefile, "==============================================" |
| "=============================\n\n"); |
| print_file(pt->tracefile, "TRACE # %d\n", tcount); |
| #ifdef DEBUG |
| print_file(pt->tracefile, "Fragment # %d\n", f->id); |
| #endif |
| print_file(pt->tracefile, "Tag = "PFX"\n", f->tag); |
| print_file(pt->tracefile, "Thread = %d\n", get_thread_id()); |
| if (deleted_at > -1) { |
| print_file(pt->tracefile, |
| "*** Flushed from cache when top fragment id was %d\n", |
| deleted_at); |
| } |
| |
| #ifdef WINDOWS |
| /* FIXME: for fragments flushed by unloaded modules, naturally we |
| * won't be able to get the module name b/c by now it is unloaded, |
| * the only fix is to keep a listing of mapped modules and only |
| * unmap in our listing at this point |
| */ |
| get_module_name(f->tag, buf, sizeof(buf)); |
| if (buf[0] != '\0') |
| print_file(pt->tracefile, "Module of basic block 0 = %s\n", buf); |
| else |
| print_file(pt->tracefile, "Module of basic block 0 = <unknown>\n"); |
| #endif |
| |
| #if defined(INTERNAL) || defined(CLIENT_INTERFACE) |
| if (INTERNAL_OPTION(tracedump_origins)) { |
| uint i; |
| print_file(pt->tracefile, "\nORIGINAL CODE:\n"); |
| for (i = 0; i < t->num_bbs; i++) { |
| /* we only care about the printing that the build routine does */ |
| print_file(pt->tracefile, "basic block # %d: ", i); |
| /* we assume that the target is readable, since we dump prior |
| * to unloading of modules on flush events |
| */ |
| ASSERT(is_readable_without_exception(t->bbs[i].tag, sizeof(t->bbs[i]))); |
| disassemble_app_bb(dcontext, t->bbs[i].tag, pt->tracefile); |
| } |
| print_file(pt->tracefile, "END ORIGINAL CODE\n\n"); |
| } |
| #endif |
| |
| #ifdef PROFILE_RDTSC |
| if (dynamo_options.profile_times) { |
| /* Cycle counts are too high due to extra instructions needed |
| * to save regs and store time, so we subtract a constant for |
| * each fragment entry. |
| * Experiments using large-iter loops show that: |
| * empty loop overhead = 2 cycles |
| * 2 pushes, 2 moves, 2 pops = 7 cycles - 2 cycles = 5 cycles |
| * add in 1 rdtsc = 38 cycles - 2 cycles = 36 cycles |
| * add in 2 rdtsc = 69 cycles - 2 cycles = 67 cycles |
| * If we assume the time transfer happens in the middle of the rdtsc, we |
| * should use 36 cycles. |
| */ |
| trace_only_t *tr = TRACE_FIELDS(f); |
| const int adjustment = 37; |
| uint64 real_time = tr->total_time; |
| uint64 temp; |
| uint time_top, time_bottom; |
| print_file(pt->tracefile, "Size = %d (+ %d for profiling)\n", |
| f->size-profile_call_size(), profile_call_size()); |
| print_file(pt->tracefile, "Profiling:\n"); |
| print_file(pt->tracefile, "\tcount = "UINT64_FORMAT_STRING"\n", tr->count); |
| print_file(pt->tracefile, "\tmeasured cycles = "PFX"\n", real_time); |
| temp = tr->count * (uint64)adjustment; |
| if (real_time < temp) { |
| print_file(pt->tracefile, |
| "\t ERROR: adjustment too large, cutting off at 0, should use < %d\n", |
| (int)(real_time / tr->count)); |
| real_time = 0; |
| } else { |
| real_time -= temp; |
| } |
| print_file(pt->tracefile, "\tadjusted cycles = "PFX"\n", real_time); |
| divide_uint64_print(real_time, kilo_hertz, false, 6, |
| &time_top, &time_bottom); |
| print_file(pt->tracefile, "\ttime = %u.%.6u ms\n", |
| time_top, time_bottom); |
| } else { |
| print_file(pt->tracefile, "Size = %d\n", f->size); |
| } |
| #else |
| print_file(pt->tracefile, "Size = %d\n", f->size); |
| #endif /* PROFILE_RDTSC */ |
| |
| #ifdef PROFILE_LINKCOUNT |
| if (dynamo_options.profile_counts) { |
| int num_exits = 0; |
| linkstub_t *ls; |
| print_file(pt->tracefile, "Exit stubs:\n"); |
| for (ls = FRAGMENT_EXIT_STUBS(f); ls; ls = LINKSTUB_NEXT_EXIT(ls)) { |
| int id = -1; |
| app_pc target; |
| if ((ls->flags & LINK_INDIRECT) != 0) { |
| target = 0; |
| id = -1; |
| } else { |
| # ifdef DEBUG |
| fragment_t *targetf = fragment_lookup(dcontext, |
| EXIT_TARGET_TAG(dcontext, f, ls)); |
| id = (targetf != NULL) ? targetf->id : -1; |
| # else |
| id = -1; |
| # endif |
| target = EXIT_TARGET_TAG(dcontext, f, ls); |
| } |
| print_file(pt->tracefile, |
| "\t#%d: target = "PFX" (F#%d), count = " |
| LINKCOUNT_FORMAT_STRING "%s\n", |
| num_exits, target, id, ls->count, |
| ((ls->flags & LINK_LINKED) != 0) ? ", linked" : ""); |
| num_exits++; |
| } |
| # ifdef SIDELINE_COUNT_STUDY |
| if (dynamo_options.sideline && t->count_old_pre > (linkcount_type_t)0) { |
| print_file(pt->tracefile, "Sideline: pre-opt count = " |
| LINKCOUNT_FORMAT_STRING "\n", |
| t->count_old_pre); |
| print_file(pt->tracefile, "\tpost-opt count = " |
| LINKCOUNT_FORMAT_STRING "\n", |
| t->count_old_post); |
| print_file(pt->tracefile, "\tnew trace count = " |
| LINKCOUNT_FORMAT_STRING "\n", |
| get_total_linkcount(f)); |
| } |
| # endif |
| } |
| #endif /* PROFILE_LINKCOUNT */ |
| |
| #if defined(INTERNAL) || defined(CLIENT_INTERFACE) |
| print_file(pt->tracefile, "Body:\n"); |
| disassemble_fragment_body(dcontext, f, pt->tracefile); |
| |
| print_file(pt->tracefile, "END TRACE %d\n\n", tcount); |
| #endif |
| |
| output_trace_done: |
| dr_set_isa_mode(dcontext, old_mode, NULL); |
| if (TEST(FRAG_SHARED, f->flags) && !dynamo_resetting) { |
| ASSERT_OWN_MUTEX(true, &tracedump_mutex); |
| mutex_unlock(&tracedump_mutex); |
| if (locked_vmareas) |
| release_vm_areas_lock(dcontext, FRAG_SHARED); |
| } else { |
| ASSERT_DO_NOT_OWN_MUTEX(true, &tracedump_mutex); |
| } |
| } |
| |
| #endif /* defined(INTERNAL) || defined(CLIENT_INTERFACE) */ |
| |
| /****************************************************************************/ |
| |
| #ifdef PROFILE_RDTSC |
| /* This routine is called at the start of every trace. |
| * It assumes the caller has saved the caller-saved registers |
| * (eax, ecx, edx). |
| * |
| * See insert_profile_call() for the assembly code prefix that calls |
| * this routine. The end time is computed in the assembly code and passed |
| * in to have as accurate times as possible (don't want the profiling overhead |
| * added into the times). Also, the assembly code computes the new start |
| * time after this routine returns. |
| * |
| * We could generate a custom copy of this routine |
| * for every thread, but we don't do that now -- don't need to. |
| */ |
| void |
| profile_fragment_enter(fragment_t *f, uint64 end_time) |
| { |
| dcontext_t *dcontext; |
| #ifdef WINDOWS |
| /* must get last error prior to getting dcontext */ |
| int error_code = get_last_error(); |
| #endif |
| trace_only_t *t = TRACE_FIELDS(f); |
| |
| /* this is done in assembly: uint64 end_time = get_time() */ |
| dcontext = get_thread_private_dcontext(); |
| |
| /* increment this fragment's execution count */ |
| t->count++; |
| |
| /***********************************************************************/ |
| /* all-in-cache sequence profiling */ |
| |
| /* top ten cache times */ |
| dcontext->cache_frag_count++; |
| |
| /***********************************************************************/ |
| |
| /* we rely on dispatch being the only way to enter the fcache. |
| * dispatch sets prev_fragment to null prior to entry */ |
| if (dcontext->prev_fragment != NULL) { |
| trace_only_t *last_t = TRACE_FIELDS(dcontext->prev_fragment); |
| ASSERT((dcontext->prev_fragment->flags & FRAG_IS_TRACE) != 0); |
| /* charge time to last fragment */ |
| last_t->total_time += (end_time - dcontext->start_time); |
| } |
| |
| /* set up for next fragment */ |
| dcontext->prev_fragment = f; |
| /* this is done in assembly: dcontext->start_time = get_time() */ |
| |
| #ifdef WINDOWS |
| /* restore app's error code */ |
| set_last_error(error_code); |
| #endif |
| } |
| |
| /* this routine is called from dispatch after exiting the fcache |
| * it finishes up the final fragment's time slot |
| */ |
| void |
| profile_fragment_dispatch(dcontext_t *dcontext) |
| { |
    /* End the time slot ASAP.  Should we try to move this into the assembly
     * routines that exit the cache?  Probably not worth it: a long-running
     * program shouldn't be exiting the cache that often. */
| uint64 end_time = get_time(); |
| bool tagtable = LINKSTUB_INDIRECT(dcontext->last_exit->flags); |
    if (dcontext->prev_fragment != NULL &&
        (dcontext->prev_fragment->flags & FRAG_IS_TRACE) != 0) {
| /* end time slot, charge time to last fragment |
| * there's more overhead here than other time endings, so subtract |
| * some time off. these numbers are pretty arbitrary: |
| */ |
| trace_only_t *last_t = TRACE_FIELDS(dcontext->prev_fragment); |
| const uint64 adjust = (tagtable) ? 72 : 36; |
| uint64 add = end_time - dcontext->start_time; |
| if (add < adjust) { |
            SYSLOG_INTERNAL_ERROR("ERROR: profile_fragment_dispatch: add was %d, "
                                  "tagtable %d", (int)add, tagtable);
| add = 0; |
| } else |
| add -= adjust; |
| ASSERT((dcontext->prev_fragment->flags & FRAG_IS_TRACE) != 0); |
| last_t->total_time += add; |
| } |
| } |
| #endif /* PROFILE_RDTSC */ |
| |
| |
| /******************************************************************************* |
| * COARSE-GRAIN FRAGMENT HASHTABLE INSTANTIATION |
| */ |
| |
| /* Synch model: the htable lock should be held during all |
| * app_to_cache_t manipulations, to be consistent (not strictly |
| * necessary as there are no removals from the htable except in |
| * all-thread-synch flushes). Copies of a looked-up cache_pc are safe |
| * to use w/o the htable lock, or any lock -- they're pointers into |
| * cache units, and there are no deletions of cache units, except in |
| * all-thread-synch flushes. |
| */ |
| |
| /* 2 macros w/ name and types are duplicated in fragment.h -- keep in sync */ |
| #define NAME_KEY coarse |
| #define ENTRY_TYPE app_to_cache_t |
| static app_to_cache_t a2c_empty = { NULL, NULL }; |
| static app_to_cache_t a2c_sentinel = { /*assume invalid*/(app_pc)PTR_UINT_MINUS_1, |
| NULL }; |
| /* FIXME: want to inline the app_to_cache_t struct just like lookuptable |
| * does and use it for main table -- no support for that right now |
| */ |
| /* not defining HASHTABLE_USE_LOOKUPTABLE */ |
| #define ENTRY_TAG(f) ((ptr_uint_t)(f).app) |
| #define ENTRY_EMPTY (a2c_empty) |
| #define ENTRY_SENTINEL (a2c_sentinel) |
| #define ENTRY_IS_EMPTY(f) ((f).app == a2c_empty.app) |
| #define ENTRY_IS_SENTINEL(f) ((f).app == a2c_sentinel.app) |
| #define ENTRY_IS_INVALID(f) (false) /* no invalid entries */ |
| #define ENTRIES_ARE_EQUAL(t,f,g) ((f).app == (g).app) |
| #define HASHTABLE_WHICH_HEAP(flags) (ACCT_FRAG_TABLE) |
| /* note that we give the th table a lower-ranked coarse_th_htable_rwlock */ |
| #define COARSE_HTLOCK_RANK coarse_table_rwlock /* for use after hashtablex.h */ |
| #define HTLOCK_RANK COARSE_HTLOCK_RANK |
| #define HASHTABLE_SUPPORT_PERSISTENCE 1 |
| |
| #include "hashtablex.h" |
| /* All defines are undef-ed at end of hashtablex.h |
| * Would be nice to re-use ENTRY_IS_EMPTY, etc., though w/ multiple htables |
| * in same file can't realistically get away w/o custom defines like these: |
| */ |
| #define A2C_ENTRY_IS_EMPTY(a2c) ((a2c).app == NULL) |
| #define A2C_ENTRY_IS_SENTINEL(a2c) ((a2c).app == a2c_sentinel.app) |
| #define A2C_ENTRY_IS_REAL(a2c) (!A2C_ENTRY_IS_EMPTY(a2c) && !A2C_ENTRY_IS_SENTINEL(a2c)) |
| |
| /* required routines for hashtable interface that we don't need for this instance */ |
| |
| static void |
| hashtable_coarse_init_internal_custom(dcontext_t *dcontext, coarse_table_t *htable) |
| { /* nothing */ |
| } |
| |
| static void |
| hashtable_coarse_resized_custom(dcontext_t *dcontext, coarse_table_t *htable, |
| uint old_capacity, app_to_cache_t *old_table, |
| app_to_cache_t *old_table_unaligned, |
| uint old_ref_count, uint old_table_flags) |
| { /* nothing */ |
| } |
| |
| # ifdef DEBUG |
| static void |
| hashtable_coarse_study_custom(dcontext_t *dcontext, coarse_table_t *htable, |
                              uint entries_inc/*amount table->entries was pre-incremented*/)
| { /* nothing */ |
| } |
| # endif |
| |
| static void |
| hashtable_coarse_free_entry(dcontext_t *dcontext, coarse_table_t *htable, |
| app_to_cache_t entry) |
| { |
| /* nothing to do, data is inlined */ |
| } |
| |
| /* i#670: To handle differing app addresses from different module |
| * bases across different executions, we store the persist-time abs |
| * addrs in our tables and always shift on lookup. For frozen exit |
| * stubs, we have the frozen fcache return convert to a cur-base abs |
| * addr so this shift will then restore to persist-time. |
| * |
| * XXX: this is needed for -persist_trust_textrel, but for Windows |
| * bases will only be different when we need to apply relocations, and |
| * there we could just add an extra relocation per exit stub, which |
| * would end up being lower overhead for long-running apps, but may be |
| * higher than the overhead of shifting for short-running. For now, |
| * I'm enabling this as cross-platform, and we can do perf |
| * measurements later if desired. |
| * |
| * Storing persist-time abs addr versus storing module offsets: note |
| * that whatever we do we want a frozen-unit-only solution so we don't |
| * want to, say, always store module offsets for non-frozen units. |
| * This is because we support mixing coarse and fine and we have to |
| * use abs addrs in fine tables b/c they're not per-module. We could |
| * store offsets in frozen only, but then we'd have to do extra work |
| * when freezing. Plus, by storing absolute, when the module base |
| * lines up we can avoid the shift on the exit stubs (although today |
| * we only avoid exit stub overhead for trace heads when the base |
| * matches). |
| */ |
| static inline app_to_cache_t |
| coarse_lookup_internal(dcontext_t *dcontext, app_pc tag, coarse_table_t *table) |
| { |
| /* note that for mod_shift we don't need to compare to bounds b/c |
| * this is a table for this module only |
| */ |
| app_to_cache_t a2c = |
| hashtable_coarse_lookup(dcontext, (ptr_uint_t)(tag + table->mod_shift), table); |
| if (table->mod_shift != 0 && A2C_ENTRY_IS_REAL(a2c)) |
| a2c.app -= table->mod_shift; |
| return a2c; |
| } |
/* I would #define hashtable_coarse_lookup as DO_NOT_USE but we have to use it
 * for pclookup.
 */
| |
| /* Pass 0 for the initial capacity to use the default. |
 * Initial capacities are numbers of entries, NOT bits in the hash mask.
| */ |
| void |
| fragment_coarse_htable_create(coarse_info_t *info, uint init_capacity, |
| uint init_th_capacity) |
| { |
| coarse_table_t *th_htable; |
| coarse_table_t *htable; |
| uint init_size; |
| ASSERT(SHARED_FRAGMENTS_ENABLED()); |
| |
| /* Case 9537: If we start the new table small and grow it we have large |
| * collision chains as we map the lower address space of a large table into |
| * the same lower fraction of a smaller table, so we create our table fully |
| * sized up front. |
| */ |
| if (init_capacity != 0) { |
| init_size = hashtable_bits_given_entries(init_capacity, |
| DYNAMO_OPTION(coarse_htable_load)); |
| } else |
| init_size = INIT_HTABLE_SIZE_COARSE; |
| LOG(GLOBAL, LOG_FRAGMENT, 2, "Coarse %s htable %d capacity => %d bits\n", |
| info->module, init_capacity, init_size); |
| htable = NONPERSISTENT_HEAP_TYPE_ALLOC(GLOBAL_DCONTEXT, coarse_table_t, |
| ACCT_FRAG_TABLE); |
| hashtable_coarse_init(GLOBAL_DCONTEXT, htable, init_size, |
| DYNAMO_OPTION(coarse_htable_load), |
| (hash_function_t)INTERNAL_OPTION(alt_hash_func), |
| 0 /* hash_mask_offset */, |
| HASHTABLE_ENTRY_SHARED | HASHTABLE_SHARED | |
| HASHTABLE_RELAX_CLUSTER_CHECKS |
| _IF_DEBUG("coarse htable")); |
| htable->mod_shift = 0; |
| info->htable = (void *) htable; |
| |
| /* We could create th_htable lazily independently of htable but not worth it */ |
| if (init_th_capacity != 0) { |
| init_size = hashtable_bits_given_entries(init_th_capacity, |
| DYNAMO_OPTION(coarse_th_htable_load)); |
| } else |
| init_size = INIT_HTABLE_SIZE_COARSE_TH; |
| LOG(GLOBAL, LOG_FRAGMENT, 2, "Coarse %s th htable %d capacity => %d bits\n", |
| info->module, init_th_capacity, init_size); |
| th_htable = NONPERSISTENT_HEAP_TYPE_ALLOC(GLOBAL_DCONTEXT, coarse_table_t, |
| ACCT_FRAG_TABLE); |
| hashtable_coarse_init(GLOBAL_DCONTEXT, th_htable, init_size, |
| DYNAMO_OPTION(coarse_th_htable_load), |
| (hash_function_t)INTERNAL_OPTION(alt_hash_func), |
| 0 /* hash_mask_offset */, |
| HASHTABLE_ENTRY_SHARED | HASHTABLE_SHARED | |
| HASHTABLE_RELAX_CLUSTER_CHECKS |
| _IF_DEBUG("coarse th htable")); |
| th_htable->mod_shift = 0; |
| /* We give th table a lower lock rank for coarse_body_from_htable_entry(). |
| * FIXME: add param to init() that takes in lock rank? |
| */ |
| ASSIGN_INIT_READWRITE_LOCK_FREE(th_htable->rwlock, coarse_th_table_rwlock); |
| info->th_htable = (void *) th_htable; |
| } |
| |
| /* Adds all entries from stable into dtable, offsetting by dst_cache_offset, |
| * which is the offset from dst->cache_start_pc at which |
| * the src cache has been placed. |
| */ |
| static void |
| fragment_coarse_htable_merge_helper(dcontext_t *dcontext, |
| coarse_info_t *dst, coarse_table_t *dtable, |
| coarse_info_t *src, coarse_table_t *stable, |
| ssize_t dst_cache_offset) |
| { |
| app_to_cache_t a2c, look_a2c; |
| uint i; |
| /* assumption: dtable is private to this thread and so does not need synch */ |
| DODEBUG({ dtable->is_local = true; }); |
| TABLE_RWLOCK(stable, read, lock); |
| for (i = 0; i < stable->capacity; i++) { |
| a2c = stable->table[i]; |
| if (A2C_ENTRY_IS_REAL(a2c)) { |
| look_a2c = coarse_lookup_internal(dcontext, a2c.app, dtable); |
| if (A2C_ENTRY_IS_EMPTY(look_a2c)) { |
| a2c.cache += dst_cache_offset; |
| if (!dst->frozen) { /* adjust absolute value */ |
| ASSERT_NOT_TESTED(); |
| a2c.cache += (dst->cache_start_pc - src->cache_start_pc); |
| } |
| hashtable_coarse_add(dcontext, a2c, dtable); |
| } else { |
                /* Our merging-with-dups strategy requires that we not
                 * merge duplicates in this early.
                 */
| ASSERT_NOT_REACHED(); |
| } |
| } |
| } |
| TABLE_RWLOCK(stable, read, unlock); |
| DODEBUG({ dtable->is_local = false; }); |
| } |
| |
| /* Merges the main and th htables from info1 and info2 into new htables for dst. |
| * If !add_info2, makes room for but does not add entries from info2. |
| * If !add_th_htable, creates but does not add entries to dst->th_htable. |
| * FIXME: we can't use hashtable_coarse_merge() b/c we don't want to add |
| * entries from the 2nd table, and we do custom mangling of entries when adding. |
| * Is it worth parametrizing hashtable_coarse_merge() to share anything? |
| */ |
| void |
| fragment_coarse_htable_merge(dcontext_t *dcontext, coarse_info_t *dst, |
| coarse_info_t *info1, coarse_info_t *info2, |
| bool add_info2, bool add_th_htable) |
| { |
| coarse_table_t *thht_dst, *thht1, *thht2; |
| coarse_table_t *ht_dst, *ht1, *ht2; |
| uint merged_entries = 0; |
| |
| ASSERT(SHARED_FRAGMENTS_ENABLED()); |
| ASSERT(info1 != NULL && info2 != NULL); |
| ht1 = (coarse_table_t *) info1->htable; |
| ht2 = (coarse_table_t *) info2->htable; |
| thht1 = (coarse_table_t *) info1->th_htable; |
| thht2 = (coarse_table_t *) info2->th_htable; |
| ASSERT(dst != NULL && dst->htable == NULL && dst->th_htable == NULL); |
| |
| /* We go to the trouble of determining non-dup total entries to |
| * avoid repeatedly increasing htable size on merges and hitting |
| * collision asserts. FIXME: should we shrink afterward instead? |
| * Or just ignore collision asserts until at full size? |
| */ |
| merged_entries = hashtable_coarse_num_unique_entries(dcontext, ht1, ht2); |
| STATS_ADD(coarse_merge_dups, ht1->entries + ht2->entries - merged_entries); |
| LOG(THREAD, LOG_FRAGMENT, 2, "Merging %s: %d + %d => %d (%d unique) entries\n", |
| info1->module, ht1->entries, ht2->entries, ht1->entries + ht2->entries, |
| merged_entries); |
| |
| /* We could instead copy ht1 and then add ht2; for simplicity we re-use |
| * our create-empty routine and add both |
| */ |
| fragment_coarse_htable_create(dst, merged_entries, |
| /* Heuristic to never over-size, yet try to avoid |
| * collision asserts while resizing the table; |
| * FIXME: if we shrink the main htable afterward |
| * could do the same here and start at sum of |
| * entries */ |
| MAX(thht1->entries, thht2->entries)); |
| ht_dst = (coarse_table_t *) dst->htable; |
| thht_dst = (coarse_table_t *) dst->th_htable; |
| ASSERT(ht_dst != NULL && thht_dst != NULL); |
| |
| /* For now we only support frozen tables; else will have to change |
| * the offsets to be stubs for main table and cache for th table |
| */ |
| ASSERT(info1->frozen && info2->frozen); |
| fragment_coarse_htable_merge_helper(dcontext, dst, ht_dst, info1, ht1, 0); |
| if (add_info2) { |
| fragment_coarse_htable_merge_helper(dcontext, dst, ht_dst, info2, ht2, |
| info1->cache_end_pc - |
| info1->cache_start_pc); |
| } |
| if (add_th_htable) { |
| ASSERT_NOT_TESTED(); |
| fragment_coarse_htable_merge_helper(dcontext, dst, thht_dst, info1, thht1, 0); |
| fragment_coarse_htable_merge_helper(dcontext, dst, thht_dst, info2, thht2, |
| /* stubs_end_pc is not allocated end */ |
| info1->mmap_pc + info1->mmap_size - |
| info1->stubs_start_pc); |
| } |
| } |
| |
| #ifndef DEBUG |
/* Declared in fragment.h only for debug builds; otherwise it is static, so we
 * need a forward declaration here.
 */
| static void |
| coarse_body_from_htable_entry(dcontext_t *dcontext, coarse_info_t *info, |
| app_pc tag, cache_pc res, |
| cache_pc *stub_pc_out/*OUT*/, |
| cache_pc *body_pc_out/*OUT*/); |
| #endif |
| |
| static void |
| study_and_free_coarse_htable(coarse_info_t *info, coarse_table_t *htable, |
| bool never_persisted _IF_DEBUG(const char *name)) |
| { |
| LOG(GLOBAL, LOG_FRAGMENT, 1, "Coarse %s %s hashtable stats:\n", |
| info->module, name); |
| DOLOG(1, LOG_FRAGMENT|LOG_STATS, { |
| hashtable_coarse_load_statistics(GLOBAL_DCONTEXT, htable); |
| }); |
| DODEBUG({ |
| hashtable_coarse_study(GLOBAL_DCONTEXT, htable, 0/*table consistent*/); |
| }); |
| DOLOG(3, LOG_FRAGMENT, { |
| hashtable_coarse_dump_table(GLOBAL_DCONTEXT, htable); |
| }); |
| #ifdef CLIENT_INTERFACE |
| /* Only raise deletion events if client saw creation events: so no persisted |
| * units |
| */ |
| if (!info->persisted && htable == info->htable && |
| dr_fragment_deleted_hook_exists()) { |
| app_to_cache_t a2c; |
| uint i; |
| dcontext_t *dcontext = get_thread_private_dcontext(); |
| cache_pc body = NULL; |
| TABLE_RWLOCK(htable, read, lock); |
| for (i = 0; i < htable->capacity; i++) { |
| a2c = htable->table[i]; |
| if (A2C_ENTRY_IS_REAL(a2c)) { |
| if (info->frozen) |
| body = a2c.cache; |
| else { /* make sure not an entrance stub w/ no body */ |
| coarse_body_from_htable_entry(dcontext, info, a2c.app, a2c.cache, |
| NULL, &body); |
| } |
| if (body != NULL) { |
| instrument_fragment_deleted(get_thread_private_dcontext(), a2c.app, |
| FRAGMENT_COARSE_WRAPPER_FLAGS); |
| } |
| } |
| } |
| TABLE_RWLOCK(htable, read, unlock); |
| } |
| #endif |
| /* entries are inlined so nothing external to free */ |
| if (info->persisted && !never_persisted) { |
| /* ensure won't try to free (part of mmap) */ |
| ASSERT(htable->table_unaligned == NULL); |
| } |
| hashtable_coarse_free(GLOBAL_DCONTEXT, htable); |
| NONPERSISTENT_HEAP_TYPE_FREE(GLOBAL_DCONTEXT, htable, coarse_table_t, ACCT_FRAG_TABLE); |
| } |
| |
| void |
| fragment_coarse_free_entry_pclookup_table(dcontext_t *dcontext, coarse_info_t *info) |
| { |
| if (info->pclookup_htable != NULL) { |
| ASSERT(DYNAMO_OPTION(coarse_pclookup_table)); |
| study_and_free_coarse_htable(info, (coarse_table_t *) info->pclookup_htable, |
| true/*never persisted*/ _IF_DEBUG("pclookup")); |
| info->pclookup_htable = NULL; |
| } |
| } |
| |
| void |
| fragment_coarse_htable_free(coarse_info_t *info) |
| { |
| ASSERT_OWN_MUTEX(!info->is_local, &info->lock); |
| if (info->htable == NULL) { |
| /* lazily initialized, so common to have empty units */ |
| ASSERT(info->th_htable == NULL); |
| ASSERT(info->pclookup_htable == NULL); |
| return; |
| } |
| study_and_free_coarse_htable(info, (coarse_table_t *) info->htable, false |
| _IF_DEBUG("main")); |
| info->htable = NULL; |
| study_and_free_coarse_htable(info, (coarse_table_t *) info->th_htable, false |
| _IF_DEBUG("tracehead")); |
| info->th_htable = NULL; |
| if (info->pclookup_last_htable != NULL) { |
| generic_hash_destroy(GLOBAL_DCONTEXT, info->pclookup_last_htable); |
| info->pclookup_last_htable = NULL; |
| } |
| fragment_coarse_free_entry_pclookup_table(GLOBAL_DCONTEXT, info); |
| } |
| |
| uint |
| fragment_coarse_num_entries(coarse_info_t *info) |
| { |
| coarse_table_t *htable; |
| ASSERT(info != NULL); |
| htable = (coarse_table_t *) info->htable; |
| if (htable == NULL) |
| return 0; |
| return htable->entries; |
| } |
| |
| /* Add coarse fragment represented by wrapper f to the hashtable |
| * for unit info. |
| */ |
| void |
| fragment_coarse_add(dcontext_t *dcontext, coarse_info_t *info, |
| app_pc tag, cache_pc cache) |
| { |
| coarse_table_t *htable; |
| app_to_cache_t a2c = {tag, cache}; |
| |
| ASSERT(info != NULL && info->htable != NULL); |
| htable = (coarse_table_t *) info->htable; |
| |
| DOCHECK(1, { |
| /* We have lock rank order problems b/c this lookup may acquire |
| * the th table read lock, and we can't split its rank from |
| * the main table's. So we live w/ a racy assert that could |
| * have false positives or negatives. |
| */ |
| cache_pc stub; |
| cache_pc body; |
| /* OK to have dup entries for entrance stubs in other units, |
| * or a stub already present for a trace head */ |
| fragment_coarse_lookup_in_unit(dcontext, info, tag, &stub, &body); |
| ASSERT(body == NULL); |
| ASSERT(stub == NULL || |
| coarse_is_trace_head_in_own_unit(dcontext, tag, stub, |
| /* case 10876: pass what we're adding */ |
| (ptr_uint_t)cache + info->cache_start_pc, |
| true, info)); |
| /* There can only be one body. But similarly to above, |
| * fragment_coarse_lookup may look in the secondary unit, so |
| * we can't do this lookup holding the write lock. |
| */ |
| if (!coarse_is_entrance_stub(cache)) { |
| coarse_info_t *xinfo = get_executable_area_coarse_info(tag); |
| fragment_t *f; |
| ASSERT(xinfo != NULL); |
| /* If an official unit, tag should not exist in any other official unit */ |
| ASSERT((info != xinfo && info != xinfo->non_frozen) || |
| fragment_coarse_lookup(dcontext, tag) == NULL); |
| f = fragment_lookup(dcontext, tag); |
| /* ok for trace to shadow coarse trace head */ |
| ASSERT(f == NULL || TEST(FRAG_IS_TRACE, f->flags)); |
| } |
| }); |
| TABLE_RWLOCK(htable, write, lock); |
| /* Table is not an ibl table so we ignore resize return value */ |
| hashtable_coarse_add(dcontext, a2c, htable); |
| TABLE_RWLOCK(htable, write, unlock); |
| |
| #ifdef SHARING_STUDY |
| if (INTERNAL_OPTION(fragment_sharing_study)) { |
| ASSERT_NOT_IMPLEMENTED(false && "need to pass f in to add_shared_block"); |
| } |
| #endif |
| } |
| |
/* Returns the th table entry for tag: the body pc for a non-frozen unit, or the
 * entrance stub pc for a frozen unit (whose th table holds only stubs).  Returns
 * NULL if not found.  Caller must hold the th table's read or write lock!
 */
| static cache_pc |
| fragment_coarse_th_lookup(dcontext_t *dcontext, coarse_info_t *info, app_pc tag) |
| { |
| cache_pc res = NULL; |
| app_to_cache_t a2c; |
| coarse_table_t *htable; |
| ASSERT(info != NULL); |
    ASSERT(info->th_htable != NULL);
| htable = (coarse_table_t *) info->th_htable; |
| ASSERT(TABLE_PROTECTED(htable)); |
| a2c = coarse_lookup_internal(dcontext, tag, htable); |
| if (!A2C_ENTRY_IS_EMPTY(a2c)) { |
| ASSERT(BOOLS_MATCH(info->frozen, info->stubs_start_pc != NULL)); |
| /* for frozen, th_htable only holds stubs */ |
| res = ((ptr_uint_t)a2c.cache) + info->stubs_start_pc; |
| } |
| return res; |
| } |
| |
| /* Performs two actions while holding the trace head table's write lock, |
| * making them atomic (solving the race in case 8795): |
| * 1) unlinks the coarse fragment's entrance pc and points it at the |
| * trace head exit routine; |
| * 2) adds the coarse fragment's body pc to the trace head hashtable. |
| * |
| * Case 8628: if we split the main htable into stubs and bodies then |
| * we can eliminate the th htable as it will be part of the body table. |
| * |
 * We could merge this info into the thcounter table, and assume that
| * most trace heads are coarse so we're not wasting much memory with |
| * the extra field. But then we have to walk entire stub table on |
| * cache flush and individually clear body pcs from monitor table, so |
| * better to have own th table? |
| */ |
| void |
| fragment_coarse_th_unlink_and_add(dcontext_t *dcontext, app_pc tag, |
| cache_pc stub_pc, cache_pc body_pc) |
| { |
| ASSERT(stub_pc != NULL); |
| if (body_pc != NULL) { |
| /* trace head is in this unit, so we have to add it to our th htable */ |
| coarse_info_t *info = get_fcache_coarse_info(body_pc); |
| coarse_table_t *th_htable; |
| app_to_cache_t a2c = {tag, body_pc}; |
| ASSERT(info != NULL && info->th_htable != NULL); |
| ASSERT(!info->frozen); |
| th_htable = (coarse_table_t *) info->th_htable; |
| TABLE_RWLOCK(th_htable, write, lock); |
| ASSERT(fragment_coarse_th_lookup(dcontext, info, tag) == NULL); |
| unlink_entrance_stub(dcontext, stub_pc, FRAG_IS_TRACE_HEAD, info); |
| /* Table is not an ibl table so we ignore resize return value */ |
| hashtable_coarse_add(dcontext, a2c, th_htable); |
| TABLE_RWLOCK(th_htable, write, unlock); |
| LOG(THREAD, LOG_FRAGMENT, 4, |
| "adding to coarse th table for %s: "PFX"->"PFX"\n", |
| info->module, tag, body_pc); |
| } else { |
| /* ensure lives in another unit */ |
| ASSERT(fragment_coarse_lookup(dcontext, tag) != stub_pc); |
| unlink_entrance_stub(dcontext, stub_pc, FRAG_IS_TRACE_HEAD, NULL); |
| } |
| } |
| |
| /* Only use when building up a brand-new table. Otherwise use |
| * fragment_coarse_th_unlink_and_add(). |
| */ |
| void |
| fragment_coarse_th_add(dcontext_t *dcontext, coarse_info_t *info, |
| app_pc tag, cache_pc cache) |
| { |
| coarse_table_t *th_htable; |
| app_to_cache_t a2c = {tag, cache}; |
| ASSERT(info != NULL && info->th_htable != NULL); |
| ASSERT(info->frozen); /* only used when merging units */ |
| th_htable = (coarse_table_t *) info->th_htable; |
| TABLE_RWLOCK(th_htable, write, lock); |
| ASSERT(fragment_coarse_th_lookup(dcontext, info, tag) == NULL); |
| /* Table is not an ibl table so we ignore resize return value */ |
| hashtable_coarse_add(dcontext, a2c, th_htable); |
| TABLE_RWLOCK(th_htable, write, unlock); |
| } |
| |
| /* The input here is the result of a lookup in the main htable. |
| * For a frozen unit, this actually looks up the stub pc since res is |
| * always the body pc. |
| * For a non-frozen unit this determines where to obtain the body pc. |
| * FIXME: case 8628 will simplify this whole thing |
| */ |
| /* exported only for assert in push_pending_freeze() */ |
| IF_DEBUG_ELSE(,static) void |
| coarse_body_from_htable_entry(dcontext_t *dcontext, coarse_info_t *info, |
| app_pc tag, cache_pc res, |
| cache_pc *stub_pc_out/*OUT*/, |
| cache_pc *body_pc_out/*OUT*/) |
| { |
| cache_pc stub_pc = NULL, body_pc = NULL; |
| /* Should be passing absolute pc, not offset */ |
| ASSERT(!info->frozen || res == NULL || res >= info->cache_start_pc); |
| if (info->frozen) { |
        /* We still need the stub table for lazy linking and for
         * shifting links from a trace head to a trace.
         */
| body_pc = res; |
| if (stub_pc_out != NULL) { |
| TABLE_RWLOCK((coarse_table_t *) info->th_htable, read, lock); |
| stub_pc = fragment_coarse_th_lookup(dcontext, info, tag); |
| TABLE_RWLOCK((coarse_table_t *) info->th_htable, read, unlock); |
| } |
| } else { |
| /* In a non-frozen unit, htable entries are always stubs */ |
| DOCHECK(CHKLVL_DEFAULT+1, { ASSERT(coarse_is_entrance_stub(res)); }); |
| stub_pc = res; |
| if (body_pc_out != NULL) { |
| /* keep the th table entry and stub link status linked atomically */ |
| TABLE_RWLOCK((coarse_table_t *) info->th_htable, read, lock); |
| body_pc = fragment_coarse_th_lookup(dcontext, info, tag); |
| if (body_pc != NULL) { |
| /* We can't just check coarse_is_trace_head(res) because a |
| * shadowed trace head is directly linked to its trace. The |
| * trace has lookup order precedence, but the head must show up |
| * if asked about here! |
| */ |
| /* FIXME: we could have a flags OUT param and set FRAG_IS_TRACE_HEAD |
| * to help out fragment_lookup_fine_and_coarse*(). |
| */ |
| } else { |
| if (entrance_stub_linked(res, info)) { |
| /* We only want to set body_pc if it is present in this unit */ |
| cache_pc tgt = entrance_stub_jmp_target(res); |
| if (get_fcache_coarse_info(tgt) == info) |
| body_pc = tgt; |
| else |
| body_pc = NULL; |
| } else |
| body_pc = NULL; |
| DOCHECK(CHKLVL_DEFAULT+1, { |
| ASSERT(!coarse_is_trace_head(res) || |
| /* allow targeting trace head in another unit */ |
| body_pc == NULL); |
| }); |
| } |
| TABLE_RWLOCK((coarse_table_t *) info->th_htable, read, unlock); |
| } |
| } |
| if (stub_pc_out != NULL) |
| *stub_pc_out = stub_pc; |
| if (body_pc_out != NULL) |
| *body_pc_out = body_pc; |
| } |
| |
| /* Coarse fragments have two entrance points: the actual fragment |
| * body, and the entrance stub that is used for indirection and |
| * convenient incremental building for intra-unit non-frozen linking |
| * as well as always used at the source end for inter-unit linking. |
| * This routine returns both. If the stub_pc returned is non-NULL, it |
| * only indicates that there is an outgoing link from a fragment |
| * present in this unit that targets the queried tag (and for |
| * intra-unit links in a frozen unit, such a link may exist but |
| * stub_pc will be NULL as the entrance stub indirection will have |
| * been replaced with a direct link). The fragment corresponding to |
| * the tag is only present in this unit if the body_pc returned is |
| * non-NULL. |
| */ |
| void |
| fragment_coarse_lookup_in_unit(dcontext_t *dcontext, coarse_info_t *info, app_pc tag, |
| /* FIXME: have separate type for stub pc vs body pc? */ |
| cache_pc *stub_pc_out/*OUT*/, |
| cache_pc *body_pc_out/*OUT*/) |
| { |
| cache_pc res = NULL; |
| cache_pc stub_pc = NULL, body_pc = NULL; |
| app_to_cache_t a2c; |
| coarse_table_t *htable; |
| ASSERT(info != NULL); |
| if (info->htable == NULL) /* not initialized yet, so no code there */ |
| goto coarse_lookup_return; |
| htable = (coarse_table_t *) info->htable; |
| TABLE_RWLOCK(htable, read, lock); |
| a2c = coarse_lookup_internal(dcontext, tag, htable); |
| if (!A2C_ENTRY_IS_EMPTY(a2c)) { |
| LOG(THREAD, LOG_FRAGMENT, 5, "%s: %s %s tag="PFX" => app="PFX" cache="PFX"\n", |
| __FUNCTION__, info->module, info->frozen ? "frozen" : "", |
| tag, a2c.app, a2c.cache); |
| ASSERT(BOOLS_MATCH(info->frozen, info->cache_start_pc != NULL)); |
| /* for frozen, htable only holds body pc */ |
| res = ((ptr_uint_t)a2c.cache) + info->cache_start_pc; |
| } |
| if (res != NULL) |
| coarse_body_from_htable_entry(dcontext, info, tag, res, &stub_pc, &body_pc); |
| else if (info->frozen) /* need a separate lookup for stub */ |
| coarse_body_from_htable_entry(dcontext, info, tag, res, &stub_pc, &body_pc); |
| TABLE_RWLOCK(htable, read, unlock); |
| /* cannot have both a shared coarse and shared fine bb for same tag |
| * (can have shared trace shadowing shared coarse trace head bb, or |
| * private bb shadowing shared coarse bb) |
| */ |
| ASSERT(body_pc == NULL || fragment_lookup_shared_bb(dcontext, tag) == NULL); |
| coarse_lookup_return: |
| if (stub_pc_out != NULL) |
| *stub_pc_out = stub_pc; |
| if (body_pc_out != NULL) |
| *body_pc_out = body_pc; |
| } |
| |
| /* Returns the body pc of the coarse fragment corresponding to tag, or |
| * NULL if not found |
| */ |
| cache_pc |
| fragment_coarse_lookup(dcontext_t *dcontext, app_pc tag) |
| { |
| cache_pc res = NULL; |
| coarse_info_t *info = get_executable_area_coarse_info(tag); |
| /* We must check each unit in turn */ |
| while (info != NULL) { /* loop over primary and secondary unit */ |
| fragment_coarse_lookup_in_unit(dcontext, info, tag, NULL, &res); |
| if (res != NULL) |
| return res; |
| ASSERT(info->frozen || info->non_frozen == NULL); |
| info = info->non_frozen; |
| ASSERT(info == NULL || !info->frozen); |
| } |
| return NULL; |
| } |
| |
| /* It's up to the caller to hold locks preventing simultaneous writes to wrapper */ |
| void |
| fragment_coarse_wrapper(fragment_t *wrapper, app_pc tag, cache_pc body_pc) |
| { |
| ASSERT(wrapper != NULL); |
| if (wrapper == NULL) |
| return; |
| ASSERT(tag != NULL); |
| ASSERT(body_pc != NULL); |
| /* FIXME: fragile to rely on other routines not inspecting other |
| * fields of fragment_t -- but setting to 0 perhaps no better than garbage |
| * in that case. We do need to set prefix_size, incoming_stubs, and |
| * {next,prev}_vmarea to NULL, for sure. |
| */ |
| memset(wrapper, 0, sizeof(*wrapper)); |
| wrapper->tag = tag; |
| wrapper->start_pc = body_pc; |
| wrapper->flags = FRAGMENT_COARSE_WRAPPER_FLAGS; |
| /* We don't have stub pc so can't fill in FRAG_IS_TRACE_HEAD -- so we rely |
| * on callers passing src info to fragment_lookup_fine_and_coarse() |
| */ |
| } |
| |
| /* If finds a coarse fragment for tag, returns wrapper; else returns NULL */ |
| fragment_t * |
| fragment_coarse_lookup_wrapper(dcontext_t *dcontext, app_pc tag, fragment_t *wrapper) |
| { |
| cache_pc coarse; |
| ASSERT(wrapper != NULL); |
| coarse = fragment_coarse_lookup(dcontext, tag); |
| if (coarse != NULL) { |
| fragment_coarse_wrapper(wrapper, tag, coarse); |
| return wrapper; |
| } |
| return NULL; |
| } |
| |
| /* Takes in last_exit in order to mark trace headness. |
| * FIXME case 8600: should we replace all other lookups with this one? |
| * Fragile to only have select callers use this and everyone else |
| * ignore coarse fragments... |
| */ |
| fragment_t * |
| fragment_lookup_fine_and_coarse(dcontext_t *dcontext, app_pc tag, |
| fragment_t *wrapper, linkstub_t *last_exit) |
| { |
| fragment_t *res = fragment_lookup(dcontext, tag); |
| if (DYNAMO_OPTION(coarse_units)) { |
| ASSERT(wrapper != NULL); |
| if (res == NULL) { |
| res = fragment_coarse_lookup_wrapper(dcontext, tag, wrapper); |
| /* FIXME: no way to know if source is a fine fragment! */ |
| if (res != NULL && last_exit == get_coarse_trace_head_exit_linkstub()) |
| res->flags |= FRAG_IS_TRACE_HEAD; |
| } else { |
| /* cannot have both coarse and fine shared bb for same tag |
| * (can have shared trace shadowing shared coarse trace head bb, |
| * or private bb with same tag) |
| */ |
| ASSERT(TEST(FRAG_IS_TRACE, res->flags) || |
| !TEST(FRAG_SHARED, res->flags) || |
| fragment_coarse_lookup(dcontext, tag) == NULL); |
| } |
| } |
| return res; |
| } |
| |
| /* Takes in last_exit in order to mark trace headness. |
| * FIXME: should we replace all other same_sharing lookups with this one? |
| * Fragile to only have select callers use this and everyone else |
| * ignore coarse fragments... |
| */ |
| fragment_t * |
| fragment_lookup_fine_and_coarse_sharing(dcontext_t *dcontext, app_pc tag, |
| fragment_t *wrapper, linkstub_t *last_exit, |
| uint share_flags) |
| { |
| fragment_t *res = |
| fragment_lookup_same_sharing(dcontext, tag, share_flags); |
| if (DYNAMO_OPTION(coarse_units) && TEST(FRAG_SHARED, share_flags)) { |
| ASSERT(wrapper != NULL); |
| if (res == NULL) { |
| res = fragment_coarse_lookup_wrapper(dcontext, tag, wrapper); |
| if (res != NULL && last_exit == get_coarse_trace_head_exit_linkstub()) |
| res->flags |= FRAG_IS_TRACE_HEAD; |
| } |
| } |
| return res; |
| } |
| |
| /* Returns the owning unit of f (versus get_executable_area_coarse_info(f->tag) |
| * which may return a frozen unit that must be checked for f along with its |
| * secondary unfrozen unit). |
| */ |
| coarse_info_t * |
| get_fragment_coarse_info(fragment_t *f) |
| { |
| /* We have multiple potential units per vmarea region, so we use the fcache |
| * pc to get the unambiguous owning unit |
| */ |
| if (!TEST(FRAG_COARSE_GRAIN, f->flags)) |
| return NULL; |
| ASSERT(FCACHE_ENTRY_PC(f) != NULL); |
| return get_fcache_coarse_info(FCACHE_ENTRY_PC(f)); |
| } |
| |
| /* Pass in info if you know it; else this routine will look it up. |
| * Checks for stub targeting a trace head, or targeting a trace thus |
| * indicating that this is a shadowed trace head. |
| * If body_in or info_in is NULL, this routine will look them up. |
| */ |
| bool |
| coarse_is_trace_head_in_own_unit(dcontext_t *dcontext, app_pc tag, cache_pc stub, |
| cache_pc body_in, bool body_valid, |
| coarse_info_t *info_in) |
| { |
| coarse_info_t *info = info_in; |
| ASSERT(stub != NULL); |
| if (coarse_is_trace_head(stub)) |
| return true; |
| if (info == NULL) |
| info = get_stub_coarse_info(stub); |
| if (info == NULL) |
| return false; |
| /* If a coarse stub is linked to a fine fragment and there exists |
| * a body for that target tag in the same unit as the stub, we assume that |
| * we have a shadowed coarse trace head. |
| */ |
| if (entrance_stub_linked(stub, info) && |
| /* Target is fine if no info for it */ |
| get_fcache_coarse_info(entrance_stub_jmp_target(stub)) == NULL) { |
| cache_pc body = body_in; |
| if (!body_valid) { |
| ASSERT(body == NULL); |
| fragment_coarse_lookup_in_unit(dcontext, info, tag, NULL, &body); |
| } |
| if (body != NULL) |
| return true; |
| } |
| return false; |
| } |
| |
| /* Returns whether an entry exists. */ |
| bool |
| fragment_coarse_replace(dcontext_t *dcontext, coarse_info_t *info, app_pc tag, |
| cache_pc new_value) |
| { |
| app_to_cache_t old_entry = {tag, NULL/*doesn't matter*/}; |
| app_to_cache_t new_entry = {tag, new_value}; |
| coarse_table_t *htable; |
| bool res = false; |
| ASSERT(info != NULL && info->htable != NULL); |
| htable = (coarse_table_t *) info->htable; |
    /* replace modifies the entry in place, so we need the write lock */
    TABLE_RWLOCK(htable, write, lock);
    res = hashtable_coarse_replace(old_entry, new_entry, htable);
    TABLE_RWLOCK(htable, write, unlock);
| return res; |
| } |
| |
| /************************************************** |
| * PC LOOKUP |
| */ |
| |
| /* Table for storing results of prior coarse pclookups (i#658) */ |
| #define PCLOOKUP_LAST_HTABLE_INIT_SIZE 6 /*should remain small*/ |
| |
| /* Alarm signals can result in many pclookups from a variety of places |
| * (unlike usual DGC patterns) so we bound the size of the table. |
| * We want to err on the side of using more memory, since failing to |
| * cache all the instrs that are writing DGC can result in 2x slowdowns. |
| * It's not worth fancy replacement: we just clear the table if it gets |
| * really large and start over. Remember that this is per-coarse-unit. |
| */ |
| #define PCLOOKUP_LAST_HTABLE_MAX_ENTRIES 8192 |
| |
| typedef struct _pclookup_last_t { |
| app_pc tag; |
| cache_pc entry; |
| } pclookup_last_t; |
| |
| static void |
| pclookup_last_free(pclookup_last_t *last) |
| { |
| HEAP_TYPE_FREE(GLOBAL_DCONTEXT, last, pclookup_last_t, ACCT_FRAG_TABLE, PROTECTED); |
| } |
| |
| /* Returns the tag for the coarse fragment whose body contains pc. |
| * If that returned tag != NULL, also returns the body pc in the optional OUT param. |
| * FIXME: verify not called too often: watch the kstat. |
| */ |
| app_pc |
| fragment_coarse_pclookup(dcontext_t *dcontext, coarse_info_t *info, cache_pc pc, |
| /*OUT*/cache_pc *body_out) |
| { |
| app_to_cache_t a2c; |
| coarse_table_t *htable; |
| generic_table_t *pc_htable; |
| pclookup_last_t *last; |
| cache_pc body_pc; |
| ssize_t closest_distance = SSIZE_T_MAX; |
| app_pc closest = NULL; |
| uint i; |
| ASSERT(info != NULL); |
| if (info->htable == NULL) |
| return NULL; |
| KSTART(coarse_pclookup); |
| htable = (coarse_table_t *) info->htable; |
| |
| if (info->pclookup_last_htable == NULL) { |
| /* lazily allocate table of all pclookups to avoid htable walk |
| * on frequent codemod instrs (i#658). |
| * I tried using a small array instead but chrome v8 needs at least |
| * 12 entries, and rather than LFU or LRU to avoid worst-case, |
| * it seems reasonable to simply store all lookups. |
| * Then the worst case is some extra memory, not 4x slowdowns. |
| */ |
| mutex_lock(&info->lock); |
| if (info->pclookup_last_htable == NULL) { |
| /* coarse_table_t isn't quite enough b/c we need the body pc, |
| * which would require an extra lookup w/ coarse_table_t |
| */ |
| pc_htable = generic_hash_create(GLOBAL_DCONTEXT, |
| PCLOOKUP_LAST_HTABLE_INIT_SIZE, |
| 80 /* load factor: not perf-critical */, |
| HASHTABLE_ENTRY_SHARED | HASHTABLE_SHARED | |
| HASHTABLE_RELAX_CLUSTER_CHECKS, |
| (void(*)(void*)) pclookup_last_free |
| _IF_DEBUG("pclookup last table")); |
| /* Only when fully initialized can we set it, as we hold no lock for it */ |
| info->pclookup_last_htable = (void *) pc_htable; |
| } |
| mutex_unlock(&info->lock); |
| } |
| |
| pc_htable = (generic_table_t *) info->pclookup_last_htable; |
| ASSERT(pc_htable != NULL); |
| TABLE_RWLOCK(pc_htable, read, lock); |
| last = (pclookup_last_t *) generic_hash_lookup(GLOBAL_DCONTEXT, pc_htable, |
| (ptr_uint_t)pc); |
| if (last != NULL) { |
| closest = last->tag; |
| ASSERT(pc >= last->entry); |
| closest_distance = pc - last->entry; |
| } |
| TABLE_RWLOCK(pc_htable, read, unlock); |
| |
| if (closest == NULL) { |
| /* do the htable walk */ |
| TABLE_RWLOCK(htable, read, lock); |
| for (i = 0; i < htable->capacity; i++) { |
| a2c = htable->table[i]; |
| /* must check for sentinel */ |
| if (A2C_ENTRY_IS_REAL(a2c)) { |
| a2c.app -= htable->mod_shift; |
| ASSERT(BOOLS_MATCH(info->frozen, info->cache_start_pc != NULL)); |
| /* for frozen, htable only holds body pc */ |
| a2c.cache += (ptr_uint_t) info->cache_start_pc; |
| /* We have no body length so we must walk entire table */ |
| coarse_body_from_htable_entry(dcontext, info, a2c.app, |
| a2c.cache, NULL, &body_pc); |
| if (body_pc != NULL && |
| body_pc <= pc && (pc - body_pc) < closest_distance) { |
| closest_distance = pc - body_pc; |
| closest = a2c.app; |
| } |
| } |
| } |
| if (closest != NULL) { |
| /* Update the cache of results. Note that since this is coarse we |
| * don't have to do anything special to invalidate on codemod as the |
| * whole coarse unit will be thrown out. |
| */ |
| TABLE_RWLOCK(pc_htable, write, lock); |
| /* Check for race (i#1191) */ |
| last = (pclookup_last_t *) generic_hash_lookup(GLOBAL_DCONTEXT, pc_htable, |
| (ptr_uint_t)pc); |
| if (last != NULL) { |
| closest = last->tag; |
| ASSERT(pc >= last->entry); |
| closest_distance = pc - last->entry; |
| } else { |
| last = HEAP_TYPE_ALLOC(GLOBAL_DCONTEXT, pclookup_last_t, |
| ACCT_FRAG_TABLE, PROTECTED); |
| last->tag = closest; |
| last->entry = pc - closest_distance; |
| |
| if (pc_htable->entries >= PCLOOKUP_LAST_HTABLE_MAX_ENTRIES) { |
| /* See notes above: we don't want an enormous table. |
| * We just clear rather than a fancy replacement policy. |
| */ |
| generic_hash_clear(GLOBAL_DCONTEXT, pc_htable); |
| } |
| |
| generic_hash_add(GLOBAL_DCONTEXT, pc_htable, (ptr_uint_t)pc, |
| (void *)last); |
| STATS_INC(coarse_pclookup_cached); |
| } |
| TABLE_RWLOCK(pc_htable, write, unlock); |
| } |
| TABLE_RWLOCK(htable, read, unlock); |
| } |
| |
| if (body_out != NULL) |
| *body_out = pc - closest_distance; |
| KSTOP(coarse_pclookup); |
| LOG(THREAD, LOG_FRAGMENT, 4, "%s: "PFX" => "PFX"\n", __FUNCTION__, pc, closest); |
| return closest; |
| } |
| |
| /* Creates a reverse lookup table. For a non-frozen unit, the caller should only |
| * do this while all threads are suspended, and should free the table before |
| * resuming other threads. |
| */ |
| void |
| fragment_coarse_create_entry_pclookup_table(dcontext_t *dcontext, coarse_info_t *info) |
| { |
| app_to_cache_t main_a2c; |
| app_to_cache_t pc_a2c; |
| coarse_table_t *main_htable; |
| coarse_table_t *pc_htable; |
| cache_pc body_pc; |
| uint i; |
| ASSERT(info != NULL); |
| if (info->htable == NULL) |
| return; |
| if (info->pclookup_htable == NULL) { |
| mutex_lock(&info->lock); |
| if (info->pclookup_htable == NULL) { |
| /* set up reverse lookup table */ |
| main_htable = (coarse_table_t *) info->htable; |
| pc_htable = NONPERSISTENT_HEAP_TYPE_ALLOC(GLOBAL_DCONTEXT, coarse_table_t, |
| ACCT_FRAG_TABLE); |
| hashtable_coarse_init(GLOBAL_DCONTEXT, pc_htable, main_htable->hash_bits, |
| DYNAMO_OPTION(coarse_pclookup_htable_load), |
| (hash_function_t)INTERNAL_OPTION(alt_hash_func), |
| 0 /* hash_mask_offset */, |
| HASHTABLE_ENTRY_SHARED | HASHTABLE_SHARED | |
| HASHTABLE_RELAX_CLUSTER_CHECKS |
| _IF_DEBUG("coarse pclookup htable")); |
| pc_htable->mod_shift = 0; |
| /* We give pc table a lower lock rank so we can add below |
| * while holding the lock, though the table is still local |
| * while we hold info->lock and do not write it out to its |
| * info-> field, so we could update the add() and |
| * deadlock-avoidance routines instead (xref case 9522). |
| * FIXME: add param to init() that takes in lock rank? |
| */ |
| ASSIGN_INIT_READWRITE_LOCK_FREE(pc_htable->rwlock, |
| coarse_pclookup_table_rwlock); |
| TABLE_RWLOCK(main_htable, read, lock); |
| TABLE_RWLOCK(pc_htable, write, lock); |
| for (i = 0; i < main_htable->capacity; i++) { |
| main_a2c = main_htable->table[i]; |
| /* must check for sentinel */ |
| if (A2C_ENTRY_IS_REAL(main_a2c)) { |
| main_a2c.app -= main_htable->mod_shift; |
| ASSERT(BOOLS_MATCH(info->frozen, info->cache_start_pc != NULL)); |
| coarse_body_from_htable_entry(dcontext, info, main_a2c.app, |
| main_a2c.cache |
| /* frozen htable only holds body pc */ |
| + (ptr_uint_t) info->cache_start_pc, |
| NULL, &body_pc); |
| if (body_pc != NULL) { |
| /* We can have two tags with the same cache pc, if |
| * one is a single jmp that was elided (but we only |
| * do that if -unsafe_freeze_elide_sole_ubr). |
| * It's unsafe b/c of case 9677: when translating back |
| * to app pc we will just take 1st one in linear walk of |
| * htable, which may be the wrong one! |
| */ |
| pc_a2c = hashtable_coarse_lookup(dcontext, |
| (ptr_uint_t)body_pc, pc_htable); |
| if (A2C_ENTRY_IS_EMPTY(pc_a2c)) { |
| pc_a2c.app = body_pc; |
| pc_a2c.cache = main_a2c.app; |
| hashtable_coarse_add(dcontext, pc_a2c, pc_htable); |
| } else { |
| ASSERT(DYNAMO_OPTION(unsafe_freeze_elide_sole_ubr)); |
| } |
| } |
| } |
| } |
| TABLE_RWLOCK(pc_htable, write, unlock); |
| TABLE_RWLOCK(main_htable, read, unlock); |
| /* Only when fully initialized can we set it, as we hold no lock for it */ |
| info->pclookup_htable = (void *) pc_htable; |
| } |
| mutex_unlock(&info->lock); |
| } |
| } |
| |
| /* Returns the tag for the coarse fragment whose body _begins at_ pc */ |
| app_pc |
| fragment_coarse_entry_pclookup(dcontext_t *dcontext, coarse_info_t *info, cache_pc pc) |
| { |
| app_to_cache_t pc_a2c; |
| coarse_table_t *pc_htable; |
| cache_pc body_pc; |
| app_pc res = NULL; |
| ASSERT(info != NULL); |
| if (info->htable == NULL) |
| return NULL; |
| /* FIXME: we could use this table for non-frozen if we updated it |
| * when we add to main htable; for now we only support frozen use |
| */ |
| if (!DYNAMO_OPTION(coarse_pclookup_table) || |
| /* we do create a table for non-frozen while we're freezing (i#735) */ |
| (!info->frozen && info->pclookup_htable == NULL)) { |
| res = fragment_coarse_pclookup(dcontext, info, pc, &body_pc); |
| if (body_pc == pc) { |
| LOG(THREAD, LOG_FRAGMENT, 4, "%s: "PFX" => "PFX"\n", __FUNCTION__, pc, res); |
| return res; |
| } else |
| return NULL; |
| } |
| KSTART(coarse_pclookup); |
| |
| if (info->pclookup_htable == NULL) { |
| fragment_coarse_create_entry_pclookup_table(dcontext, info); |
| } |
| |
| pc_htable = (coarse_table_t *) info->pclookup_htable; |
| ASSERT(pc_htable != NULL); |
| TABLE_RWLOCK(pc_htable, read, lock); |
| pc_a2c = hashtable_coarse_lookup(dcontext, (ptr_uint_t)pc, pc_htable); |
| if (!A2C_ENTRY_IS_EMPTY(pc_a2c)) |
| res = pc_a2c.cache; |
| TABLE_RWLOCK(pc_htable, read, unlock); |
| KSTOP(coarse_pclookup); |
| LOG(THREAD, LOG_FRAGMENT, 4, "%s: "PFX" => "PFX"\n", __FUNCTION__, pc, res); |
| return res; |
| } |
| |
| /* case 9900: must have dynamo_all_threads_synched since we haven't resolved |
| * lock rank ordering issues with the hashtable locks |
| */ |
| static void |
| fragment_coarse_entry_freeze(dcontext_t *dcontext, coarse_freeze_info_t *freeze_info, |
| pending_freeze_t *pending) |
| { |
| app_to_cache_t frozen_a2c; |
| app_to_cache_t looka2c = {0,0}; |
| coarse_table_t *frozen_htable; |
| cache_pc tgt; |
| if (pending->entrance_stub) { |
| frozen_htable = (coarse_table_t *) freeze_info->dst_info->th_htable; |
| /* case 9900: rank order conflict with coarse_info_incoming_lock: |
| * do not grab frozen_htable write lock (only need read but we were |
| * grabbing write to match assert): mark is_local instead and rely |
| * on dynamo_all_threads_synched |
| */ |
| DODEBUG({ frozen_htable->is_local = true; }); |
| ASSERT_NOT_IMPLEMENTED(dynamo_all_threads_synched && "case 9900"); |
| } else { |
| frozen_htable = (coarse_table_t *) freeze_info->dst_info->htable; |
| } |
| ASSERT_OWN_WRITE_LOCK(!frozen_htable->is_local, &frozen_htable->rwlock); |
| looka2c = coarse_lookup_internal(dcontext, pending->tag, frozen_htable); |
| if (A2C_ENTRY_IS_EMPTY(looka2c)) { |
| frozen_a2c.app = pending->tag; |
| if (pending->entrance_stub) { |
| /* add inter-unit/trace-head stub to htable */ |
| LOG(THREAD, LOG_FRAGMENT, 4, |
| " adding pending stub "PFX"."PFX" => "PFX"\n", |
| pending->tag, pending->cur_pc, freeze_info->stubs_cur_pc); |
| frozen_a2c.cache = (cache_pc) |
| (freeze_info->stubs_cur_pc - freeze_info->stubs_start_pc); |
| hashtable_coarse_add(dcontext, frozen_a2c, frozen_htable); |
| /* copy to new stubs area */ |
| transfer_coarse_stub(dcontext, freeze_info, pending->cur_pc, |
| pending->trace_head, true/*replace*/); |
| } else { |
| /* fall-through optimization */ |
| if (DYNAMO_OPTION(coarse_freeze_elide_ubr) && |
| pending->link_cti_opnd != NULL && |
| pending->link_cti_opnd + 4 == freeze_info->cache_cur_pc && |
| pending->elide_ubr) { |
| ASSERT(!pending->trace_head); |
| LOG(THREAD, LOG_FRAGMENT, 4, " fall-through opt from prev fragment\n"); |
| freeze_info->cache_cur_pc -= JMP_LONG_LENGTH; |
| pending->link_cti_opnd = NULL; |
| STATS_INC(coarse_freeze_fallthrough); |
| DODEBUG({ freeze_info->num_elisions++; }); |
| } |
| LOG(THREAD, LOG_FRAGMENT, 4, |
| " adding pending %sfragment "PFX"."PFX" => "PFX"\n", |
| pending->trace_head ? "trace head " : "", |
| pending->tag, pending->cur_pc, freeze_info->cache_cur_pc); |
| /* add to htable the offset from start of cache */ |
| frozen_a2c.cache = (cache_pc) |
| (freeze_info->cache_cur_pc - freeze_info->cache_start_pc); |
| hashtable_coarse_add(dcontext, frozen_a2c, frozen_htable); |
| /* copy to new cache */ |
| transfer_coarse_fragment(dcontext, freeze_info, pending->cur_pc); |
| } |
| tgt = frozen_a2c.cache; |
| } else { |
| tgt = looka2c.cache; |
| /* Should not hit any links to TH, so should hit once, from htable walk */ |
| ASSERT(!pending->trace_head || pending->entrance_stub); |
| /* May have added entrance stub for intra-unit TH as non-TH if it was |
| * linked to a trace, since didn't have body pc at the time, so we |
| * fix up here on the proactive add when adding its body |
| */ |
| if (pending->entrance_stub && pending->trace_head && freeze_info->unlink) { |
| cache_pc abs_tgt = tgt + (ptr_uint_t)freeze_info->stubs_start_pc; |
| transfer_coarse_stub_fix_trace_head(dcontext, freeze_info, abs_tgt); |
| } |
| } |
| if (pending->link_cti_opnd != NULL) { |
| /* fix up incoming link */ |
| cache_pc patch_tgt = (cache_pc) |
| (((ptr_uint_t)(pending->entrance_stub ? freeze_info->stubs_start_pc : |
| freeze_info->cache_start_pc)) + tgt); |
| ASSERT(!pending->trace_head || pending->entrance_stub); |
| LOG(THREAD, LOG_FRAGMENT, 4, " patch link "PFX" => "PFX"."PFX"%s\n", |
| pending->link_cti_opnd, pending->tag, patch_tgt, |
| pending->entrance_stub ? " stub" : ""); |
| insert_relative_target(pending->link_cti_opnd, patch_tgt, NOT_HOT_PATCHABLE); |
| } |
| if (pending->entrance_stub) { |
| DODEBUG({ frozen_htable->is_local = false; }); |
| } |
| } |
| |
| /* There are several strategies for copying each fragment and non-inter-unit stub |
| * to new, compact storage. Here we use a cache-driven approach, necessarily |
| * augmented with the htable as we cannot find tags for arbitrary fragments in |
| * the cache. We use a pending-add stack to avoid a second pass. |
| * By adding ubr targets last, we can elide fall-through jmps. |
| * FIXME case 9428: |
| * - Sorting entrance stubs for faster lazy linking lookup |
| * - Use 8-bit-relative jmps when possible for compaction, though this |
| * requires pc translation support and probably an extra pass when freezing. |
| * |
| * Tasks: |
| * Copy each new fragment and non-inter-unit stub to the new region |
| * Unlink all inter-unit entrance stubs, unless freezing and not |
| * writing to disk. |
| * Re-target intra-unit jmps from old entrance stub to new fragment location |
| * Re-target jmp to stub |
| * Re-target indirect stubs to new prefix |
| * Re-target inter-unit and trace head stubs to new prefix |
| * |
| * case 9900: must have dynamo_all_threads_synched since we haven't resolved |
| * lock rank ordering issues with the hashtable locks |
| */ |
| void |
| fragment_coarse_unit_freeze(dcontext_t *dcontext, coarse_freeze_info_t *freeze_info) |
| { |
| pending_freeze_t pending_local; |
| pending_freeze_t *pending; |
| app_to_cache_t a2c; |
| coarse_table_t *frozen_htable; |
| coarse_table_t *htable; |
| cache_pc body_pc; |
| uint i; |
| ASSERT(freeze_info != NULL && freeze_info->src_info != NULL); |
| if (freeze_info->src_info->htable == NULL) |
| return; |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| "freezing fragments in %s\n", freeze_info->src_info->module); |
| |
| /* we walk just the main htable and find trace heads by asking for the body pc */ |
| htable = (coarse_table_t *) freeze_info->src_info->htable; |
| DOSTATS({ |
| LOG(THREAD, LOG_ALL, 1, |
| "htable pre-freezing %s\n", freeze_info->src_info->module); |
| hashtable_coarse_study(dcontext, htable, 0/*clean state*/); |
| }); |
| frozen_htable = (coarse_table_t *) freeze_info->dst_info->htable; |
| /* case 9900: rank order conflict with coarse_info_incoming_lock: |
| * do not grab frozen_htable write lock: mark is_local instead and rely |
| * on dynamo_all_threads_synched |
| */ |
| DODEBUG({ frozen_htable->is_local = true; }); |
| ASSERT_NOT_IMPLEMENTED(dynamo_all_threads_synched && "case 9900"); |
| /* FIXME case 9522: not grabbing TABLE_RWLOCK(htable, read, lock) due to |
| * rank order violation with frozen_htable write lock! We go w/ the write |
| * lock since lookup routines check for it. To solve we'll need a way to |
| * tell deadlock checking that frozen_htable is private to this thread and |
| * that no other thread can hold its lock. For now we only support |
| * all-synch, but if later we want !in_place that needs no synch we'll need |
| * a solution. |
| */ |
| ASSERT_NOT_IMPLEMENTED(dynamo_all_threads_synched && "case 9522"); |
| /* FIXME: we're doing htable order, better to use either tag (original app) |
| * or cache (execution) order? |
| */ |
| for (i = 0; i < htable->capacity || freeze_info->pending != NULL; i++) { |
| |
| /* Process pending entries first; then continue through htable */ |
| while (freeze_info->pending != NULL) { |
| pending = freeze_info->pending; |
| freeze_info->pending = pending->next; |
| fragment_coarse_entry_freeze(dcontext, freeze_info, pending); |
| HEAP_TYPE_FREE(dcontext, pending, pending_freeze_t, |
| ACCT_MEM_MGT/*appropriate?*/, UNPROTECTED); |
| } |
| |
| if (i < htable->capacity) { |
| a2c = htable->table[i]; |
| /* must check for sentinel */ |
| if (!A2C_ENTRY_IS_REAL(a2c)) |
| continue; |
| } else |
| continue; |
| |
| LOG(THREAD, LOG_FRAGMENT, 4, |
| " %d app="PFX", cache="PFX"\n", i, a2c.app, a2c.cache); |
| coarse_body_from_htable_entry(dcontext, freeze_info->src_info, a2c.app, |
| a2c.cache, NULL, &body_pc); |
| |
| if (body_pc == NULL) { |
| /* We add only when targeted by fragments, so we don't have to |
| * figure out multiple times whether intra-unit or not |
| */ |
| LOG(THREAD, LOG_FRAGMENT, 4, |
| " ignoring entrance stub "PFX"\n", a2c.cache); |
| } else { |
| pending_local.tag = a2c.app; |
| pending_local.cur_pc = body_pc; |
| pending_local.entrance_stub = false; |
| pending_local.link_cti_opnd = NULL; |
| pending_local.elide_ubr = true; /* doesn't matter since no link */ |
| pending_local.trace_head = |
| coarse_is_trace_head_in_own_unit(dcontext, a2c.app, a2c.cache, |
| body_pc, true, freeze_info->src_info); |
| pending_local.next = NULL; |
| fragment_coarse_entry_freeze(dcontext, freeze_info, &pending_local); |
| |
| if (pending_local.trace_head) { |
| /* we do need to proactively add the entrance stub, in |
| * case it is only targeted by an indirect branch |
| */ |
| LOG(THREAD, LOG_FRAGMENT, 4, |
| " adding trace head entrance stub "PFX"\n", a2c.cache); |
| pending_local.tag = a2c.app; |
| pending_local.cur_pc = a2c.cache; |
| pending_local.entrance_stub = true; |
| pending_local.link_cti_opnd = NULL; |
| pending_local.elide_ubr = true; /* doesn't matter since no link */ |
| pending_local.trace_head = true; |
| fragment_coarse_entry_freeze(dcontext, freeze_info, &pending_local); |
| } |
| } |
| } |
| DODEBUG({ frozen_htable->is_local = false; }); |
| DOSTATS({ |
| /* The act of freezing tends to improve hashtable layout */ |
| LOG(THREAD, LOG_ALL, 1, |
| "htable post-freezing %s\n", freeze_info->src_info->module); |
| hashtable_coarse_study(dcontext, frozen_htable, 0/*clean state*/); |
| }); |
| } |
| |
| uint |
| fragment_coarse_htable_persist_size(dcontext_t *dcontext, coarse_info_t *info, |
| bool cache_table) |
| { |
| coarse_table_t *htable = (coarse_table_t *) |
| (cache_table ? info->htable : info->th_htable); |
| return hashtable_coarse_persist_size(dcontext, htable); |
| } |
| |
| /* Returns true iff all writes succeeded. */ |
| bool |
| fragment_coarse_htable_persist(dcontext_t *dcontext, coarse_info_t *info, |
| bool cache_table, file_t fd) |
| { |
| coarse_table_t *htable = (coarse_table_t *) |
| (cache_table ? info->htable : info->th_htable); |
| ASSERT(fd != INVALID_FILE); |
| return hashtable_coarse_persist(dcontext, htable, fd); |
| } |
| |
| void |
| fragment_coarse_htable_resurrect(dcontext_t *dcontext, coarse_info_t *info, |
| bool cache_table, byte *mapped_table) |
| { |
| coarse_table_t **htable = (coarse_table_t **) |
| (cache_table ? &info->htable : &info->th_htable); |
| ASSERT(info->frozen); |
| ASSERT(mapped_table != NULL); |
| ASSERT(*htable == NULL); |
| *htable = hashtable_coarse_resurrect(dcontext, mapped_table |
| _IF_DEBUG(cache_table ? |
| "persisted cache htable" : |
| "persisted stub htable")); |
| (*htable)->mod_shift = info->mod_shift; |
| /* generally want to keep basic alignment */ |
| ASSERT_CURIOSITY(ALIGNED((*htable)->table, sizeof(app_pc))); |
| } |
| |
| /*******************************************************************************/ |