| /* ********************************************************** |
| * Copyright (c) 2011-2014 Google, Inc. All rights reserved. |
| * Copyright (c) 2006-2010 VMware, Inc. All rights reserved. |
| * **********************************************************/ |
| |
| /* |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * * Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimer. |
| * |
| * * Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * * Neither the name of VMware, Inc. nor the names of its contributors may be |
| * used to endorse or promote products derived from this software without |
| * specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE |
| * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH |
| * DAMAGE. |
| */ |
| |
| /* Copyright (c) 2006-2007 Determina Corp. */ |
| |
| /* |
| * perscache.c - coarse-grain units and persistent cache management |
| */ |
| |
| #include "globals.h" |
| #include "link.h" |
| #include "fragment.h" |
| #include "fcache.h" |
| #include "monitor.h" |
| #include "perscache.h" |
| #include "instr.h" |
| #include "decode_fast.h" |
| #include "hotpatch.h" |
| #include "synch.h" |
| #include "module_shared.h" |
| #include <string.h> /* for memset */ |
| #include <stddef.h> /* for offsetof */ |
| #ifdef CLIENT_INTERFACE |
| # include "instrument.h" |
| #endif |
| |
| #ifdef DEBUG |
| # include "disassemble.h" |
| #endif |
| |
| #define MAX_PCACHE_OPTIONS_STRING (MAX_OPTIONS_STRING/2) |
/* case 10823: align option string to keep hashtable data aligned.
* we're not using a cache-line-aligned lookup table. */
| #define OPTION_STRING_ALIGNMENT (sizeof(app_pc)) |
| /* in general we want new data sections aligned to keep hashtable aligned */ |
| #define CLIENT_ALIGNMENT (sizeof(app_pc)) |
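/* Illustrative sketch (assumed usage, not the persist code itself): padding a
 * variable-length string with the ALIGN_FORWARD helper keeps the data that
 * follows it in the file pointer-aligned:
 *   len = ALIGN_FORWARD(strlen(opstring) + 1, OPTION_STRING_ALIGNMENT);
 */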
| |
| /* used while merging */ |
| typedef struct _jmp_tgt_list_t { |
| app_pc tag; |
| cache_pc jmp_end_pc; |
| struct _jmp_tgt_list_t *next; |
| } jmp_tgt_list_t; |
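/* Presumed usage (a sketch): jmp_end_pc records the end of a direct jmp whose
 * trailing 4-byte operand targets tag; the operand can then be patched once
 * tag's final location in the merged unit is known.
 */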
| |
| /* Forward decls */ |
| static void |
| persist_calculate_module_digest(module_digest_t *digest, app_pc modbase, size_t modsize, |
| app_pc code_start, app_pc code_end, |
| uint validation_option); |
| static bool |
| get_persist_dir(char *directory /* OUT */, |
| uint directory_len, |
| bool create); |
| |
| #if defined(DEBUG) && defined(INTERNAL) |
| static void |
| print_module_digest(file_t f, module_digest_t *digest, const char *prefix); |
| #endif |
| |
| static void |
| coarse_unit_shift_jmps(dcontext_t *dcontext, coarse_info_t *info, |
| ssize_t cache_shift, ssize_t stubs_shift, size_t old_mapsz); |
| |
| static void |
| coarse_unit_merge_persist_info(dcontext_t *dcontext, coarse_info_t *dst, |
| coarse_info_t *info1, coarse_info_t *info2); |
| |
| #ifdef DEBUG |
| /* used below for pcache_dir_check_permissions() */ |
| DECLARE_CXTSWPROT_VAR(static mutex_t pcache_dir_check_lock, |
| INIT_LOCK_FREE(pcache_dir_check_lock)); |
| #endif |
| |
| /*************************************************************************** |
| * COARSE-GRAIN UNITS |
| */ |
| |
| /* case 9653/10380: only one coarse unit in a module's +x region(s) is persisted */ |
| static void |
| coarse_unit_mark_primary(coarse_info_t *info) |
| { |
| if (!info->in_use) |
| return; |
| #ifdef WINDOWS |
| /* FIXME PR 295529: put in for Linux once we have per-module flags */ |
| /* Go ahead and get write lock up front; else have to check again; not |
| * frequently called so don't need perf opt here. |
| */ |
| os_get_module_info_write_lock(); |
| if (!os_module_get_flag(info->base_pc, MODULE_HAS_PRIMARY_COARSE)) { |
| os_module_set_flag(info->base_pc, MODULE_HAS_PRIMARY_COARSE); |
| ASSERT(os_module_get_flag(info->base_pc, MODULE_HAS_PRIMARY_COARSE)); |
| info->primary_for_module = true; |
| LOG(GLOBAL, LOG_CACHE, 1, "marking "PFX"-"PFX" as primary coarse for %s\n", |
| info->base_pc, info->end_pc, info->module); |
| } |
| os_get_module_info_write_unlock(); |
| #else |
| info->primary_for_module = true; |
| #endif |
| } |
| |
| static void |
| coarse_unit_unmark_primary(coarse_info_t *info) |
| { |
| #ifdef WINDOWS |
| /* FIXME PR 295529: put in for Linux once we have per-module flags */ |
| if (info->primary_for_module && info->in_use) { |
| ASSERT(os_module_get_flag(info->base_pc, MODULE_HAS_PRIMARY_COARSE)); |
| os_module_clear_flag(info->base_pc, MODULE_HAS_PRIMARY_COARSE); |
| info->primary_for_module = false; |
| } |
| #else |
| info->primary_for_module = false; |
| #endif |
| } |
| |
| void |
| coarse_unit_mark_in_use(coarse_info_t *info) |
| { |
| info->in_use = true; |
| coarse_unit_mark_primary(info); |
| } |
| |
| coarse_info_t * |
| coarse_unit_create(app_pc base_pc, app_pc end_pc, module_digest_t *digest, |
| bool for_execution) |
| { |
| coarse_info_t *info = HEAP_TYPE_ALLOC(GLOBAL_DCONTEXT, coarse_info_t, |
| /* FIXME: have separate heap acct? */ |
| ACCT_VMAREAS, PROTECTED); |
| memset(info, 0, sizeof(*info)); |
| ASSIGN_INIT_LOCK_FREE(info->lock, coarse_info_lock); |
| ASSIGN_INIT_LOCK_FREE(info->incoming_lock, coarse_info_incoming_lock); |
| info->base_pc = base_pc; |
| /* XXX i#704: handle overflow: better to store size */ |
| info->end_pc = end_pc; |
| /* FIXME: set PERSCACHE_X86_{32,64} here since for x64 the live |
| * unit's flags are used for 32-bit code in 64-bit processes. |
| * app_memory_allocation() may need a "bool x86_mode" param that is at |
| * least passed in to here: not clear if it should be stored in the |
| * vmarea. |
| */ |
| DODEBUG({ |
| info->is_local = false; |
| info->module = os_get_module_name_strdup(info->base_pc HEAPACCT(ACCT_VMAREAS)); |
| if (info->module == NULL) { |
/* otherwise our LOG statements would crash on the NULL */
| info->module = dr_strdup("" HEAPACCT(ACCT_VMAREAS)); |
| } |
| }); |
| if (for_execution) |
| coarse_unit_mark_in_use(info); |
| if (digest != NULL) { |
| memcpy(&info->module_md5, digest, sizeof(info->module_md5)); |
| } else if (TEST(PERSCACHE_MODULE_MD5_AT_LOAD, |
| DYNAMO_OPTION(persist_gen_validation))) { |
| /* case 9735: calculate the module md5 at load time so we have a consistent |
| * point at which to compare it when loading in a persisted cache file. |
| * If we inject at different points we may see different views of |
| * post-loader vs pre-loader module changes but we'll live with that. |
| * Should have consistent injection points in steady state usage. |
| * FIXME PR 215036: for 4.4 we'll want to not record the at-mmap md5, but |
| * rather the 1st-execution-time post-rebase md5. |
| */ |
| app_pc modbase = get_module_base(info->base_pc); |
| size_t modsize; |
| os_get_module_info_lock(); |
/* For Linux we can't do module segment walking at initial mmap time
| * b/c the segments are not set up: we hit SIGBUS! |
| */ |
| IF_UNIX(ASSERT_BUG_NUM(215036, true)); |
| if (os_get_module_info(modbase, NULL, NULL, &modsize, |
| NULL, NULL, NULL)) { |
| os_get_module_info_unlock(); |
| persist_calculate_module_digest(&info->module_md5, modbase, modsize, |
| info->base_pc, info->end_pc, |
| DYNAMO_OPTION(persist_gen_validation)); |
| DOLOG(1, LOG_CACHE, { |
| print_module_digest(GLOBAL, &info->module_md5, "md5 at load time: "); |
| }); |
| } else |
| os_get_module_info_unlock(); |
| } |
| /* the rest is initialized lazily in coarse_unit_init() */ |
| RSTATS_ADD_PEAK(num_coarse_units, 1); |
| return info; |
| } |
| |
| void |
| coarse_unit_free(dcontext_t *dcontext, coarse_info_t *info) |
| { |
| ASSERT(info != NULL); |
| /* Elements should have been freed in coarse_unit_reset_free() */ |
| ASSERT(info->htable == NULL); |
| ASSERT(info->th_htable == NULL); |
| ASSERT(info->pclookup_htable == NULL); |
| ASSERT(info->cache == NULL); |
| ASSERT(info->incoming == NULL); |
| ASSERT(info->stubs == NULL); |
| ASSERT(info->cache_start_pc == NULL); |
| ASSERT(info->stubs_start_pc == NULL); |
| DODEBUG({ |
| if (info->module != NULL) |
| dr_strfree(info->module HEAPACCT(ACCT_VMAREAS)); |
| }); |
| DELETE_LOCK(info->lock); |
| DELETE_LOCK(info->incoming_lock); |
| HEAP_TYPE_FREE(GLOBAL_DCONTEXT, info, coarse_info_t, |
| ACCT_VMAREAS, PROTECTED); |
| RSTATS_DEC(num_coarse_units); |
| } |
| |
| void |
| coarse_unit_init(coarse_info_t *info, void *cache) |
| { |
| ASSERT(info != NULL); |
| ASSERT(cache != NULL); |
| ASSERT_OWN_MUTEX(true, &info->lock); |
| fragment_coarse_htable_create(info, 0, 0); |
| coarse_stubs_create(info, NULL, 0); |
| /* cache is passed in since it can't be created while holding info->lock */ |
| info->cache = cache; |
| } |
| |
| /* If caller holds change_linking_lock and info->lock, have_locks should be true. |
| * If !need_info_lock, info must be a thread-local, unlinked, private pointer! |
| */ |
| static void |
| coarse_unit_reset_free_internal(dcontext_t *dcontext, coarse_info_t *info, |
| bool have_locks, bool unlink, bool abdicate_primary, |
| bool need_info_lock) |
| { |
| DEBUG_DECLARE(bool ok;) |
| ASSERT(info != NULL); |
| LOG(THREAD, LOG_CACHE, 2, "coarse_unit_reset_free %s\n", info->module); |
| if (!have_locks) { |
| /* Though only called during all-threads-synch, we still grab our lock here */ |
| /* higher rank than info, needed for unlink */ |
| if (unlink) |
| acquire_recursive_lock(&change_linking_lock); |
| if (need_info_lock) |
| mutex_lock(&info->lock); |
| } |
| ASSERT(!unlink || self_owns_recursive_lock(&change_linking_lock)); |
| ASSERT_OWN_MUTEX(need_info_lock, &info->lock); |
| ASSERT(need_info_lock || !unlink); /* else will get deadlock */ |
| /* case 11064: avoid rank order */ |
| DODEBUG({ |
| if (!need_info_lock) |
| info->is_local = true; |
| }); |
| if (unlink) |
| coarse_unit_unlink(dcontext, info); |
| fragment_coarse_htable_free(info); |
| coarse_stubs_delete(info); |
| fcache_coarse_cache_delete(dcontext, info); |
| if (info->in_use && abdicate_primary) |
| coarse_unit_unmark_primary(info); |
| if (info->frozen) { |
| ASSERT(info->mmap_size > 0); |
| if (info->persisted) { |
| #if defined(RETURN_AFTER_CALL) || defined(RCT_IND_BRANCH) |
| if (info->in_use) { |
| rct_module_table_persisted_invalidate(dcontext, info->base_pc); |
| } |
| #endif |
/* We always use GLOBAL_DCONTEXT for these in case the unit is in use */
| #ifdef RCT_IND_BRANCH |
| if (info->rct_table != NULL) |
| rct_table_free(GLOBAL_DCONTEXT, info->rct_table, false/*data mmapped*/); |
| #endif |
| #ifdef RETURN_AFTER_CALL |
| if (info->rac_table != NULL) |
| rct_table_free(GLOBAL_DCONTEXT, info->rac_table, false/*data mmapped*/); |
| #endif |
| ASSERT(info->mmap_pc != NULL); |
| if (info->mmap_ro_size > 0) { |
| /* two views */ |
| DEBUG_DECLARE(ok =) unmap_file(info->mmap_pc, info->mmap_ro_size); |
| ASSERT(ok); |
| DEBUG_DECLARE(ok =) unmap_file(info->mmap_pc+info->mmap_ro_size, |
| info->mmap_size-info->mmap_ro_size); |
| ASSERT(ok); |
| info->mmap_ro_size = 0; |
| } else { |
| DEBUG_DECLARE(ok =) unmap_file(info->mmap_pc, info->mmap_size); |
| ASSERT(ok); |
| } |
| if (DYNAMO_OPTION(persist_lock_file)) { |
| ASSERT(info->fd != INVALID_FILE); |
| os_close(info->fd); |
| info->fd = INVALID_FILE; |
| } |
| } else { |
| ASSERT(info->cache_start_pc != NULL); |
| ASSERT(info->stubs_start_pc != NULL); |
| ASSERT(info->mmap_ro_size == 0); |
| heap_munmap(info->cache_start_pc, info->mmap_size); |
| if (info->has_persist_info) { |
| /* Persisted units point at their mmaps for these structures; |
| * non-persisted dynamically allocate them from DR heap. |
| */ |
| #ifdef RCT_IND_BRANCH |
| if (info->rct_table != NULL) |
| rct_table_free(dcontext, info->rct_table, true); |
| #endif |
| #ifdef RETURN_AFTER_CALL |
| if (info->rac_table != NULL) |
| rct_table_free(dcontext, info->rac_table, true); |
| #endif |
| #ifdef HOT_PATCHING_INTERFACE |
| if (info->hotp_ppoint_vec != NULL) { |
| HEAP_ARRAY_FREE(dcontext, info->hotp_ppoint_vec, app_rva_t, |
| info->hotp_ppoint_vec_num, ACCT_HOT_PATCHING, PROTECTED); |
| } |
| #endif |
| } |
| } |
| } else { |
| ASSERT(info->mmap_size == 0); |
| ASSERT(info->cache_start_pc == NULL); |
| ASSERT(info->stubs_start_pc == NULL); |
| ASSERT(!info->has_persist_info); |
| } |
| /* This struct may be re-used for a non-frozen/persisted unit if it was reset due |
| * to a non-cache-consistency reason. Thus we want to preserve the locks, vm |
| * region, and md5, but clear everything else (case 10119). |
| */ |
| memset(info, 0, offsetof(coarse_info_t, lock)); |
| if (!have_locks) { |
| if (need_info_lock) |
| mutex_unlock(&info->lock); |
| if (unlink) |
| release_recursive_lock(&change_linking_lock); |
| } |
| } |
| |
| /* If caller holds change_linking_lock and info->lock, have_locks should be true */ |
| void |
| coarse_unit_reset_free(dcontext_t *dcontext, coarse_info_t *info, |
| bool have_locks, bool unlink, bool abdicate_primary) |
| { |
| coarse_unit_reset_free_internal(dcontext, info, |
| have_locks, unlink, abdicate_primary, |
| true/*need_info_lock*/); |
| } |
| |
/* Currently only one such directory is expected, matching the
* primary user token; see case 8812.
*/
| static file_t perscache_user_directory = INVALID_FILE; |
| |
| void |
| perscache_init(void) |
| { |
| if (DYNAMO_OPTION(use_persisted) && |
| DYNAMO_OPTION(persist_per_user) && |
| DYNAMO_OPTION(validate_owner_dir)) { |
| char dir[MAXIMUM_PATH]; |
| |
/* case 8812: we need to hold a handle to the user directory
| * from startup (we could delay until we open our first pcache file) |
| */ |
| if (get_persist_dir(dir, BUFFER_SIZE_ELEMENTS(dir), |
| true /* note we MUST always create directory |
| * even if never persisting */ |
| )) { |
| /* we just need READ_CONTROL (on Windows) to check |
| * ownership, and we are NOT OK with the directory being |
| * renamed (or deleted and recreated by a malactor) while |
| * we still have a handle to it. |
| */ |
| perscache_user_directory = os_open_directory(dir, 0); |
| ASSERT(perscache_user_directory != INVALID_FILE); |
| |
| /* note that now that we have the actual handle open, we can validate */ |
| /* see os_current_user_directory() for details */ |
| if (perscache_user_directory != INVALID_FILE && |
| !os_validate_user_owned(perscache_user_directory)) { |
| SYSLOG_INTERNAL_ERROR("%s is OWNED by an impostor!" |
| " Persistent cache use is disabled.", |
| dir); |
| os_close(perscache_user_directory); |
| perscache_user_directory = INVALID_FILE; |
| /* we could also turn off use_persisted */ |
| } else { |
| /* either FAT32 or we are the proper owner */ |
| |
/* FIXME: we have to verify that the final permissions and
* sharing attributes for cache/ and for the current
* directory do NOT allow anyone to rename our directory
* while in use and replace it. Otherwise we'd still
* have to verify the owner of each file as well with
* -validate_owner_file. See the duplicate comment in
* open_relocated_dlls_filecache_directory().
*/
| } |
| } |
| } |
| } |
| |
| void |
| perscache_fast_exit(void) |
| { |
| if (DYNAMO_OPTION(coarse_freeze_at_exit)) { |
| coarse_units_freeze_all(false/*!in place*/); |
| } |
| |
| if (perscache_user_directory != INVALID_FILE) { |
| ASSERT_CURIOSITY(DYNAMO_OPTION(validate_owner_dir)); |
| os_close(perscache_user_directory); |
| perscache_user_directory = INVALID_FILE; |
| } |
| ASSERT(perscache_user_directory == INVALID_FILE); |
| } |
| |
| void |
| perscache_slow_exit(void) |
| { |
| DODEBUG(DELETE_LOCK(pcache_dir_check_lock);); |
| } |
| |
| /*************************************************************************** |
| * FROZEN UNITS |
| */ |
| |
| /* Separated out to keep priv_mcontext_t out of critical stack path */ |
| static void |
| coarse_units_freeze_translate(thread_record_t *tr, |
| const thread_synch_state_t desired_state) |
| { |
| priv_mcontext_t mc; |
| bool res; |
| res = thread_get_mcontext(tr, &mc); |
| ASSERT(res); |
| /* We're freeing coarse fragments so we must translate all |
* threads that are currently in a coarse unit, or about
| * to enter one (case 10030). We don't translate threads |
| * in fine-grained caches as an optimization. |
| * If we did one unit at a time, could compare to just that unit. |
| */ |
| if (!res || !in_fcache((cache_pc)mc.pc) || |
| get_fcache_coarse_info((cache_pc)mc.pc) != NULL) { |
| /* FIXME optimization: pass cxt for translation */ |
| translate_from_synchall_to_dispatch(tr, desired_state); |
| } else { |
| LOG(GLOBAL, LOG_FRAGMENT, 2, |
| "\tin fine-grained cache so no translation needed\n"); |
| } |
| } |
| |
| /* If !in_place this routine freezes (if not already) and persists. |
| * FIXME case 9975: provide support for freezing in place and |
| * persisting in one call? Should we support loading in a newly |
| * persisted version to replace the in-memory unit? |
| */ |
| void |
| coarse_units_freeze_all(bool in_place) |
| { |
| thread_record_t **threads = NULL; |
| int i, num_threads = 0; |
| bool own_synch; |
| dcontext_t *my_dcontext = get_thread_private_dcontext(); |
| const thread_synch_state_t desired_state = |
| THREAD_SYNCH_SUSPENDED_VALID_MCONTEXT_OR_NO_XFER; |
| if (!DYNAMO_OPTION(coarse_units) || !DYNAMO_OPTION(coarse_enable_freeze) || |
| RUNNING_WITHOUT_CODE_CACHE()) |
| return; |
| KSTART(coarse_freeze_all); |
| /* on a detach we don't need to synch or xlate the threads */ |
| own_synch = !dynamo_all_threads_synched; |
| ASSERT(own_synch IF_WINDOWS(|| doing_detach)); |
| STATS_INC(coarse_freezes); |
| if (own_synch) { |
/* This can be called from nudge threads running in the code cache,
* so if the calling fragment could be coarse, we would have to
* terminate this thread. Case 8711 does not allow calls from
* coarse fragments, so we're fine for now.
| */ |
| if (!synch_with_all_threads(desired_state, &threads, &num_threads, |
| /* FIXME: can we set mcontext->pc to next_tag and |
| * use THREAD_SYNCH_VALID_MCONTEXT? not if nudge |
| * comes here */ |
| THREAD_SYNCH_NO_LOCKS_NO_XFER, /* Case 6821 */ |
| /* if we fail to suspend a thread (e.g., for |
| * privilege reasons) just abort */ |
| THREAD_SYNCH_SUSPEND_FAILURE_ABORT |
| /* if we get in a race with detach, or are having |
| * synch issues for whatever reason, bail out sooner |
| * rather than later */ |
| | THREAD_SYNCH_SMALL_LOOP_MAX)) { |
| /* just give up */ |
| ASSERT(!OWN_MUTEX(&all_threads_synch_lock) && |
| !OWN_MUTEX(&thread_initexit_lock)); |
| ASSERT(threads == NULL); |
| ASSERT(!dynamo_all_threads_synched); |
| STATS_INC(coarse_freeze_abort); |
| LOG(GLOBAL, LOG_CACHE, 2, |
| "coarse_unit_freeze: aborting due to thread synch failure\n"); |
| SYSLOG_INTERNAL_WARNING("coarse freeze aborted due to thread synch failure"); |
| KSTOP(coarse_freeze_all); |
| return; |
| } |
| } |
| ASSERT(dynamo_all_threads_synched); |
| ASSERT(OWN_MUTEX(&all_threads_synch_lock) && OWN_MUTEX(&thread_initexit_lock)); |
| |
| DOSTATS({ |
| SYSLOG_INTERNAL_INFO("freezing all coarse units @ "SSZFMT" fragments", |
| GLOBAL_STAT(num_fragments)); |
| }); |
| |
| /* This routine does the actual freeze and persist calls |
| * FIXME case 9641: should we end the synch after freezing |
| * so other threads can make progress while we persist? |
| */ |
| vm_area_coarse_units_freeze(in_place); |
| |
| if (in_place && own_synch) { |
| DEBUG_DECLARE(uint removed;) |
| for (i = 0; i < num_threads; i++) { |
| dcontext_t *dcontext = threads[i]->dcontext; |
| if (dcontext != NULL && dcontext != my_dcontext) { |
| /* FIXME: share these checks w/ other synchall-and-abort users |
| * (reset) and synchall-and-don't-abort (flush). |
| */ |
| /* Should have aborted if we had any synch failures */ |
| ASSERT(thread_synch_successful(threads[i])); |
| if (is_thread_currently_native(threads[i])) { |
| /* Whether in native_exec or we lost control, since we're not |
| * freeing the interception buffer, no state to worry about. |
| */ |
| LOG(GLOBAL, LOG_FRAGMENT, 2, |
| "\tcurrently native so no translation needed\n"); |
| } else if (thread_synch_state_no_xfer(dcontext)) { |
| /* Case 6821: do not translate other synch-all-thread users. */ |
| LOG(GLOBAL, LOG_FRAGMENT, 2, |
| "\tat THREAD_SYNCH_NO_LOCKS_NO_XFER so no translation needed\n"); |
| } else { |
| /* subroutine to avoid priv_mcontext_t on our stack when we |
| * freeze + merge&load */ |
| coarse_units_freeze_translate(threads[i], desired_state); |
| } |
| last_exit_deleted(dcontext); |
| if (is_building_trace(dcontext)) { |
| LOG(THREAD, LOG_FRAGMENT, 2, |
| "\tsquashing trace of thread "TIDFMT"\n", i); |
| trace_abort(dcontext); |
| } |
| if (DYNAMO_OPTION(bb_ibl_targets)) { |
| /* FIXME: we could just remove the coarse ibl entries */ |
| DEBUG_DECLARE(removed =) |
| fragment_remove_all_ibl_in_region(dcontext, |
| UNIVERSAL_REGION_BASE, |
| UNIVERSAL_REGION_END); |
| LOG(THREAD, LOG_FRAGMENT, 2, "\tremoved %d ibl entries\n", removed); |
| } |
| } |
| } |
| if (DYNAMO_OPTION(bb_ibl_targets)) { |
| /* FIXME: we could just remove the coarse ibl entries */ |
| DEBUG_DECLARE(removed =) |
| fragment_remove_all_ibl_in_region(GLOBAL_DCONTEXT, |
| UNIVERSAL_REGION_BASE, |
| UNIVERSAL_REGION_END); |
| LOG(GLOBAL, LOG_FRAGMENT, 2, "\tremoved %d ibl entries\n", removed); |
| } |
| } |
| |
| if (own_synch) |
| end_synch_with_all_threads(threads, num_threads, true/*resume*/); |
| KSTOP(coarse_freeze_all); |
| } |
| |
| /* Removes dst's data and replaces it with src's data. Frees src. |
| * Assumes that src is thread-local and not reachable by any other thread, |
| * and that dst's lock is held. |
| */ |
| static void |
| coarse_replace_unit(dcontext_t *dcontext, coarse_info_t *dst, coarse_info_t *src) |
| { |
| /* Perhaps we should separately allocate the locks to avoid this copying |
| * for preservation? Or memcpy all but the lock fields? Or delete and |
| * re-init them? If we move to a model where the world isn't suspended |
| * we have to ensure no other thread is trying to lock. |
| */ |
| coarse_info_t *non_frozen; |
| mutex_t temp_lock, temp_incoming_lock; |
| DEBUG_DECLARE(const char *modname;) |
| ASSERT_OWN_MUTEX(true, &dst->lock); |
| mutex_lock(&dst->incoming_lock); |
| ASSERT(src->incoming == NULL); /* else we leak */ |
| src->incoming = dst->incoming; |
| dst->incoming = NULL; /* do not free incoming */ |
| mutex_unlock(&dst->incoming_lock); |
| non_frozen = dst->non_frozen; |
| coarse_unit_reset_free(dcontext, dst, true/*have locks*/, |
| false/*do not unlink*/, false/*keep primary*/); |
| temp_lock = dst->lock; |
| temp_incoming_lock = dst->incoming_lock; |
| DODEBUG({ modname = dst->module; }); |
| memcpy(dst, src, sizeof(*dst)); |
| dst->lock = temp_lock; |
| dst->incoming_lock = temp_incoming_lock; |
| dst->non_frozen = non_frozen; |
| DODEBUG({ dst->module = modname; }); |
| ASSERT(dst->incoming == src->incoming); |
| /* update pointers from src to dst */ |
| fcache_coarse_set_info(dcontext, dst); |
| patch_coarse_exit_prefix(dcontext, dst); |
| coarse_stubs_set_info(dst); |
| DODEBUG({ |
| /* avoid asserts */ |
| src->htable = NULL; |
| src->th_htable = NULL; |
| src->pclookup_htable = NULL; |
| src->cache = NULL; |
| src->incoming = NULL; |
| src->stubs = NULL; |
| src->cache_start_pc = NULL; |
| src->stubs_start_pc = NULL; |
| }); |
| coarse_unit_free(dcontext, src); |
| } |
| |
| /* In-place freezing replaces info with a frozen copy. |
| * Otherwise, a new copy is created for persisting, while the original |
| * copy is undisturbed and unfrozen. |
* The only purpose of !in_place is to write a snapshot out to
* disk while letting coarse unit creation continue. The in_place
* result can be written out to disk as well, so we leave that up
* to the caller.
| * Caller must hold change_linking_lock. |
| * If in_place, caller is responsible for flushing the ibl tables (case 11057). |
| */ |
| coarse_info_t * |
| coarse_unit_freeze(dcontext_t *dcontext, coarse_info_t *info, bool in_place) |
| { |
| coarse_info_t *frozen = NULL; |
| coarse_info_t *res = NULL; |
| size_t frozen_stub_size, frozen_cache_size; |
| uint num_fragments, num_stubs; |
| coarse_freeze_info_t *freeze_info = |
| HEAP_TYPE_ALLOC(dcontext, coarse_freeze_info_t, |
| ACCT_MEM_MGT/*appropriate?*/, PROTECTED); |
| |
| LOG(THREAD, LOG_CACHE, 2, "coarse_unit_freeze %s\n", info->module); |
| STATS_INC(coarse_units_frozen); |
| /* FIXME: Suspend world needed if not in-place? Even though unit lock |
| * is not held when changing unit links (e.g., unit flush holds only |
| * target unit lock when unlinking incoming), the change_linking_lock |
| * should give us guarantees. |
| * But if we don't suspend-all we'll have other issues: |
| * - fcache_coarse_init_frozen() will need to |
| * grab the shared cache lock, which is higher rank than coarse unit lock! |
| * - same issue w/ fcache_coarse_cache_delete (via coarse_unit_reset_free) |
| */ |
| /* FIXME: support single-unit freeze by having this routine itself |
| * do a synch-all? |
| */ |
| ASSERT(dynamo_all_threads_synched); |
| |
| ASSERT(info != NULL); |
| ASSERT_OWN_RECURSIVE_LOCK(true, &change_linking_lock); |
| |
/* trigger lazy initialization to avoid deadlock when calling
| * coarse_cti_is_intra_fragment() during shifting |
| */ |
| fragment_coarse_create_entry_pclookup_table(dcontext, info); |
| |
| mutex_lock(&info->lock); |
| ASSERT(info->cache != NULL); /* don't freeze empty units */ |
| ASSERT(!info->frozen); /* don't freeze already frozen units */ |
| if (info->cache == NULL || info->frozen) |
| goto coarse_unit_freeze_exit; |
| /* invalid unit shouldn't get this far */ |
| ASSERT(!TEST(PERSCACHE_CODE_INVALID, info->flags)); |
| if (TEST(PERSCACHE_CODE_INVALID, info->flags)) /* paranoid */ |
| goto coarse_unit_freeze_exit; |
| |
| memset(freeze_info, 0, sizeof(*freeze_info)); |
| freeze_info->src_info = info; |
| |
| /* Tasks: |
| * 1) Calculate final size of cache and stub space: |
| * Walk entrance stubs and count how many are intra-unit links |
| * that can be changed to direct jmps |
| * 2) Create single contiguous region to hold both cache and stubs, |
| * rounding up to a page boundary in the middle for +r->+rw |
| * 3) Copy each fragment and stub over |
| * |
| * FIXME case 9428: shrink the cache to take advantage of elided jmps! |
| * Requires a separate pass to touch up jmps to stubs/prefixes, or |
| * re-ordering w/ stubs on top and cache on bottom. That would also |
| * put a read-only page at the end, so no guard page needed -- unless we |
| * hook our own cache (case 9673) and are worried about brief periods of +w. |
| */ |
| |
| frozen_stub_size = coarse_frozen_stub_size(dcontext, info, &num_fragments, |
| &num_stubs); |
| frozen_cache_size = coarse_frozen_cache_size(dcontext, info); |
/* we need the stubs to start on a new page since they will be +rw vs the cache's +r */
| frozen_cache_size = ALIGN_FORWARD(frozen_cache_size, PAGE_SIZE); |
| freeze_info->cache_start_pc = |
| (cache_pc) heap_mmap(frozen_stub_size + frozen_cache_size); |
| /* FIXME: should show full non-frozen size as well */ |
| LOG(THREAD, LOG_CACHE, 2, |
| "%d frozen stubs @ "SZFMT" bytes + %d fragments @ "SZFMT" bytes => "PFX"\n", |
| num_stubs, frozen_stub_size, num_fragments, |
| frozen_cache_size, freeze_info->cache_start_pc); |
| STATS_ADD(coarse_fragments_frozen, num_fragments); |
| |
| /* We use raw pcs to build up our cache and stubs, and later we impose |
| * our regular data structures on them |
| */ |
| |
| /* Whether freezing in-place or not, we create a new coarse_info_t. |
| * If in-place we delete the old one afterward. |
| */ |
| frozen = coarse_unit_create(info->base_pc, info->end_pc, &info->module_md5, |
| in_place && info->in_use); |
| freeze_info->dst_info = frozen; |
| frozen->frozen = true; |
| frozen->cache_start_pc = freeze_info->cache_start_pc; |
| frozen->mmap_size = frozen_stub_size + frozen_cache_size; |
| /* Our relative jmps require that we do not exceed 32-bit reachability */ |
| IF_X64(ASSERT(CHECK_TRUNCATE_TYPE_int(frozen->mmap_size))); |
| /* Same bounds, so same persistence privileges */ |
| frozen->primary_for_module = info->primary_for_module; |
| |
| freeze_info->stubs_start_pc = |
| coarse_stubs_create(frozen, freeze_info->cache_start_pc + frozen_cache_size, |
| frozen_stub_size); |
| ASSERT(freeze_info->stubs_start_pc != NULL); |
| ASSERT(ALIGNED(freeze_info->stubs_start_pc, coarse_stub_alignment(info))); |
| frozen->stubs_start_pc = freeze_info->stubs_start_pc; |
| ASSERT(frozen->fcache_return_prefix == |
| freeze_info->cache_start_pc + frozen_cache_size); |
| #if 0 |
| ASSERT(frozen->trace_head_return_prefix == frozen->fcache_return_prefix + |
| (info->trace_head_return_prefix - info->fcache_return_prefix)); |
| ASSERT(frozen->ibl_ret_prefix == frozen->fcache_return_prefix + |
| (info->ibl_ret_prefix - info->fcache_return_prefix)); |
| ASSERT(frozen->ibl_call_prefix == frozen->fcache_return_prefix + |
| (info->ibl_call_prefix - info->fcache_return_prefix)); |
| ASSERT(frozen->ibl_jmp_prefix == frozen->fcache_return_prefix + |
| (info->ibl_jmp_prefix - info->fcache_return_prefix)); |
| #endif |
| |
| fragment_coarse_htable_create(frozen, num_fragments, num_stubs); |
| |
| fcache_coarse_init_frozen(dcontext, frozen, freeze_info->cache_start_pc, |
| frozen_cache_size); |
| |
| /* assumption: leave inter-unit links intact for in_place, but not (for |
| * persisting) otherwise |
| */ |
| freeze_info->unlink = !in_place; |
| |
| freeze_info->cache_cur_pc = freeze_info->cache_start_pc; |
| freeze_info->stubs_cur_pc = freeze_info->stubs_start_pc; |
| |
| fragment_coarse_unit_freeze(dcontext, freeze_info); |
| ASSERT(freeze_info->pending == NULL); |
| ASSERT(freeze_info->cache_cur_pc <= |
| freeze_info->cache_start_pc + frozen_cache_size); |
| ASSERT(freeze_info->stubs_cur_pc <= |
| freeze_info->stubs_start_pc + frozen_stub_size); |
| if (frozen->fcache_return_prefix + frozen_stub_size == |
| freeze_info->stubs_cur_pc) |
| frozen->stubs_end_pc = freeze_info->stubs_cur_pc; |
| else { |
| /* FIXME case 9428: strange history here: I don't see a problem now, |
| * but leaving some release-build code just in case. |
| */ |
| ASSERT_NOT_REACHED(); |
| coarse_stubs_set_end_pc(frozen, freeze_info->stubs_cur_pc); |
| } |
| frozen->cache_end_pc = freeze_info->cache_cur_pc; |
| |
| LOG(THREAD, LOG_CACHE, 2, "frozen code stats for %s:\n %6d app code\n", |
| info->module, freeze_info->app_code_size); |
| LOG(THREAD, LOG_CACHE, 2, " %6d fallthrough\n", freeze_info->added_fallthrough); |
| LOG(THREAD, LOG_CACHE, 2, " %6d ind br mangle\n", freeze_info->added_indbr_mangle); |
| LOG(THREAD, LOG_CACHE, 2, " %6d indr br stubs\n", freeze_info->added_indbr_stub); |
| LOG(THREAD, LOG_CACHE, 2, " %6d jecxz mangle\n", freeze_info->added_jecxz_mangle); |
| LOG(THREAD, LOG_CACHE, 2, " -%6d = 5 x %d elisions\n", |
| freeze_info->num_elisions*5, freeze_info->num_elisions); |
| LOG(THREAD, LOG_CACHE, 2, "ctis: %5d cbr, %5d jmp, %5d call, %5d ind\n", |
| freeze_info->num_cbr, freeze_info->num_jmp, freeze_info->num_call, |
| freeze_info->num_indbr); |
| LOG(THREAD, LOG_CACHE, 2, |
| "frozen final size: stubs "SZFMT" bytes + cache "SZFMT" bytes\n", |
| freeze_info->stubs_cur_pc - freeze_info->stubs_start_pc, |
| freeze_info->cache_cur_pc - freeze_info->cache_start_pc); |
| |
| /* FIXME case 9687: mark cache as read-only */ |
| |
| if (in_place) { |
| coarse_replace_unit(dcontext, info, frozen); |
| frozen = NULL; |
| mark_executable_area_coarse_frozen(info); |
| coarse_unit_shift_links(dcontext, info); |
| res = info; |
| } else { |
/* we made a separate copy that has no outgoing or incoming links */
| res = frozen; |
| } |
| |
| coarse_unit_freeze_exit: |
| HEAP_TYPE_FREE(dcontext, freeze_info, coarse_freeze_info_t, |
| ACCT_MEM_MGT/*appropriate?*/, PROTECTED); |
| |
| mutex_unlock(&info->lock); |
| |
/* be sure to free the table to avoid it missing entries if we add to info later */
| fragment_coarse_free_entry_pclookup_table(dcontext, info); |
| |
| DOLOG(3, LOG_CACHE, { |
| if (res != NULL) { |
| byte *pc = frozen->cache_start_pc; |
| LOG(THREAD, LOG_CACHE, 1, "frozen cache for %s:\n", info->module); |
| do { |
| app_pc tag = fragment_coarse_entry_pclookup(dcontext, frozen, pc); |
| if (tag != NULL) |
| LOG(THREAD, LOG_CACHE, 1, "tag "PFX":\n", tag); |
| pc = disassemble_with_bytes(dcontext, pc, THREAD); |
| } while (pc < frozen->cache_end_pc); |
| } |
| }); |
| |
| return res; |
| } |
| |
| /* These decode-and-instr-using routines could go in arch/ as they assume that direct |
| * jump operands are 4 bytes and are at the end of the instruction. |
| */ |
| |
| /* Transfers a coarse stub to a new location. |
| * If freeze_info->dst_info is non-NULL, |
| * shifts any unlinked stubs to point at the prefixes in freeze_info->dst_info. |
| * If freeze_info->unlink is true, |
| * points any linked stubs at freeze_info->dst_info->fcache_return_prefix if |
| * freeze_info->dst_info is non-NULL, else |
| * freeze_info->src_info->fcache_return_prefix. If trace_head, points at |
| * trace_head_return_prefix instead of fcache_return_prefix. |
* replace_outgoing really only applies if in_place: should we replace the
* incoming entries for outgoing links (true), or add new ones (false)?
| */ |
| void |
| transfer_coarse_stub(dcontext_t *dcontext, coarse_freeze_info_t *freeze_info, |
| cache_pc stub, bool trace_head, bool replace_outgoing) |
| { |
| cache_pc tgt = entrance_stub_jmp_target(stub); |
| cache_pc pc = freeze_info->stubs_cur_pc; /* target pc */ |
| uint sz; |
| bool update_out = false; |
| /* Should not be targeting the cache, else our later shift will be wrong */ |
| ASSERT(tgt < freeze_info->src_info->cache_start_pc || |
| tgt >= freeze_info->src_info->cache_end_pc); |
| if (tgt == freeze_info->src_info->fcache_return_prefix) { |
| ASSERT(!trace_head); |
| if (freeze_info->dst_info != NULL) |
| tgt = freeze_info->dst_info->fcache_return_prefix; |
| LOG(THREAD, LOG_FRAGMENT, 4, |
| " transfer_coarse_stub "PFX": tgt is fcache_return_prefix\n", stub); |
| } else if (tgt == freeze_info->src_info->trace_head_return_prefix) { |
| ASSERT(trace_head); |
| if (freeze_info->dst_info != NULL) |
| tgt = freeze_info->dst_info->trace_head_return_prefix; |
| LOG(THREAD, LOG_FRAGMENT, 4, |
| " transfer_coarse_stub "PFX": tgt is trace_head_return_prefix\n", stub); |
| } else if (freeze_info->unlink) { |
| coarse_info_t *info = (freeze_info->dst_info != NULL) ? |
| freeze_info->dst_info : freeze_info->src_info; |
| if (trace_head) { |
| tgt = info->trace_head_return_prefix; |
| LOG(THREAD, LOG_FRAGMENT, 4, |
| " transfer_coarse_stub "PFX": unlinking as trace head\n", stub); |
| } else { |
| tgt = info->fcache_return_prefix; |
| LOG(THREAD, LOG_FRAGMENT, 4, |
| " transfer_coarse_stub "PFX": unlinking as non-trace head\n", stub); |
| } |
| } else |
| update_out = true; |
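/* Copy the stub up through the jmp opcode; the jmp's 4-byte relative operand
 * is not copied and is written fresh by insert_relative_target() below.
 */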
| sz = exit_stub_size(dcontext, tgt, FRAG_COARSE_GRAIN) |
| - (JMP_LONG_LENGTH - 1/*get opcode*/); |
| memcpy(pc, stub, sz); |
| pc += sz; |
| ASSERT(pc == entrance_stub_jmp(freeze_info->stubs_cur_pc) + 1/*skip opcode*/); |
| #ifdef X86 |
| ASSERT(*(pc-1) == JMP_OPCODE); |
| #elif defined(ARM) |
| /* FIXME i#1551: NYI on ARM */ |
| ASSERT_NOT_IMPLEMENTED(false); |
| #endif |
| /* if tgt unchanged we still need to re-relativize it */ |
| ASSERT(dynamo_all_threads_synched); /* thus NOT_HOT_PATCHABLE */ |
| pc = insert_relative_target(pc, tgt, NOT_HOT_PATCHABLE); |
| if (update_out) { |
| coarse_update_outgoing(dcontext, stub, freeze_info->stubs_cur_pc, |
| freeze_info->src_info, replace_outgoing); |
| } |
| pc = (cache_pc) ALIGN_FORWARD(pc, coarse_stub_alignment(freeze_info->src_info)); |
| freeze_info->stubs_cur_pc = pc; |
| } |
| |
| void |
| transfer_coarse_stub_fix_trace_head(dcontext_t *dcontext, |
| coarse_freeze_info_t *freeze_info, |
| cache_pc stub) |
| { |
/* We don't know the body pc at fragment exit processing time, so we
* can add a stub and unlink it as a non-trace head even if it was
* linked to a trace; this routine fixes that up afterward.
*/
| coarse_info_t *info = (freeze_info->dst_info != NULL) ? |
| freeze_info->dst_info : freeze_info->src_info; |
| ASSERT(freeze_info->unlink); |
| if (entrance_stub_jmp_target(stub) == info->fcache_return_prefix) { |
| cache_pc tgt = info->trace_head_return_prefix; |
| ASSERT(dynamo_all_threads_synched); /* thus NOT_HOT_PATCHABLE */ |
| insert_relative_target(entrance_stub_jmp(stub) + 1/*skip opcode*/, |
| tgt, NOT_HOT_PATCHABLE); |
| LOG(THREAD, LOG_FRAGMENT, 4, |
| " fixing up stub "PFX" to be unlinked as a trace head\n", stub); |
| } else |
| ASSERT(entrance_stub_jmp_target(stub) == info->trace_head_return_prefix); |
| } |
| |
| static void |
| push_pending_freeze(dcontext_t *dcontext, coarse_freeze_info_t *freeze_info, |
| cache_pc exit_tgt, uint cti_len, cache_pc cti_pc, |
| cache_pc body_start_pc) |
| { |
| pending_freeze_t *pending; |
| cache_pc stub_target; |
| uint sz; |
| |
| pending = HEAP_TYPE_ALLOC(dcontext, pending_freeze_t, |
| ACCT_MEM_MGT/*appropriate?*/, UNPROTECTED); |
| ASSERT(coarse_is_entrance_stub(exit_tgt)); |
| pending->tag = entrance_stub_target_tag(exit_tgt, freeze_info->src_info); |
| stub_target = entrance_stub_jmp_target(exit_tgt); |
| if (entrance_stub_linked(exit_tgt, freeze_info->src_info) && |
| get_fcache_coarse_info(stub_target) == freeze_info->src_info) { |
| /* Intra-unit non-trace-head target: eliminate stub */ |
| pending->entrance_stub = false; |
| pending->cur_pc = stub_target; |
| /* trace heads are discovered in the htable walk, never pushed here */ |
| pending->trace_head = false; |
| } else { |
| /* Leave stub */ |
| pending->entrance_stub = true; |
| pending->cur_pc = exit_tgt; |
| DOCHECK(1, { |
| cache_pc body; |
| /* A rank order violation (grabbing src htable read lock while |
| * holding frozen htable write lock) prevents us from calling the |
| * exported interface fragment_coarse_lookup_in_unit, so instead we |
| * have a hack where we call the lower-level routine that is |
| * exported only for us. |
| */ |
| coarse_body_from_htable_entry(dcontext, freeze_info->src_info, |
| pending->tag, exit_tgt, NULL, &body); |
| ASSERT(body == NULL || |
| coarse_is_trace_head(exit_tgt) || |
| fragment_lookup_trace(dcontext, pending->tag) != NULL); |
| }); |
| /* We do not look up body pc to see if a trace head stub linked to |
| * a trace; instead we fix up the unlinked target (for freeze_info->unlink) |
* when we proactively add the stub while processing the head body
| */ |
| pending->trace_head = coarse_is_trace_head(exit_tgt); |
| /* If target is trace head in same unit, we could add to pending, |
| * but don't have body pc handy, so we let htable walk find it |
| */ |
| } |
| ASSERT(cti_len > 4); |
| sz = cti_len - 4; |
| pending->link_cti_opnd = freeze_info->cache_cur_pc + sz; |
| memcpy(freeze_info->cache_cur_pc, cti_pc, sz); |
| if (body_start_pc == cti_pc && !DYNAMO_OPTION(unsafe_freeze_elide_sole_ubr)) { |
| /* case 9677: unsafe to elide entire-bb-ubr */ |
| pending->elide_ubr = false; |
| } else /* elide if possible */ |
| pending->elide_ubr = true; |
| freeze_info->cache_cur_pc += cti_len; |
| pending->next = freeze_info->pending; |
| freeze_info->pending = pending; |
| } |
| |
| static cache_pc |
| redirect_to_tgt_ibl_prefix(dcontext_t *dcontext, coarse_freeze_info_t *freeze_info, |
| cache_pc tgt) |
| { |
| ASSERT(freeze_info != NULL && freeze_info->src_info != NULL && |
| freeze_info->dst_info != NULL); |
| if (tgt == freeze_info->src_info->ibl_ret_prefix) |
| return freeze_info->dst_info->ibl_ret_prefix; |
| else if (tgt == freeze_info->src_info->ibl_call_prefix) |
| return freeze_info->dst_info->ibl_call_prefix; |
| else if (tgt == freeze_info->src_info->ibl_jmp_prefix) |
| return freeze_info->dst_info->ibl_jmp_prefix; |
| else |
| ASSERT_NOT_REACHED(); |
| return tgt; /* best chance of continuing on */ |
| } |
| |
| /* Transfers a coarse fragment to a new location. Queues up all of its |
| * exit targets for transfer as well, scheduling ubr last to enable ubr elision. |
| */ |
| void |
| transfer_coarse_fragment(dcontext_t *dcontext, coarse_freeze_info_t *freeze_info, |
| cache_pc body) |
| { |
| /* FIXME: for maximum code re-use, use decode_fragment instead of trying |
| * to be efficient? |
| * FIXME case 9428: 8-bit conversion |
| */ |
| cache_pc pc = body, next_pc = pc; /* source pcs */ |
| app_pc tgt; |
| size_t sz; |
| bool intra_fragment = false; |
| instr_t *instr; |
| instr = instr_create(dcontext); |
| do { |
| instr_reset(dcontext, instr); |
| pc = next_pc; |
| ASSERT(pc - body <= MAX_FRAGMENT_SIZE); |
| next_pc = decode_cti(dcontext, pc, instr); |
| /* Case 8711: we can't distinguish exit ctis from others, |
| * so we must assume that any cti is an exit cti, although |
| * we do now support intra-fragment ctis (i#665). |
| * Assumption: coarse-grain bbs have 1 ind exit or 2 direct, |
| * and no code beyond the last exit! |
| */ |
| intra_fragment = false; |
| if (instr_opcode_valid(instr) && instr_is_cti(instr)) { |
| if (instr_is_cti_short_rewrite(instr, pc)) { |
| /* Pull in the two short jmps for a "short-rewrite" instr. |
| * We must do this before asking whether it's an |
| * intra-fragment so we don't just look at the |
| * first part of the sequence. |
| */ |
| next_pc = remangle_short_rewrite(dcontext, instr, pc, |
| 0/*same target*/); |
| } |
| if (coarse_cti_is_intra_fragment(dcontext, freeze_info->src_info, |
| instr, body)) |
| intra_fragment = true; |
| } |
| } while (!instr_opcode_valid(instr) || !instr_is_cti(instr) || intra_fragment); |
| |
| /* copy body of fragment, up to start of cti */ |
| sz = pc - body; |
| memcpy(freeze_info->cache_cur_pc, body, sz); |
| freeze_info->cache_cur_pc += sz; |
| DODEBUG({ freeze_info->app_code_size += sz; }); |
| |
| /* Ensure we get proper target for short cti sequence */ |
| if (instr_is_cti_short_rewrite(instr, pc)) { |
| /* We already remangled if a short-rewrite */ |
| DODEBUG({ |
| /* We mangled 2-byte jecxz/loop* into 9-byte sequence */ |
| freeze_info->app_code_size -= 7; |
| freeze_info->added_jecxz_mangle += 7; |
| }); |
| } |
| tgt = opnd_get_pc(instr_get_target(instr)); |
| if (tgt == next_pc) { |
| ASSERT(instr_is_ubr(instr)); |
| /* indirect exit stub */ |
| ASSERT(coarse_is_indirect_stub(tgt)); |
| /* elide the jmp to the stub */ |
| pc += JMP_LONG_LENGTH /*ubr to stub*/; |
| sz = coarse_indirect_stub_size(freeze_info->src_info) - 4; |
| memcpy(freeze_info->cache_cur_pc, pc, sz); |
| freeze_info->cache_cur_pc += sz; |
| pc += sz; |
| tgt = PC_RELATIVE_TARGET(pc); |
| DODEBUG({ |
| freeze_info->num_indbr++; |
| freeze_info->app_code_size -= 6; /* save ecx */ |
| freeze_info->added_indbr_mangle += 6 /* save ecx */; |
| if (tgt == freeze_info->src_info->ibl_ret_prefix) { |
| /* ret imm goes from 3 bytes to 1+4=5 bytes |
| L3 c2 18 00 ret $0x0018 %esp (%esp) -> %esp |
| => |
| L4 67 64 89 0e e8 0e addr16 mov %ecx -> %fs:0xee8 |
| L4 59 pop %esp (%esp) -> %ecx %esp |
| L4 8d 64 24 18 lea 0x18(%esp) -> %esp |
| */ |
| /* guaranteed to be able to read 5 bytes back */ |
if (*(pc - 4) == 0x8d && *(pc - 3) == 0x64 && *(pc - 2) == 0x24) {
| freeze_info->app_code_size -= 2; |
| freeze_info->added_indbr_mangle += 2; |
| } |
| } else if (tgt == freeze_info->src_info->ibl_call_prefix) { |
| /* change from call* to mov is no size diff */ |
| freeze_info->added_indbr_mangle += 5 /* push immed */; |
| } else { |
| /* jmp*: change to mov is no size difference */ |
| } |
| freeze_info->added_indbr_stub += |
coarse_indirect_stub_size(freeze_info->src_info);
| }); |
| tgt = redirect_to_tgt_ibl_prefix(dcontext, freeze_info, tgt); |
| ASSERT(dynamo_all_threads_synched); /* thus NOT_HOT_PATCHABLE */ |
| freeze_info->cache_cur_pc = |
| insert_relative_target(freeze_info->cache_cur_pc, tgt, NOT_HOT_PATCHABLE); |
| } else { |
| /* FIXME: if we had profile info we could reverse the branch and |
| * make our cache trace-like |
| */ |
| DEBUG_DECLARE(bool is_cbr = false;) |
| if (instr_is_cbr(instr)) { |
| uint cbr_len; |
| /* push cbr target on todo-stack */ |
| if (instr_is_cti_short_rewrite(instr, pc)) |
| cbr_len = CBR_SHORT_REWRITE_LENGTH; |
| else |
| cbr_len = CBR_LONG_LENGTH; |
| push_pending_freeze(dcontext, freeze_info, tgt, cbr_len, pc, body); |
| ASSERT(pc + cbr_len == next_pc); |
| |
| /* process ubr next */ |
| instr_reset(dcontext, instr); |
| pc = next_pc; |
| next_pc = decode_cti(dcontext, pc, instr); |
| ASSERT(instr_opcode_valid(instr) && instr_is_ubr(instr)); |
| tgt = opnd_get_pc(instr_get_target(instr)); |
| DODEBUG({ |
| freeze_info->num_cbr++; |
| /* FIXME: assumes 32-bit cbr! */ |
| freeze_info->app_code_size += cbr_len; |
| freeze_info->added_fallthrough += 5; |
| is_cbr = true; |
| }); |
| } |
| |
| ASSERT(instr_is_ubr(instr)); |
| /* push ubr last, so we can elide the jmp if we process it next */ |
| push_pending_freeze(dcontext, freeze_info, tgt, JMP_LONG_LENGTH, pc, body); |
| ASSERT(pc + JMP_LONG_LENGTH == next_pc); |
| DODEBUG({ |
| if (!is_cbr) { |
| if (pc >= body + 5 && *(pc - 5) == 0x68) { |
| /* FIXME: could be an app push immed followed by app jmp */ |
| /* call => push immed: same size, but adding jmp */ |
| freeze_info->num_call++; |
| freeze_info->added_fallthrough += 5; /* jmp */ |
| } else { |
| /* FIXME: assumes 32-bit jmp! */ |
| freeze_info->app_code_size += 5; |
| freeze_info->num_jmp++; |
| } |
| } |
| }); |
| } |
| instr_destroy(dcontext, instr); |
| } |
| |
/* This routine walks the region [start, end) and updates jmp targets that
* fall outside [bounds_start, bounds_end): when walking the cache, extra-cache
* targets are shifted by cache_shift and jmps to stubs by stubs_shift.
* If !is_cache, the region is assumed to hold stubs and is decoded accordingly.
*/
| static void |
| coarse_unit_shift_jmps_internal(dcontext_t *dcontext, coarse_info_t *info, |
| ssize_t cache_shift, ssize_t stubs_shift, |
| size_t old_mapsz, |
| cache_pc start, cache_pc end, |
| cache_pc bounds_start, cache_pc bounds_end, |
| bool is_cache) |
| { |
| /* We must patch up indirect and direct stub jmps to prefixes */ |
| cache_pc pc = start; |
| cache_pc next_pc = pc; |
| app_pc tgt; |
| instr_t *instr; |
| ASSERT(dynamo_all_threads_synched); /* thus NOT_HOT_PATCHABLE */ |
| ASSERT(info->frozen); |
| instr = instr_create(dcontext); |
| while (next_pc < end) { |
| instr_reset(dcontext, instr); |
| pc = next_pc; |
| next_pc = decode_cti(dcontext, pc, instr); |
| /* Case 8711: we can't distinguish exit ctis from others. |
| * Note that we don't need to distinguish intra-fragment ctis here |
| * b/c we want to shift them by the same amount (xref i#665). |
| */ |
| if (instr_opcode_valid(instr) && instr_is_cti(instr)) { |
| if (instr_is_cti_short_rewrite(instr, pc)) |
| next_pc = remangle_short_rewrite(dcontext, instr, pc, 0/*same target*/); |
| tgt = opnd_get_pc(instr_get_target(instr)); |
| if (tgt < bounds_start || tgt >= bounds_end) { |
| ssize_t shift; |
| if (is_cache) { |
| /* break down into whether targeting stubs or not |
| * ok to use new prefix start, which is where old padding was |
| */ |
| if (tgt >= info->fcache_return_prefix && |
| tgt < info->cache_start_pc + old_mapsz) |
| shift = stubs_shift; |
| else |
| shift = cache_shift; |
| } else { |
| /* Shifting jmps from stubs |
| * We started with [cache | padding | stubs | padding] |
| * We then allocate new memory and copy there [cache | stubs] |
* Thus, the stubs have a double shift: once for the padding between
* cache and stubs, and once for the shift of the whole allocation.
| * This doesn't work if stubs target cache, but we assert on |
| * that in transfer_coarse_stub(). |
| */ |
| shift = cache_shift - stubs_shift; |
| } |
| LOG(THREAD, LOG_FRAGMENT, 4, |
| "\tshifting jmp @"PFX" "PFX" from "PFX" to "PFX"\n", |
| pc, next_pc - 4, tgt, tgt + shift); |
| insert_relative_target(next_pc - 4, tgt + shift, NOT_HOT_PATCHABLE); |
| if (!is_cache) { |
| /* we must update incoming after fixing target, since old_stub is |
| * inconsistent and we need a complete stub to dereference |
| */ |
| cache_pc old_stub, new_stub; |
| /* double-check post-shift: no prefix targets */ |
| ASSERT(tgt + shift < bounds_start || tgt + shift >= bounds_end); |
| new_stub = (cache_pc) ALIGN_BACKWARD(pc, coarse_stub_alignment(info)); |
| old_stub = new_stub + shift; |
| /* we can't assert that old_stub or new_stub are entrance_stubs |
| * since targets are currently inconsistent wrt info |
| */ |
| /* must update incoming stub for target */ |
| coarse_update_outgoing(dcontext, old_stub, new_stub, |
| info, true/*replace*/); |
| } |
| } |
| if (!is_cache) { |
/* for stubs, skip the padding (which we would otherwise decode as garbage) */
| ASSERT(next_pc + IF_X64_ELSE(3, 1) == |
| (cache_pc) ALIGN_FORWARD(next_pc, coarse_stub_alignment(info))); |
| next_pc = (cache_pc) ALIGN_FORWARD(next_pc, coarse_stub_alignment(info)); |
| } |
| } |
| } |
| instr_destroy(dcontext, instr); |
| } |
| |
| /* This routine walks info's cache and updates extra-cache jmp targets by cache_shift |
| * and jmps to stubs by stubs_shift. |
| * It also walks info's stubs and updates targets that are not prefixes: |
| * in other coarse units or in fine-grained fragment caches. |
| */ |
| static void |
| coarse_unit_shift_jmps(dcontext_t *dcontext, coarse_info_t *info, |
| ssize_t cache_shift, ssize_t stubs_shift, size_t old_mapsz) |
| { |
| LOG(THREAD, LOG_FRAGMENT, 4, |
| "shifting jmps for cache "PFX"-"PFX"\n", |
| info->cache_start_pc, info->cache_end_pc); |
| coarse_unit_shift_jmps_internal(dcontext, info, |
| cache_shift, stubs_shift, old_mapsz, |
| info->cache_start_pc, info->cache_end_pc, |
| info->cache_start_pc, info->cache_end_pc, |
| true/*cache*/); |
| LOG(THREAD, LOG_FRAGMENT, 4, |
| "shifting jmps for stubs "PFX"-"PFX"\n", |
| info->stubs_start_pc, info->stubs_end_pc); |
| coarse_unit_shift_jmps_internal(dcontext, info, |
| cache_shift, stubs_shift, old_mapsz, |
| info->stubs_start_pc, info->stubs_end_pc, |
| /* do not re-relativize prefix targets */ |
| info->fcache_return_prefix, info->stubs_end_pc, |
| false/*stubs*/); |
| } |
| |
| /*************************************************************************** |
| * MERGING FROZEN UNITS |
| */ |
| |
/* Processes a stub, located at old_stub in the original source unit, whose
| * targeting cti has been copied into the new being-built merged unit |
| * at dst_cache_pc and has length cti_len. Passing dst_cache_pc==NULL |
| * causes no cti patching to occur. |
| */ |
| static void |
| coarse_merge_process_stub(dcontext_t *dcontext, coarse_freeze_info_t *freeze_info, |
| cache_pc old_stub, uint cti_len, cache_pc dst_cache_pc, |
| bool replace_outgoing) |
| { |
| app_pc old_stub_tgt; |
| cache_pc dst_body, dst_stub, patch_pc, src_body; |
| bool trace_head; |
| ASSERT(coarse_is_entrance_stub(old_stub)); |
| ASSERT(dynamo_all_threads_synched); /* thus NOT_HOT_PATCHABLE */ |
| ASSERT((dst_cache_pc == NULL && cti_len == 0) || cti_len > 4); |
| patch_pc = dst_cache_pc + cti_len - 4; |
| old_stub_tgt = entrance_stub_target_tag(old_stub, freeze_info->src_info); |
| fragment_coarse_lookup_in_unit(dcontext, freeze_info->dst_info, |
| old_stub_tgt, &dst_stub, &dst_body); |
/* We need to know for sure whether the target is a trace head, as we're
* not doing a pass through the htable like we do for regular freezing
| */ |
| fragment_coarse_lookup_in_unit(dcontext, freeze_info->src_info, old_stub_tgt, |
| NULL, &src_body); |
| /* Consider both sources for headness */ |
| trace_head = |
| coarse_is_trace_head_in_own_unit(dcontext, old_stub_tgt, old_stub, |
| src_body, true, freeze_info->src_info) || |
| (dst_stub != NULL && |
| coarse_is_trace_head_in_own_unit(dcontext, old_stub_tgt, dst_stub, |
| dst_body, true, freeze_info->dst_info)); |
/* We should only be adding with no source cti if this is a trace head, or
* during the stub walk for the larger unit where we have a dup stub and
* aren't replacing */
| ASSERT(dst_cache_pc != NULL || trace_head || |
| (dst_body == NULL && dst_stub != NULL && !replace_outgoing)); |
| if (dst_body != NULL && !trace_head) { |
| /* Directly link and do not copy the stub */ |
| LOG(THREAD, LOG_FRAGMENT, 4, |
| "\ttarget "PFX" is in other cache @"PFX": directly linking\n", |
| old_stub_tgt, dst_body); |
| ASSERT(dst_stub == NULL); |
| ASSERT(dst_body >= freeze_info->dst_info->cache_start_pc && |
| dst_body < freeze_info->dst_info->cache_end_pc); |
| if (dst_cache_pc != NULL) |
| insert_relative_target(patch_pc, dst_body, NOT_HOT_PATCHABLE); |
| if (!freeze_info->unlink && entrance_stub_linked(old_stub, |
| freeze_info->src_info)) { |
/* ASSUMPTION: unlink == !in_place.
* If in-place, we must update the target's incoming info, whether the
* source is the primary (being replaced) or the secondary (probably
* being deleted since it's now in the merge result, but we don't want
* to crash while unlinking it: case 10382).
| */ |
| coarse_remove_outgoing(dcontext, old_stub, freeze_info->src_info); |
| } |
| } else if (dst_stub != NULL) { |
| LOG(THREAD, LOG_FRAGMENT, 4, |
| "\ttarget "PFX" is already in stubs @"PFX"\n", old_stub_tgt, dst_stub); |
| ASSERT(dst_body == NULL || trace_head); |
| /* Stub already exists: point to it */ |
| if (dst_cache_pc != NULL) |
| insert_relative_target(patch_pc, dst_stub, NOT_HOT_PATCHABLE); |
| /* Must remove incoming if one mergee had a cross link to the other */ |
| if ((dst_body != NULL || |
| /* If secondary merger was smaller and had a stub for the same target, |
| * we need to remove our outgoing since secondary added a new one. |
| * We want to do this only the 1st time we get here, and not if the |
| * primary merger added the stub, so we have the primary unlink |
| * the old stub (in else code below). |
| */ |
| replace_outgoing) && |
| entrance_stub_linked(old_stub, freeze_info->src_info)) { |
| coarse_remove_outgoing(dcontext, old_stub, freeze_info->src_info); |
| } |
| } else { |
| /* Copy stub */ |
| cache_pc stub_pc = freeze_info->stubs_cur_pc; |
| ASSERT(dst_body == NULL || trace_head); |
| LOG(THREAD, LOG_FRAGMENT, 4, |
| "\ttarget "PFX" is %s, adding stub @"PFX"\n", |
| old_stub_tgt, trace_head ? "trace head" : "not present", stub_pc); |
| transfer_coarse_stub(dcontext, freeze_info, old_stub, trace_head, |
| replace_outgoing); |
| if (replace_outgoing) { |
| /* Signal to later stubs that they don't need to remove the outgoing |
| * entry (as opposed to new stubs added by the secondary merger, |
| * for which we do need to remove). |
| * Assumption: if replace_outgoing then it's ok to unlink the old stub |
| * since it's going away anyway. |
| */ |
| unlink_entrance_stub(dcontext, old_stub, |
| trace_head ? FRAG_IS_TRACE_HEAD : 0, |
| freeze_info->src_info); |
| } |
| ASSERT(freeze_info->stubs_cur_pc == stub_pc + |
| coarse_stub_alignment(freeze_info->src_info)); |
| fragment_coarse_th_add(dcontext, freeze_info->dst_info, old_stub_tgt, |
| stub_pc - (ptr_uint_t) |
| freeze_info->dst_info->stubs_start_pc); |
| if (dst_cache_pc != NULL) |
| insert_relative_target(patch_pc, stub_pc, NOT_HOT_PATCHABLE); |
| } |
| } |
| |
| /* Assumption: cache has already been copied from src to dst. |
| * This routine walks the copied cache to find inter-unit links; it |
| * directly links them, eliminating their entrance stubs. |
* replace_outgoing really only applies if in_place: should we replace the
* incoming entries for outgoing links (true), or add new ones (false)?
| */ |
| static void |
| coarse_merge_update_jmps(dcontext_t *dcontext, coarse_freeze_info_t *freeze_info, |
| bool replace_outgoing) |
| { |
| /* Plan: cache has already been copied from src to dst, but we need to do |
| * inter-unit links. So we decode from the original cache to find the |
| * target stubs: if a target is present in dst, we do not copy the stub and |
| * we directly link; if not present, we copy the stub and re-relativize the |
| * jmp to the stub. We must also patch up indirect and direct stub jmps |
| * to prefixes. |
| */ |
| cache_pc pc = freeze_info->src_info->cache_start_pc; |
| cache_pc next_pc = pc; |
| cache_pc stop_pc = freeze_info->src_info->cache_end_pc; |
| app_pc tgt; |
| uint sz; |
| /* FIXME: share code w/ decode_fragment() and transfer_coarse_fragment() */ |
| instr_t *instr; |
/* We're mucking with caches, so all threads must be synched (though not strictly necessary if thread-private) */
| ASSERT(dynamo_all_threads_synched); |
| ASSERT(freeze_info->src_info->frozen); |
| LOG(THREAD, LOG_FRAGMENT, 4, |
| "coarse_merge_update_jmps %s "PFX" => "PFX"\n", |
| freeze_info->src_info->module, pc, freeze_info->cache_start_pc); |
| instr = instr_create(dcontext); |
| while (next_pc < stop_pc) { |
| instr_reset(dcontext, instr); |
| pc = next_pc; |
| next_pc = decode_cti(dcontext, pc, instr); |
| /* Case 8711: we can't distinguish exit ctis from others, |
| * so we must assume that any cti is an exit cti. |
| */ |
| /* We don't care about fragment boundaries so we can ignore elision. |
| * We only care about jmps to stubs. |
| */ |
| if (instr_opcode_valid(instr) && instr_is_cti(instr)) { |
| /* Ensure we get proper target for short cti sequence */ |
| if (instr_is_cti_short_rewrite(instr, pc)) |
| next_pc = remangle_short_rewrite(dcontext, instr, pc, 0/*same target*/); |
| tgt = opnd_get_pc(instr_get_target(instr)); |
| if (in_coarse_stub_prefixes(tgt)) { |
| /* We should not encounter prefix targets other than indirect while |
| * in the body of the cache (rest are from the stubs) */ |
| ASSERT(coarse_is_indirect_stub |
| (next_pc - coarse_indirect_stub_size(freeze_info->src_info))); |
| /* indirect exit stub: need to update jmp to prefix */ |
| ASSERT(instr_is_ubr(instr)); |
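                /* Advance pc to the jmp's 4-byte rel32 operand: a long ubr is
                 * its opcode byte(s) followed by a 32-bit displacement, so the
                 * operand begins JMP_LONG_LENGTH - 4 bytes into the instr.
                 */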
| sz = JMP_LONG_LENGTH /*ubr to stub*/ - 4; |
| pc += sz; |
| tgt = PC_RELATIVE_TARGET(pc); |
| tgt = redirect_to_tgt_ibl_prefix(dcontext, freeze_info, tgt); |
| ASSERT(dynamo_all_threads_synched); /* thus NOT_HOT_PATCHABLE */ |
| insert_relative_target(freeze_info->cache_start_pc + |
| (pc - freeze_info->src_info->cache_start_pc), |
| tgt, NOT_HOT_PATCHABLE); |
| next_pc = pc + 4; |
| } else if (tgt < freeze_info->src_info->cache_start_pc || tgt >= stop_pc) { |
| /* Must go through a stub */ |
| cache_pc dst_cache_pc = freeze_info->cache_start_pc + |
| (pc - freeze_info->src_info->cache_start_pc); |
| ASSERT(tgt >= freeze_info->src_info->stubs_start_pc && |
| tgt < freeze_info->src_info->stubs_end_pc); |
| if (instr_is_cbr(instr)) { |
| uint cbr_len; |
| if (instr_is_cti_short_rewrite(instr, pc)) |
| cbr_len = CBR_SHORT_REWRITE_LENGTH; |
| else |
| cbr_len = CBR_LONG_LENGTH; |
| ASSERT(pc + cbr_len == next_pc); |
| coarse_merge_process_stub(dcontext, freeze_info, tgt, cbr_len, |
| dst_cache_pc, replace_outgoing); |
                    /* If there is a ubr next (could be elided) we just hit
                     * it the next time around the loop */
| } else { |
| ASSERT(instr_is_ubr(instr)); |
| ASSERT(pc + JMP_LONG_LENGTH == next_pc); |
| coarse_merge_process_stub(dcontext, freeze_info, tgt, |
| JMP_LONG_LENGTH, dst_cache_pc, |
| replace_outgoing); |
| } |
| } else { |
| /* intra-cache target */ |
| /* I would assert that a pclookup finds an entry but that hits |
| * a recursive lock on non-recursive freeze_info->src_info->lock */ |
| } |
| } |
| } |
| instr_destroy(dcontext, instr); |
| |
    /* In debug, do the loop even w/o traces, for the asserts below */
| if (!DYNAMO_OPTION(disable_traces) IF_DEBUG(|| true)) { |
| /* We can have trace heads with no intra-unit targeters (secondary trace |
| * heads!) so we must also walk the stubs. Rather than require an |
| * iterator or helper routine in fragment or link we directly |
| * walk here. */ |
| for (pc = freeze_info->src_info->stubs_start_pc; |
| pc < freeze_info->src_info->stubs_end_pc; |
| pc += coarse_stub_alignment(freeze_info->src_info)) { |
| if (in_coarse_stub_prefixes(pc)) |
| continue; |
| ASSERT(coarse_is_entrance_stub(pc)); |
| if (entrance_stub_linked(pc, freeze_info->src_info)) { |
| cache_pc src_body; |
| /* for non-in-place merging we don't unlink stubs targeting |
| * the other mergee, so we must rule that out here. |
| * the only internally-untargeted stubs we need to add are |
| * those for our own bodies. */ |
| fragment_coarse_lookup_in_unit |
| (dcontext, freeze_info->src_info, |
| entrance_stub_target_tag(pc, freeze_info->src_info), |
| NULL, &src_body); |
| if (src_body != NULL) { |
| ASSERT(!DYNAMO_OPTION(disable_traces)); |
| coarse_merge_process_stub(dcontext, freeze_info, pc, |
| 0, NULL, replace_outgoing); |
| } |
| } |
| } |
| } |
| } |
| |
| /* Assumption: cache to be merged with has already been copied to dst. |
| * This routine walks the other src and copies over non-dup fragments, |
| * directly linking inter-unit links along the way. |
 * replace_outgoing really only applies if in_place: should we replace the
 * incoming entries for our outgoing links (true), or add new ones (false)?
| */ |
| static void |
| coarse_merge_without_dups(dcontext_t *dcontext, coarse_freeze_info_t *freeze_info, |
| ssize_t cache_offs, bool replace_outgoing) |
| { |
| /* Plan: we need to append the non-dup portions of src to the already-copied |
| * other source, as well as fixing up inter-unit links: if a target is present in |
| * dst, we do not copy the stub and we directly link; if not present, we copy the |
| * stub and re-relativize the jmp to the stub. We must also patch up indirect |
| * and direct stub jmps to prefixes. |
| * Though we need to know fragment boundaries, note that walking the htable |
| * instead of the cache doesn't buy us much: due to elision we still have to do |
| * pclookups, so we go ahead and walk the cache as it is already laid out. |
| */ |
| cache_pc pc = freeze_info->src_info->cache_start_pc; |
| cache_pc src_body, next_pc = pc, fallthrough_body = NULL; |
| cache_pc dst_body = NULL, last_dst_body; |
| cache_pc stop_pc = freeze_info->src_info->cache_end_pc; |
| app_pc tag, fallthrough_tag = NULL, tgt = NULL; |
| /* FIXME: share code w/ decode_fragment() and transfer_coarse_fragment() */ |
| instr_t *instr; |
| /* stored targets for fixup */ |
| jmp_tgt_list_t *jmp_list = NULL; |
| bool intra_fragment = false; |
    /* We're mucking with caches, so all threads must be synched (not needed if thread-private) */
| ASSERT(dynamo_all_threads_synched); |
| ASSERT(freeze_info->src_info->frozen); |
| LOG(THREAD, LOG_FRAGMENT, 4, |
| "coarse_merge_without_dups %s "PFX" => "PFX"\n", |
| freeze_info->src_info->module, pc, freeze_info->cache_cur_pc); |
| instr = instr_create(dcontext); |
| while (next_pc < stop_pc) { |
| last_dst_body = dst_body; |
| if (fallthrough_tag != NULL) { |
| /* still at dup fallthrough pc */ |
| ASSERT(fragment_coarse_entry_pclookup(dcontext, freeze_info->src_info, |
| next_pc) == fallthrough_tag); |
| tag = fallthrough_tag; |
| ASSERT(fallthrough_body != NULL); |
| dst_body = fallthrough_body; |
| /* do not go again through the fallthrough code below */ |
| instr_reset(dcontext, instr); |
| } else { |
| tag = fragment_coarse_entry_pclookup(dcontext, freeze_info->src_info, |
| next_pc); |
| } |
        /* We come back through the loop for the fall-through jmp of a cbr */
| ASSERT(tag != NULL || (instr_opcode_valid(instr) && instr_is_cbr(instr))); |
| if (tag != NULL && tag != fallthrough_tag) { |
| LOG(THREAD, LOG_FRAGMENT, 4, |
| "\tfragment entry point "PFX" = tag "PFX, next_pc, tag); |
| fragment_coarse_lookup_in_unit(dcontext, freeze_info->dst_info, |
| tag, NULL, &dst_body); |
| if (dst_body == NULL) { |
| cache_pc src_stub = NULL; |
| fragment_coarse_add(dcontext, freeze_info->dst_info, |
| tag, freeze_info->cache_cur_pc - |
| (ptr_uint_t)freeze_info->cache_start_pc + |
| cache_offs); |
| LOG(THREAD, LOG_FRAGMENT, 4, " (new => "PFX")\n", |
| freeze_info->cache_cur_pc); |
| /* this may be a trace head, in which case we need to add its stub |
| * now in case there are no intra-unit targeters of it (which |
| * means it is probably a secondary trace head) */ |
| fragment_coarse_lookup_in_unit(dcontext, freeze_info->src_info, |
| tag, &src_stub, NULL); |
| if (src_stub != NULL) { |
| ASSERT(!DYNAMO_OPTION(disable_traces)); |
| coarse_merge_process_stub(dcontext, freeze_info, src_stub, |
| 0, NULL, replace_outgoing); |
| } |
| } else { /* dup */ |
| LOG(THREAD, LOG_FRAGMENT, 4, " (duplicate)\n"); |
| /* if prev is cbr, this is a fall-through, which is handled below */ |
| } |
| } /* else carry through dst_body from last iter */ |
| src_body = next_pc; |
| fallthrough_tag = NULL; |
| fallthrough_body = NULL; |
| do { |
| ASSERT(next_pc < stop_pc); |
| if (next_pc >= stop_pc) |
| return; /* paranoid: avoid infinite loop */ |
| pc = next_pc; |
| if (!intra_fragment && |
| (next_pc != src_body || |
| /* fall-through of cbr will be looked up pre-1st iter above */ |
| (instr_opcode_valid(instr) && instr_is_cbr(instr)))) { |
| /* We assume at least one instr in each fragment, to avoid ambiguity */ |
| ASSERT_NOT_IMPLEMENTED(!DYNAMO_OPTION(unsafe_freeze_elide_sole_ubr)); |
| if (next_pc == src_body) { |
| fallthrough_tag = tag; |
| fallthrough_body = dst_body; |
| } else { |
| fallthrough_tag = |
| fragment_coarse_entry_pclookup(dcontext, freeze_info->src_info, |
| next_pc); |
| if (fallthrough_tag != NULL) { |
| fragment_coarse_lookup_in_unit(dcontext, freeze_info->dst_info, |
| fallthrough_tag, NULL, |
| &fallthrough_body); |
| } |
| } |
| if (fallthrough_tag != NULL) { |
| /* We'd rather keep fall-through elision if we can */ |
| LOG(THREAD, LOG_FRAGMENT, 4, |
| "\tfall-through tag "PFX" @"PFX, fallthrough_tag, next_pc); |
| if (fallthrough_body == NULL) { |
| /* Just keep going and process the fall-through's cti */ |
| LOG(THREAD, LOG_FRAGMENT, 4, " (new => "PFX")\n", |
| freeze_info->cache_cur_pc + (next_pc - src_body)); |
| if (dst_body != NULL) { /* prev is a dup */ |
| ASSERT_NOT_TESTED(); |
| src_body = next_pc; |
| tag = fallthrough_tag; |
| } |
| if (fallthrough_tag != tag) { |
| fragment_coarse_add(dcontext, freeze_info->dst_info, |
| fallthrough_tag, |
| freeze_info->cache_cur_pc + |
| (next_pc - src_body) - |
| (ptr_uint_t)freeze_info->cache_start_pc + |
| cache_offs); |
| DOCHECK(1, { |
| /* We should NOT need to add a stub like we might |
| * for the entry point add above: fall-through |
| * cannot be trace head! */ |
| cache_pc src_stub = NULL; |
| fragment_coarse_lookup_in_unit(dcontext, |
| freeze_info->src_info, |
| fallthrough_tag, |
| &src_stub, NULL); |
| ASSERT(src_stub == NULL); |
| }); |
| } |
| fallthrough_tag = NULL; |
| } else { |
| LOG(THREAD, LOG_FRAGMENT, 4, " (duplicate)\n"); |
| break; |
| } |
| } |
| } |
| instr_reset(dcontext, instr); |
| next_pc = decode_cti(dcontext, pc, instr); |
| ASSERT(next_pc - src_body <= MAX_FRAGMENT_SIZE); |
| /* Case 8711: we can't distinguish exit ctis from others, |
| * so we must assume that any cti is an exit cti. |
| * Assumption: coarse-grain bbs have 1 ind exit or 2 direct, |
| * and no code beyond the last exit! |
| */ |
| intra_fragment = false; |
| if (instr_opcode_valid(instr) && instr_is_cti(instr)) { |
| if (instr_is_cti_short_rewrite(instr, pc)) { |
| /* Pull in the two short jmps for a "short-rewrite" instr. |
| * We must do this before asking whether it's an |
| * intra-fragment so we don't just look at the |
| * first part of the sequence. |
| */ |
| next_pc = remangle_short_rewrite(dcontext, instr, pc, |
| 0/*same target*/); |
| } |
| if (coarse_cti_is_intra_fragment(dcontext, freeze_info->src_info, |
| instr, src_body)) |
| intra_fragment = true; |
| } |
| } while (!instr_opcode_valid(instr) || !instr_is_cti(instr) || intra_fragment); |
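        /* At this point instr holds the fragment's exit cti and next_pc its
         * end, unless we broke out above at a duplicate fall-through
         * (fallthrough_tag set and next_pc == pc).
         */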
| |
| if (dst_body == NULL) { /* not a dup */ |
| /* copy body of fragment, including cti (if not ending @ fall-through) */ |
| size_t sz = next_pc - src_body; |
| memcpy(freeze_info->cache_cur_pc, src_body, sz); |
| freeze_info->cache_cur_pc += sz; |
| } |
| |
| if (fallthrough_tag != NULL) { |
| ASSERT(next_pc == pc); /* should have short-circuited */ |
| /* add intra-cache jmp if elided but fall-through a dup */ |
| ASSERT(fallthrough_body != NULL); |
            /* If the start bb is not a dup, or we're post-cbr, we must un-elide */
| if (dst_body == NULL || (next_pc == src_body && last_dst_body == NULL)) { |
| LOG(THREAD, LOG_FRAGMENT, 4, |
| "\tadding jmp @"PFX" to "PFX" for fall-through tag "PFX"\n", |
| freeze_info->cache_cur_pc, fallthrough_body, fallthrough_tag); |
| freeze_info->cache_cur_pc = |
| insert_relative_jump(freeze_info->cache_cur_pc, fallthrough_body, |
| NOT_HOT_PATCHABLE); |
| } |
| } else { |
| ASSERT(instr_opcode_valid(instr) && instr_is_cti(instr)); |
| /* We already remangled if a short-rewrite so no extra work here */ |
| tgt = opnd_get_pc(instr_get_target(instr)); |
| if (in_coarse_stub_prefixes(tgt)) { |
| /* We should not encounter prefix targets other than indirect while |
| * in the body of the cache (rest are from the stubs) */ |
| ASSERT(coarse_is_indirect_stub |
| (next_pc - coarse_indirect_stub_size(freeze_info->src_info))); |
| /* indirect exit stub: need to update jmp to prefix */ |
| ASSERT(instr_is_ubr(instr)); |
| if (dst_body == NULL) { /* not a dup */ |
| tgt = PC_RELATIVE_TARGET(next_pc - 4); |
| tgt = redirect_to_tgt_ibl_prefix(dcontext, freeze_info, tgt); |
| ASSERT(dynamo_all_threads_synched); /* thus NOT_HOT_PATCHABLE */ |
| /* we've already copied the stub as part of the body */ |
| ASSERT(coarse_is_indirect_stub |
| (freeze_info->cache_cur_pc - |
| coarse_indirect_stub_size(freeze_info->src_info))); |
| freeze_info->cache_cur_pc -= 4; |
| freeze_info->cache_cur_pc = |
| insert_relative_target(freeze_info->cache_cur_pc, |
| tgt, NOT_HOT_PATCHABLE); |
| } |
| } else if (tgt < freeze_info->src_info->cache_start_pc || tgt >= stop_pc) { |
| if (dst_body == NULL) { /* not a dup */ |
| /* currently goes through a stub */ |
| ASSERT(tgt >= freeze_info->src_info->stubs_start_pc && |
| tgt < freeze_info->src_info->stubs_end_pc); |
| if (instr_is_cbr(instr)) { |
| uint cbr_len; |
| if (instr_is_cti_short_rewrite(instr, pc)) |
| cbr_len = CBR_SHORT_REWRITE_LENGTH; |
| else |
| cbr_len = CBR_LONG_LENGTH; |
| ASSERT(pc + cbr_len == next_pc); |
| coarse_merge_process_stub(dcontext, freeze_info, tgt, cbr_len, |
| freeze_info->cache_cur_pc - cbr_len, |
| replace_outgoing); |
                        /* If there is a ubr next (could be elided) we just hit
                         * it the next time around the loop */
| } else { |
| ASSERT(instr_is_ubr(instr)); |
| ASSERT(pc + JMP_LONG_LENGTH == next_pc); |
| coarse_merge_process_stub(dcontext, freeze_info, tgt, |
| JMP_LONG_LENGTH, |
| freeze_info->cache_cur_pc - |
| JMP_LONG_LENGTH, |
| replace_outgoing); |
| } |
| } |
| } else if (dst_body == NULL) { /* not a dup */ |
| /* Intra-cache target, but we're moving things around and have to do |
| * a separate pass since don't know future locations. Since the |
| * layout is changing and later we'd need multiple lookups to find |
             * the correspondence between src and dst, we store the target tag in
| * the jmp and replace it w/ the body in the later pass. |
| * We can't fit a 64-bit target, so we use offs from mod base. |
| * XXX: split pcaches up if app module is over 4GB. |
| */ |
| jmp_tgt_list_t *entry; |
| app_pc tgt_tag = |
| fragment_coarse_entry_pclookup(dcontext, freeze_info->src_info, tgt); |
| ASSERT(tgt_tag != NULL); |
| LOG(THREAD, LOG_FRAGMENT, 4, |
| "\tintra-cache src "PFX"->"PFX" tag "PFX" dst pre-"PFX"\n", |
| pc, tgt, tgt_tag, freeze_info->cache_cur_pc); |
| entry = HEAP_TYPE_ALLOC(dcontext, jmp_tgt_list_t, |
| ACCT_VMAREAS, PROTECTED); |
| entry->tag = tgt_tag; |
| entry->jmp_end_pc = freeze_info->cache_cur_pc; |
| entry->next = jmp_list; |
| jmp_list = entry; |
| } |
| } |
| } |
| |
| /* Second pass to update intra-cache targets. |
| * FIXME: combine w/ later coarse_unit_shift_jmps() |
| */ |
| while (jmp_list != NULL) { |
| jmp_tgt_list_t *next = jmp_list->next; |
| fragment_coarse_lookup_in_unit(dcontext, freeze_info->dst_info, |
| jmp_list->tag, NULL, &dst_body); |
| ASSERT(dst_body != NULL); |
| LOG(THREAD, LOG_FRAGMENT, 4, |
| "\tintra-cache dst -"PFX"->"PFX" tag "PFX"\n", |
| jmp_list->jmp_end_pc, dst_body, tgt); /* tgt always set here */ |
| /* FIXME: make 4 a named constant; used elsewhere as well */ |
| insert_relative_target(jmp_list->jmp_end_pc - 4, dst_body, NOT_HOT_PATCHABLE); |
| HEAP_TYPE_FREE(dcontext, jmp_list, jmp_tgt_list_t, ACCT_VMAREAS, PROTECTED); |
| jmp_list = next; |
| } |
| |
| instr_destroy(dcontext, instr); |
| } |
| |
| /* Returns a new coarse_info_t (or if in_place returns info1) that combines |
 * info1 and info2. If in_place, info1 is replaced with the result and returned;
| * else, a separate coarse_info_t is created and returned. |
| * If either of the units is live, then info1 must be live. |
| * If one of the two units covers a different code range, it must be info2, |
| * and it must be a subset of info1's range. |
| * If returns NULL, the merge failed; if in_place, info1 is unchanged on failure. |
| * If in_place, caller is responsible for flushing the ibl tables (case 11057). |
| */ |
| coarse_info_t * |
| coarse_unit_merge(dcontext_t *dcontext, coarse_info_t *info1, coarse_info_t *info2, |
| bool in_place) |
| { |
| coarse_info_t *merged; |
| coarse_info_t *res = NULL; |
| coarse_info_t *src_sm, *src_lg; |
| size_t cache1_size, cachelg_size, cache2_size, merged_cache_size; |
| size_t stubs1_size, stubs2_size; |
| coarse_freeze_info_t freeze_info; |
| |
| LOG(THREAD, LOG_CACHE, 2, "coarse_unit_merge %s %s with %s\n", |
| info1->module, info1->persisted ? "persisted" : "non-persisted", |
| info2->persisted ? "persisted" : "non-persisted"); |
| STATS_INC(coarse_units_merged); |
| |
| ASSERT(info1 != NULL && info2 != NULL); |
| ASSERT(info1->base_pc <= info2->base_pc && info1->end_pc >= info2->end_pc); |
| if (info1->base_pc > info2->base_pc || info1->end_pc < info2->end_pc) |
| return NULL; |
| /* Currently we only do online merging where one unit is live */ |
| ASSERT(!info1->persisted || !info2->persisted); |
| |
| /* Much more efficient to merge smaller cache into larger */ |
| if (fragment_coarse_num_entries(info1) > fragment_coarse_num_entries(info2)) { |
| src_lg = info1; |
| src_sm = info2; |
| } else { |
| src_lg = info2; |
| src_sm = info1; |
| } |
| |
| /* Ensure the pclookup table is set up for src_sm, to avoid recursive |
| * lock issues |
| */ |
| if (src_sm->pclookup_htable == NULL) { /* read needs no lock */ |
| fragment_coarse_entry_pclookup(dcontext, src_sm, NULL); |
| ASSERT(src_sm->pclookup_htable != NULL); |
| } |
| |
| acquire_recursive_lock(&change_linking_lock); |
| #ifdef HOT_PATCHING_INTERFACE |
| /* we may call coarse_unit_calculate_persist_info() */ |
| if (DYNAMO_OPTION(hot_patching)) |
| read_lock(hotp_get_lock()); |
| #endif |
| /* We can't grab both locks due to deadlock potential. Currently we are |
| * always fully synched, so we rely on that to synch with info2. |
| */ |
| ASSERT(dynamo_all_threads_synched); |
| mutex_lock(&info1->lock); |
| ASSERT(info1->cache != NULL && info2->cache != NULL); /* don't merge empty units */ |
| ASSERT(info1->frozen && info2->frozen); |
| |
| /* Tasks: |
| * 1) Merge the caches, eliminating duplicates. Various optimizations to |
| * preserve fall-through pairs if 2nd part is in other cache but 1st is |
| * not could be employed. |
| * 2) Turn cross-links into direct links and eliminate those entrance stubs. |
| * 3) Copy the rest of the entrance stubs. |
| * 4) Merge the RCT, RAC, and hotp fields, which may require calculating |
| * persist info if this unit has not been persisted; if neither unit |
| * has been persisted we do not need to do anything here. |
| * |
| * We will remove the extra post-stubs space since when persisting |
| * we only write up through stubs_end_pc and not the allocation end. |
| */ |
| /* Whether merging in-place or not, we create a new coarse_info_t. |
| * If in-place we delete the old one afterward. |
| */ |
| merged = coarse_unit_create(info1->base_pc, info1->end_pc, &info1->module_md5, |
| in_place && info1->in_use); |
| merged->frozen = true; |
| cache1_size = info1->cache_end_pc - info1->cache_start_pc; |
| cache2_size = info2->cache_end_pc - info2->cache_start_pc; |
| /* We shrink the cache to size below, after merging & removing dups */ |
| merged_cache_size = cache1_size + cache2_size; |
    /* We need the stubs to start on a new page since they will be +rw vs the cache's +r */
| merged_cache_size = ALIGN_FORWARD(merged_cache_size, PAGE_SIZE); |
| /* We only need one set of prefixes */ |
| stubs1_size = info1->stubs_end_pc - info1->fcache_return_prefix; |
| stubs2_size = info2->stubs_end_pc - info2->stubs_start_pc; |
| merged->mmap_size = merged_cache_size + stubs1_size + stubs2_size; |
| /* Our relative jmps require that we do not exceed 32-bit reachability */ |
| IF_X64(ASSERT(CHECK_TRUNCATE_TYPE_int(merged->mmap_size))); |
| merged->cache_start_pc = (cache_pc) heap_mmap(merged->mmap_size); |
| merged->cache_end_pc = merged->cache_start_pc + cache1_size + cache2_size; |
| merged->stubs_start_pc = |
| coarse_stubs_create(merged, merged->cache_start_pc + merged_cache_size, |
| stubs1_size + stubs2_size); |
| /* will be tightened up later */ |
| merged->stubs_end_pc = merged->cache_start_pc + merged->mmap_size; |
| ASSERT(merged->stubs_start_pc != NULL); |
| ASSERT(ALIGNED(merged->stubs_start_pc, coarse_stub_alignment(info1))); |
| ASSERT(merged->fcache_return_prefix == |
| merged->cache_start_pc + merged_cache_size); |
| ASSERT(merged->trace_head_return_prefix == merged->fcache_return_prefix + |
| (info1->trace_head_return_prefix - info1->fcache_return_prefix)); |
| ASSERT(merged->ibl_ret_prefix == merged->fcache_return_prefix + |
| (info1->ibl_ret_prefix - info1->fcache_return_prefix)); |
| ASSERT(merged->ibl_call_prefix == merged->fcache_return_prefix + |
| (info1->ibl_call_prefix - info1->fcache_return_prefix)); |
| ASSERT(merged->ibl_jmp_prefix == merged->fcache_return_prefix + |
| (info1->ibl_jmp_prefix - info1->fcache_return_prefix)); |
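    /* Resulting layout of the merged mapping: the two caches back to back,
     * padding up to a page boundary, then a single shared copy of the
     * prefixes followed by the stubs:
     *   [ cache1 | cache2 | pad | prefixes | stubs ]
     */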
| |
| /* Much more efficient to put the larger cache 1st, but we have to be sure |
| * to use the same order for both the htable and cache. |
| */ |
| |
| /* Try to size the dst tables to avoid collision asserts. |
| * Put the larger unit's entries into the dst table up front. |
| * FIXME: do this earlier and if one is a subset of other then do |
| * a simpler merge? |
| */ |
| fragment_coarse_htable_merge(dcontext, merged, src_lg, src_sm, |
| false/*do not add src_sm yet*/, |
| false/*leave th htable empty*/); |
| |
| /* Copy the 1st cache intact, and bring in the non-dup portions of the second |
     * (since we know the offsets of the 1st); then, walk the 1st and patch up its
| * inter-unit links while decoding from the original, now that the 2nd is in |
| * place. |
| */ |
| cachelg_size = (src_lg == info2) ? cache2_size : cache1_size; |
| memcpy(merged->cache_start_pc, src_lg->cache_start_pc, cachelg_size); |
| |
| memset(&freeze_info, 0, sizeof(freeze_info)); |
| freeze_info.dst_info = merged; |
| freeze_info.stubs_start_pc = merged->stubs_start_pc; |
| freeze_info.stubs_cur_pc = merged->stubs_start_pc; |
| /* Just like for freezing: leave inter-unit links intact for in_place. |
| * coarse_merge_process_stub() assumes that unlink == !in_place |
| */ |
| freeze_info.unlink = !in_place; |
| |
| freeze_info.src_info = src_sm; |
| freeze_info.cache_start_pc = merged->cache_start_pc + cachelg_size; |
| freeze_info.cache_cur_pc = freeze_info.cache_start_pc; |
| coarse_merge_without_dups(dcontext, &freeze_info, cachelg_size, |
| /* replace for primary unit; add for secondary */ |
| freeze_info.src_info == info1); |
| merged->cache_end_pc = freeze_info.cache_cur_pc; |
| |
| freeze_info.src_info = src_lg; |
| freeze_info.cache_start_pc = merged->cache_start_pc; |
| freeze_info.cache_cur_pc = freeze_info.cache_start_pc; |
| coarse_merge_update_jmps(dcontext, &freeze_info, |
| /* replace for primary unit; add for secondary */ |
| freeze_info.src_info == info1); |
| |
| ASSERT((ptr_uint_t)(freeze_info.stubs_cur_pc - merged->fcache_return_prefix) <= |
| stubs1_size + stubs2_size); |
| |
    /* We have extra space: slack in each stub region (from case 9428),
     * the duplicate prefix space, and eliminated inter-unit stubs, so
     * we must set the end pc.
     */
| coarse_stubs_set_end_pc(merged, freeze_info.stubs_cur_pc); |
| |
| LOG(THREAD, LOG_CACHE, 2, "merged size: stubs "SZFMT" => "SZFMT" bytes, " |
| "cache "SZFMT" ("SZFMT" align) => "SZFMT" ("SZFMT" align) bytes\n", |
| stubs1_size + stubs2_size, freeze_info.stubs_cur_pc - merged->stubs_start_pc, |
| cache1_size + cache2_size, |
| (info1->fcache_return_prefix - info1->cache_start_pc) + |
| (info2->fcache_return_prefix - info2->cache_start_pc), |
| merged->cache_end_pc - merged->cache_start_pc, |
| merged->fcache_return_prefix - merged->cache_start_pc); |
| |
| if (merged_cache_size - (merged->cache_end_pc - merged->cache_start_pc) > 0) { |
| /* With duplicate elimination we often have a lot of empty space, so we |
| * re-allocate into a proper-fitting space |
| */ |
| size_t cachesz = merged->cache_end_pc - merged->cache_start_pc; |
| size_t cachesz_aligned = ALIGN_FORWARD(cachesz, PAGE_SIZE); |
| size_t stubsz = merged->stubs_end_pc - merged->fcache_return_prefix; |
| size_t newsz = cachesz_aligned + stubsz; |
| size_t old_mapsz = merged->mmap_size; |
| cache_pc newmap = (cache_pc) heap_mmap(newsz); |
| ssize_t cache_shift = merged->cache_start_pc - newmap; |
        /* the stubs moved too, so this is a relative shift, not an absolute one */
| ssize_t stubs_shift = cachesz_aligned - |
| (merged->fcache_return_prefix - merged->cache_start_pc); |
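        /* E.g. (made-up sizes): if the prefixes sat at offset 0x3000 from the
         * old cache start but the shrunk cache now pads to 0x2000, the stubs
         * end up 0x1000 closer to the cache start: stubs_shift = 0x2000 -
         * 0x3000 = -0x1000.
         */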
| LOG(THREAD, LOG_CACHE, 2, |
| "re-allocating merged unit: "SZFMT" @"PFX" "PFX" => " |
| SZFMT" @"PFX" "PFX" "SZFMT" "SZFMT"\n", |
| merged->mmap_size, merged->cache_start_pc, merged->fcache_return_prefix, |
| newsz, newmap, newmap + cachesz_aligned, cache_shift, stubs_shift); |
| memcpy(newmap, merged->cache_start_pc, cachesz); |
| memcpy(newmap + cachesz_aligned, merged->fcache_return_prefix, stubsz); |
| heap_munmap(merged->cache_start_pc, merged->mmap_size); |
| coarse_stubs_delete(merged); |
| merged->mmap_size = newsz; |
| /* Our relative jmps require that we do not exceed 32-bit reachability */ |
| IF_X64(ASSERT(CHECK_TRUNCATE_TYPE_int(merged->mmap_size))); |
| merged->cache_start_pc = newmap; |
| merged->cache_end_pc = merged->cache_start_pc + cachesz; |
| merged->stubs_start_pc = |
| coarse_stubs_create(merged, merged->cache_start_pc + cachesz_aligned, |
| stubsz); |
| ASSERT(merged->stubs_start_pc != NULL); |
| ASSERT(ALIGNED(merged->stubs_start_pc, coarse_stub_alignment(info1))); |
| ASSERT(merged->fcache_return_prefix == newmap + cachesz_aligned); |
| coarse_stubs_set_end_pc(merged, merged->cache_start_pc + newsz); |
| coarse_unit_shift_jmps(dcontext, merged, cache_shift, stubs_shift, old_mapsz); |
| } |
| |
| /* Set cache bounds after we've potentially moved the initial cache */ |
| fcache_coarse_init_frozen(dcontext, merged, merged->cache_start_pc, |
| merged->fcache_return_prefix - merged->cache_start_pc); |
| |
| /* Currently we only do online merging where at least one unit is live, |
| * and we expect that to be info1 |
| */ |
| ASSERT(!info1->persisted); |
| /* Store the source persisted size so we know whether we need to merge |
| * with that on-disk file |
| */ |
| if (info2->persisted) |
| merged->persisted_source_mmap_size = info2->mmap_size; |
| |
| /* Merge the other fields */ |
| coarse_unit_merge_persist_info(dcontext, merged, info1, info2); |
| |
| DOLOG(5, LOG_CACHE, { |
| byte *pc = merged->cache_start_pc; |
| LOG(THREAD, LOG_CACHE, 1, "merged cache:\n"); |
| do { |
| pc = disassemble_with_bytes(dcontext, pc, THREAD); |
| } while (pc < merged->cache_end_pc); |
| pc = merged->stubs_start_pc; |
| LOG(THREAD, LOG_CACHE, 1, "merged stubs:\n"); |
| do { |
| if (((ptr_uint_t)pc) % coarse_stub_alignment(info1) == |
| coarse_stub_alignment(info1)-1) |
| pc++; |
| pc = disassemble_with_bytes(dcontext, pc, THREAD); |
| } while (pc < merged->stubs_end_pc); |
| }); |
| |
| /* FIXME case 9687: mark cache as read-only */ |
| |
| if (in_place) { |
| coarse_incoming_t *e; |
| coarse_replace_unit(dcontext, info1, merged); |
| merged = NULL; |
| /* up to caller to call mark_executable_area_coarse_frozen() if necessary */ |
| |
| /* case 10877: must combine the incoming lists |
| * targets should be unique, so can just append |
| */ |
| mutex_lock(&info1->incoming_lock); |
| /* can't grab info2 lock, so just like for main lock we rely on synchall */ |
| DODEBUG({ |
| /* Make sure no inter-incoming left */ |
| uint in1 = 0; |
| uint in2 = 0; |
| for (e = info1->incoming; e != NULL; e = e->next, in1++) |
| ASSERT(!e->coarse || get_stub_coarse_info(e->in.stub_pc) != info2); |
| for (e = info2->incoming; e != NULL; e = e->next, in2++) |
| ASSERT(!e->coarse || get_stub_coarse_info(e->in.stub_pc) != info1); |
| LOG(THREAD, LOG_CACHE, 1, "merging %d incoming into %d incoming\n", |
| in2, in1); |
| }); |
| e = info1->incoming; |
| if (e == NULL) { |
| info1->incoming = info2->incoming; |
| } else { |
| while (e->next != NULL) |
| e = e->next; |
| e->next = info2->incoming; |
| } |
| mutex_unlock(&info1->incoming_lock); |
| info2->incoming = NULL; /* ensure not freed when info2 is freed */ |
| coarse_unit_shift_links(dcontext, info1); |
| |
| res = info1; |
| } else { |
| /* we made separate copy that has no outgoing or incoming links */ |
| res = merged; |
| } |
| mutex_unlock(&info1->lock); |
| #ifdef HOT_PATCHING_INTERFACE |
| /* we may call coarse_unit_calculate_persist_info() */ |
| if (DYNAMO_OPTION(hot_patching)) |
| read_unlock(hotp_get_lock()); |
| #endif |
| release_recursive_lock(&change_linking_lock); |
| return res; |
| } |
| |
| /*************************************************************************** |
| * PERSISTENT CODE CACHE |
| */ |
| |
| #if defined(RETURN_AFTER_CALL) && defined(WINDOWS) |
| extern bool seen_Borland_SEH; |
| #endif |
| |
| /* get global or per-user directory name */ |
| bool |
| perscache_dirname(char *directory /* OUT */, uint directory_len) |
| { |
| int retval; |
| bool param_ok = false; |
| /* Support specifying the pcache dir from either a config param (historical |
| * from ASLR piggyback) or runtime option, though config param gets precedence. |
| */ |
| const char *param_name = DYNAMO_OPTION(persist_per_user) ? |
| PARAM_STR(DYNAMORIO_VAR_PERSCACHE_ROOT) : |
| PARAM_STR(DYNAMORIO_VAR_PERSCACHE_SHARED); |
| retval = get_parameter(param_name, directory, directory_len); |
| if (IS_GET_PARAMETER_FAILURE(retval)) { |
| string_option_read_lock(); |
| if (DYNAMO_OPTION(persist_per_user) && !IS_STRING_OPTION_EMPTY(persist_dir)) { |
| strncpy(directory, DYNAMO_OPTION(persist_dir), directory_len); |
| param_ok = true; |
| } else if (!IS_STRING_OPTION_EMPTY(persist_shared_dir)) { |
| strncpy(directory, DYNAMO_OPTION(persist_shared_dir), directory_len); |
| param_ok = true; |
| } else { |
| /* use log dir by default |
| * XXX: create subdir "logs/cache"? default is per-user so currently |
| * user dirs will be in logs/ which seems sufficient. |
| */ |
| uint len = directory_len; |
| create_log_dir(BASE_DIR); |
| if (get_log_dir(BASE_DIR, directory, &len) && len <= directory_len) |
| param_ok = true; |
| } |
| string_option_read_unlock(); |
| } else |
| param_ok = true; |
| if (param_ok) |
| directory[directory_len - 1] = '\0'; |
| return param_ok; |
| } |
| |
| /* get global or per-user directory name */ |
| static bool |
| get_persist_dir(char *directory /* OUT */, |
| uint directory_len, |
| bool create) |
| { |
| if (!perscache_dirname(directory, directory_len) || |
| double_strchr(directory, DIRSEP, ALT_DIRSEP) == NULL) { |
| SYSLOG_INTERNAL_ERROR_ONCE("Persistent cache root dir is invalid. " |
| "Persistent cache will not operate."); |
| return false; |
| } |
| |
| if (DYNAMO_OPTION(persist_per_user)) { |
| bool res = os_current_user_directory(directory, directory_len, |
| create); |
| /* null terminated */ |
| if (!res) { |
| /* directory name may be set even on failure */ |
| LOG(THREAD_GET, LOG_CACHE, 2, "\terror opening per-user dir %s\n", directory); |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| /* Checks for enough space on the volume where persisted caches are stored */ |
| bool |
| coarse_unit_check_persist_space(file_t fd_in/*OPTIONAL*/, size_t size_needed) |
| { |
| bool room = false; |
| file_t fd = fd_in; |
| if (fd == INVALID_FILE) { |
| /* Use directory to get handle on proper volume */ |
| char dir[MAXIMUM_PATH]; |
| if (get_persist_dir(dir, BUFFER_SIZE_ELEMENTS(dir), |
| true /* note we MUST always create directory |
| * even if never persisting */ |
| )) { |
| fd = os_open_directory(dir, 0); |
| } else |
| LOG(THREAD_GET, LOG_CACHE, 2, "\terror finding persist dir\n"); |
| } |
| if (fd != INVALID_FILE) { |
| room = check_low_disk_threshold(fd, (uint64)size_needed); |
| if (fd_in == INVALID_FILE) { |
| /* FIXME: cache the handle, combine with -validate_owner_dir */ |
| os_close(fd); |
| } |
| } else |
| LOG(THREAD_GET, LOG_CACHE, 2, "\terror opening persist dir\n"); |
| return room; |
| } |
| |
| /* If force_local, pretends module at pc has been exempted (so no effect |
| * unless -persist_check_exempted_options) |
| */ |
| static inline op_pcache_t |
| persist_get_options_level(app_pc pc, coarse_info_t *info, bool force_local) |
| { |
| if (!DYNAMO_OPTION(persist_check_options)) |
| return OP_PCACHE_NOP; |
| else if (DYNAMO_OPTION(persist_check_local_options) || |
| (DYNAMO_OPTION(persist_check_exempted_options) && |
| (force_local || |
| /* once loaded as local, must remain local, even if this |
| * process never hit the exemption */ |
| (info != NULL && TEST(PERSCACHE_EXEMPTION_OPTIONS, info->flags)) || |
| os_module_get_flag(pc, MODULE_WAS_EXEMPTED)) && |
| /* don't use local if no such options: else when load will think |
| * local when really global */ |
| has_pcache_dynamo_options(&dynamo_options, OP_PCACHE_LOCAL))) |
| return OP_PCACHE_LOCAL; |
| else |
| return OP_PCACHE_GLOBAL; |
| } |
| |
| static const char * |
| persist_get_relevant_options(dcontext_t *dcontext, char *option_buf, |
| uint buf_len, op_pcache_t level) |
| { |
| if (level == OP_PCACHE_NOP) |
| return ""; |
| get_pcache_dynamo_options_string(&dynamo_options, option_buf, buf_len, level); |
| option_buf[buf_len - 1] = '\0'; |
| LOG(THREAD, LOG_CACHE, 2, "Pcache-affecting options = %s\n", option_buf); |
| return option_buf; |
| } |
| |
| /* We identify persisted caches by mapping module info into a canonical name. |
| * There can be collisions (including for sub-module coarse units, such as |
| * separate +x module sections (xref case 9834 and case 9653); in addition, we |
| * can have different modules map to the same name), so caller must further |
| * verify matches. |
| * (Note that we use a different scheme than aslr's calculate_publish_name() |
 * as we are not dealing with file handles here but with in-memory module images).
| */ |
| static bool |
| get_persist_filename(char *filename /*OUT*/, uint filename_max /* max #chars */, |
| app_pc modbase, bool write, persisted_module_info_t *modinfo, |
| const char *option_string) |
| { |
| uint checksum, timestamp; |
| size_t size, code_size; |
| uint64 file_version; |
| const char *name; |
| uint hash; |
| char dir[MAXIMUM_PATH]; |
| |
| os_get_module_info_lock(); |
| if (!os_get_module_info(modbase, &checksum, ×tamp, &size, |
| &name, &code_size, &file_version)) { |
| os_get_module_info_unlock(); |
| return false; |
| } |
| if (name == NULL) { |
| /* theoretically possible but pathological, unless we came in late */ |
| ASSERT_CURIOSITY(IF_WINDOWS_ELSE_0(!dr_early_injected)); |
| LOG(GLOBAL, LOG_CACHE, 1, "\tmodule "PFX" has no name\n", modbase); |
| os_get_module_info_unlock(); |
| return false; |
| } |
| /* Should not have path chars in the name */ |
| ASSERT(get_short_name(name) == name && name[0] != DIRSEP); |
| name = get_short_name(name); /* paranoid */ |
| |
| /* Exclude list applies to both read and write */ |
| if (!IS_STRING_OPTION_EMPTY(persist_exclude_list)) { |
| bool exclude; |
| string_option_read_lock(); |
| exclude = check_filter(DYNAMO_OPTION(persist_exclude_list), name); |
| string_option_read_unlock(); |
| if (exclude) { |
| LOG(GLOBAL, LOG_CACHE, 1, "\t%s is on exclude list\n", name); |
| DOSTATS({ |
| if (write) |
| STATS_INC(coarse_units_persist_excluded); |
| else |
| STATS_INC(perscache_load_excluded); |
| }); |
| os_get_module_info_unlock(); |
| return false; |
| } |
| } |
| |
| /* Prepend the perscache dir. We assume it has already been created. |
| * FIXME: cache this, or better, cache the dir handle and use an |
| * os_open that can take it in. Note that the directory handle |
| * doesn't help us in Linux - we can neither open files relative to it, |
     * nor is there any strong chown guarantee that we can depend on.
| */ |
| if (!get_persist_dir(dir, BUFFER_SIZE_ELEMENTS(dir), write)) { |
| os_get_module_info_unlock(); |
| return false; |
| } |
| |
| /* FIXME case 8494: version-independent names so we clobber files |
| * from old versions of modules and have less need of stale file |
| * cleanup? If so should add in (hash of) full path (w/ volume) |
| * to avoid name conflicts? But if try to share across machines |
| * we do not want to include path since can vary. |
| */ |
| /* should we go to a 64-bit hash? */ |
| IF_X64(ASSERT(CHECK_TRUNCATE_TYPE_uint(size))); |
| hash = checksum ^ timestamp ^ (uint)size; |
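    /* Worked example (made-up values): checksum 0x00012345 ^ timestamp
     * 0x4a5b6c7d ^ (uint)size 0x00086000 yields hash 0x4a522f38, before any
     * option string is folded in below.
     */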
| /* case 9799: make options part of namespace */ |
| if (option_string != NULL) { |
| uint i; |
| ASSERT(DYNAMO_OPTION(persist_check_options)); |
| for (i = 0; i < strlen(option_string); i++) |
| hash ^= option_string[i] << ((i % 4)*8); |
| } |
| LOG(GLOBAL, LOG_CACHE, 2, "\thash = 0x%08x^0x%08x^"PFX" ^ %s = "PFX"\n", |
| checksum, timestamp, size, option_string == NULL ? "" : option_string, hash); |
| ASSERT_CURIOSITY(hash != 0); |
| |
| if (DYNAMO_OPTION(persist_per_app)) { |
| char *dirend = dir + strlen(dir); |
| /* FIXME case 9692: put tls offs instead of "dbg" here, and then |
| * sqlservr can have its own set if it ends up w/ separate tls offs |
| * (once we have non-per-app persisted files, that is). |
| */ |
| snprintf(dirend, BUFFER_SIZE_ELEMENTS(dir) - (dirend - dir), "%c%s%s", |
| DIRSEP, get_application_short_name(), IF_DEBUG_ELSE("-dbg", "")); |
| NULL_TERMINATE_BUFFER(dir); |
| LOG(GLOBAL, LOG_CACHE, 2, "\tper-app dir is %s\n", dir); |
| |
| /* check for existence first so we can require new during creation */ |
| if (!os_file_exists(dir, true/*is dir*/) && write) { |
| if (!os_create_dir(dir, CREATE_DIR_REQUIRE_NEW)) { |
| LOG(GLOBAL, LOG_CACHE, 2, "\terror creating per-app dir %s\n", dir); |
| os_get_module_info_unlock(); |
| return false; |
| } else |
| LOG(GLOBAL, LOG_CACHE, 2, "\tcreated per-app dir %s\n", dir); |
| } |
| } |
| /* FIXME PR 214088/case 9653: should we put the section ordinal or vmarea range into |
| * the name to support simultaneous sub-module files? If sections are |
| * adjacent they'll be one vmarea, so this affects very few dlls. For now |
| * we only support one file per module. We could also support multiple |
| * ranges per file. |
| */ |
| snprintf(filename, filename_max, "%s%c%s%s-0x%08x.%s", dir, DIRSEP, name, |
| IF_DEBUG_ELSE("-dbg", ""), hash, PERSCACHE_FILE_SUFFIX); |
| filename[filename_max-1] = '\0'; |
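    /* E.g. (hypothetical values): a release-build name looks like
     * <dir><DIRSEP>kernel32.dll-0x4a522f38.<PERSCACHE_FILE_SUFFIX>, with
     * "-dbg" inserted after the module name in debug builds.
     */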
| os_get_module_info_unlock(); |
| if (modinfo != NULL) { |
| modinfo->base = modbase; |
| modinfo->checksum = checksum; |
| modinfo->timestamp = timestamp; |
| modinfo->image_size = size; |
| modinfo->code_size = code_size; |
| modinfo->file_version = file_version; |
| } |
| return true; |
| } |
| |
| #if defined(DEBUG) && defined(INTERNAL) |
| /* FIXME: share w/ aslr.c */ |
| static void |
| print_module_digest(file_t f, module_digest_t *digest, const char *prefix) |
| { |
| LOG(f, LOG_CACHE, 1, "%s\n md5 short: ", prefix); |
| dump_buffer_as_bytes(f, digest->short_MD5, MD5_RAW_BYTES, DUMP_RAW); |
| LOG(f, LOG_CACHE, 1, "\n md5 long: "); |
| dump_buffer_as_bytes(f, digest->full_MD5, MD5_RAW_BYTES, DUMP_RAW); |
| LOG(f, LOG_CACHE, 1, "\n"); |
| } |
| #endif |
| |
| static void |
| persist_calculate_self_digest(module_digest_t *digest, coarse_persisted_info_t *pers, |
| app_pc map, uint validation_option) |
| { |
| struct MD5Context self_md5_cxt; |
| if (TEST(PERSCACHE_GENFILE_MD5_COMPLETE, validation_option)) { |
| MD5Init(&self_md5_cxt); |
| /* Even if generated w/ -persist_map_rw_separate but loaded w/o that |
| * option, the md5 should match since the memory layout is the same. |
| */ |
| MD5Update(&self_md5_cxt, map, pers->header_len + pers->data_len |
| - sizeof(persisted_footer_t)); |
| MD5Final(digest->full_MD5, &self_md5_cxt); |
| } |
| if (TEST(PERSCACHE_GENFILE_MD5_SHORT, validation_option)) { |
| MD5Init(&self_md5_cxt); |
| MD5Update(&self_md5_cxt, (byte *) pers, pers->header_len); |
| MD5Final(digest->short_MD5, &self_md5_cxt); |
| } |
| } |
| |
| static void |
| persist_calculate_module_digest(module_digest_t *digest, app_pc modbase, size_t modsize, |
| app_pc code_start, app_pc code_end, |
| uint validation_option) |
| { |
| size_t view_size = modsize; |
| if (TESTANY(PERSCACHE_MODULE_MD5_COMPLETE|PERSCACHE_MODULE_MD5_SHORT, |
| validation_option)) { |
| /* case 9717: need view size, not image size */ |
| view_size = os_module_get_view_size(modbase); |
| } |
| if (TEST(PERSCACHE_MODULE_MD5_COMPLETE, validation_option)) { |
| /* We can't use a full md5 from module_calculate_digest() since .data |
| * and other sections change between persist and load times (this is |
| * in-memory image, not file). So we do md5 of code region. If we have |
| * hooks at persist time but not at load time we will cry foul; |
| * PERSCACHE_MODULE_MD5_AT_LOAD tries to get around this by using |
| * the load-time md5 when persisting. |
| */ |
| struct MD5Context code_md5_cxt; |
| MD5Init(&code_md5_cxt); |
| /* Code range should be within a single memory allocation so it should |
| * all be readable. Xref case 9653. |
| */ |
| code_end = MIN(code_end, modbase + view_size); |
| MD5Update(&code_md5_cxt, code_start, code_end - code_start); |
| MD5Final(digest->full_MD5, &code_md5_cxt); |
| } |
| if (TEST(PERSCACHE_MODULE_MD5_SHORT, validation_option)) { |
| /* Examine only the image header and the footer (if non-writable) |
| * FIXME: if view_size < modsize, better to skip the footer than have it |
| * cover a data section? Should be ok w/ PERSCACHE_MODULE_MD5_AT_LOAD. |
| */ |
| module_calculate_digest(digest, modbase, view_size, |
| false /* not full */, true /* yes short */, |
| DYNAMO_OPTION(persist_short_digest), |
| /* do not consider writable sections */ |
| ~((uint)OS_IMAGE_WRITE), OS_IMAGE_WRITE); |
| } |
| } |
| |
| /* Compares all but the module base */ |
| static bool |
| persist_modinfo_cmp(persisted_module_info_t *mi1, persisted_module_info_t *mi2) |
| { |
| bool match = true; |
| /* We'd like to know if we have an md5 mismatch */ |
| ASSERT_CURIOSITY(module_digests_equal(&mi1->module_md5, &mi2->module_md5, |
| TEST(PERSCACHE_MODULE_MD5_SHORT, |
| DYNAMO_OPTION(persist_load_validation)), |
| TEST(PERSCACHE_MODULE_MD5_COMPLETE, |
| DYNAMO_OPTION(persist_load_validation))) |
| /* relocs => md5 diffs, until we handle relocs wrt md5 */ |
| IF_WINDOWS(|| mi1->base != mi2->base) |
| || check_filter("win32.partial_map.exe", |
| get_short_name(get_application_name()))); |
| if (TESTALL(PERSCACHE_MODULE_MD5_SHORT|PERSCACHE_MODULE_MD5_COMPLETE, |
| DYNAMO_OPTION(persist_load_validation))) { |
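        /* This single memcmp covers all fields from checksum to the end of
         * the struct, including the md5 digests. */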
| return (memcmp(&mi1->checksum, &mi2->checksum, |
| sizeof(*mi1)-offsetof(persisted_module_info_t, checksum)) == 0); |
| } |
| match = match && (memcmp(&mi1->checksum, &mi2->checksum, |
| offsetof(persisted_module_info_t, module_md5) - |
| offsetof(persisted_module_info_t, checksum)) == 0); |
| match = match && module_digests_equal(&mi1->module_md5, &mi2->module_md5, |
| TEST(PERSCACHE_MODULE_MD5_SHORT, |
| DYNAMO_OPTION(persist_load_validation)), |
| TEST(PERSCACHE_MODULE_MD5_COMPLETE, |
| DYNAMO_OPTION(persist_load_validation))); |
| return match; |
| } |
| |
| #ifdef WINDOWS |
| static void |
| persist_record_base_mismatch(app_pc modbase) |
| { |
| /* The idea is that we shouldn't waste our time re-persisting modules |
| * whose base keeps mismatching due to ASLR (we don't support rebasing |
| * pcaches yet). |
     * To record that we should not persist, we can't use a VM_ flag b/c
     * there is no simple way to tell vmareas.c why a load failed, so we
     * use a module flag.
| */ |
| if (!DYNAMO_OPTION(coarse_freeze_rebased_aslr) && |
| os_module_has_dynamic_base(modbase)) |
| os_module_set_flag(modbase, MODULE_DO_NOT_PERSIST); |
| } |
| #endif |
| |
| /* key is meant to be a short string to help identify the purpose of this name. |
| * FIXME: right now up to caller to figure out if the name collided w/ an |
| * existing file; maybe this routine should do that and return a file handle? |
| * FIXME: combine w/ get_unique_logfile, which handles file creation race? |
| * As it is this is not mkstemp, and caller must use OS_OPEN_REQUIRE_NEW. |
| */ |
| static void |
| get_unique_name(const char *origname, const char *key, |
| char *filename /*OUT*/, uint filename_max /* max #chars */) |
| { |
| /* We need unique names for: |
| * 1) case 9696: a temp file to build our pcache in |
| * before renaming to the real thing |
| * 2) case 9701: to rename the existing file before we replace it, as for |
| * images or mmaps with file handles open we must rename before deleting. |
| */ |
| /* FIXME: should we use full 64-bit TSC instead of pseudo-random 32-bit? |
| * FIXME: if we make name w/ full path too long we'll truncate: |
| * could cache dir handle and use relative name only. |
| */ |
| /* update aslr_get_unique_wide_name() with any improvements here */ |
| size_t timestamp = get_random_offset(UINT_MAX); |
| LOG_DECLARE(int trunc =) /* for DEBUG and INTERNAL */ |
| snprintf(filename, filename_max, |
| "%s-"PIDFMT"-%010"SZFC"-%s", origname, |
| get_process_id(), timestamp, key); |
| ASSERT_CURIOSITY(trunc > 0 && trunc < (int)filename_max && |
| "perscache new name truncated"); |
| /* FIXME: case 10677 file name truncation */ |
| |
| filename[filename_max-1] = '\0'; |
| } |
| |
| |
| /* Merges a given frozen unit with any new persisted cache file on disk. |
| * Caller must hold read lock hotp_get_lock(), if -hot_patching. |
| * If merge is successful, returns a new coarse_info_t, which caller is |
| * responsible for freeing; else returns NULL. |
| */ |
| static coarse_info_t * |
| coarse_unit_merge_with_disk(dcontext_t *dcontext, coarse_info_t *info, |
| const char *filename) |
| { |
| coarse_info_t *merge_with, *postmerge = NULL; |
| uint64 file_size; |
| size_t existing_size; |
| /* We may have already merged new code with an inuse persisted unit, so we |
| * check the stored size of that one if info is not itself persisted. |
| * FIXME: we could store the file handle: can we tell if two file handles |
| * refer to the same file? |
| */ |
| size_t inuse_size = (info->persisted) ? info->mmap_size : |
| info->persisted_source_mmap_size; |
| |
| LOG(THREAD, LOG_CACHE, 2, "coarse_unit_merge_with_disk %s\n", info->module); |
| ASSERT(dynamo_all_threads_synched); |
| ASSERT(info != NULL && info->cache != NULL); /* don't merge empty units */ |
| ASSERT(info->frozen); |
| #ifdef HOT_PATCHING_INTERFACE |
| ASSERT_OWN_READ_LOCK(DYNAMO_OPTION(hot_patching), hotp_get_lock()); |
| #endif |
| |
| /* Strategy: check current pcache file size (not perfect but good enough): |
| * if different from source size, or source was not persisted, then |
| * load in and merge. |
| * FIXME case 10356: need a better check since can have false positive |
| * and false negatives by only looking at size. |
| * Could repeat, and could also check again after writing to tmp file but |
| * before renaming. FIXME: should we do those things to reduce the race |
| * window where we lose another process's appended code? |
| */ |
| if (!os_get_file_size(filename, &file_size)) { |
| LOG(THREAD, LOG_CACHE, 2, " no existing file %s to merge with\n", filename); |
| return postmerge; |
| } |
| ASSERT_TRUNCATE(existing_size, size_t, file_size); |
| existing_size = (size_t) file_size; |
| LOG(THREAD, LOG_CACHE, 2, " size of existing %s is "SZFMT" vs our "SZFMT"\n", |
| filename, existing_size, inuse_size); |
| if (existing_size == 0) |
| return postmerge; |
| /* Merge a non-persisted (and not merged with persisted) file w/ any on-disk file |
| * that has appeared since startup; or, our own, if we abandoned it but stayed |
| * coarse due to a reset or hotp flush. |
| */ |
| if ((!info->persisted && info->persisted_source_mmap_size == 0 && |
| DYNAMO_OPTION(coarse_lone_merge)) || |
| /* FIXME case 10356: need a better check since can have false positive |
| * and false negatives by only looking at size. |
| */ |
| (existing_size != inuse_size && DYNAMO_OPTION(coarse_disk_merge))) { |
| merge_with = coarse_unit_load(dcontext, info->base_pc, info->end_pc, |
| false/*not for execution*/); |
| /* We rely on coarse_unit_load to reject incompatible pcaches, whether for |
| * tls, trace support, or other reasons. We do need to check the region |
| * here. FIXME: once we support relocs we need to handle appropriately. |
| */ |
| if (merge_with != NULL) { |
| LOG(THREAD, LOG_CACHE, 2, " merging to-be-persisted %s with on-disk %s\n", |
| info->module, filename); |
| /* Case 8640: allow merging with a smaller on-disk file, to avoid |
| * being forever stuck at that size by prior cores with no IAT merging |
| * or other features */ |
| if (merge_with->base_pc >= info->base_pc && |
| merge_with->end_pc <= info->end_pc) { |
| /* If want in-place merging, need to arrange for ibl invalidation: |
| * case 11057 */ |
| postmerge = coarse_unit_merge(dcontext, info, merge_with, |
| false/*!in-place*/); |
| ASSERT(postmerge != NULL); |
                /* if NULL, info is still guaranteed to be unchanged, so carry on */
| DOSTATS({ |
| if (postmerge == NULL) |
| STATS_INC(coarse_merge_disk_fail); |
| else |
| STATS_INC(coarse_merge_disk); |
| }); |
| } else { |
| /* FIXME case 10357: w/o -unsafe_ignore_IAT_writes |
| * we're going to see a lot |
| * of this w/ process-shared pcaches. Should we abort |
| * the persist at this point to keep the small-region |
| * file on disk? Otherwise we're going to keep |
| * trading w/ the small-region process. |
| */ |
| LOG(THREAD, LOG_CACHE, 2, |
| " region mismatch: "PFX"-"PFX" on-disk vs "PFX"-"PFX" live\n", |
| merge_with->base_pc, merge_with->end_pc, |
| info->base_pc, info->end_pc); |
| STATS_INC(coarse_merge_disk_mismatch); |
| } |
| coarse_unit_reset_free(dcontext, merge_with, false/*no locks*/, |
| true/*need to unlink*/, false/*not in use anyway*/); |
| coarse_unit_free(dcontext, merge_with); |
| merge_with = NULL; |
| } else |
| STATS_INC(coarse_merge_disk_fail); |
| } |
| return postmerge; |
| } |
| |
| /* Calculates information for persisting that we don't need for online-generated |
| * units, such as hotp points and RAC/RCT info. |
| * If just_live is true, ignores any currently in-use persisted info, assuming |
 * it will be merged in later by an in-use persisted unit.
| * Caller must hold change_linking_lock, read lock hotp_get_lock(), and info's lock. |
| */ |
| static void |
| coarse_unit_calculate_persist_info(dcontext_t *dcontext, coarse_info_t *info) |
| { |
| #ifdef HOT_PATCHING_INTERFACE |
| int len; |
| #endif |
| /* we need real dcontext for rac_entries_in_region() */ |
| ASSERT(dcontext != NULL && dcontext != GLOBAL_DCONTEXT); |
| |
| ASSERT_OWN_RECURSIVE_LOCK(true, &change_linking_lock); |
| #ifdef HOT_PATCHING_INTERFACE |
| ASSERT_OWN_READ_LOCK(DYNAMO_OPTION(hot_patching), hotp_get_lock()); |
| #endif |
| ASSERT_OWN_MUTEX(true, &info->lock); |
| LOG(THREAD, LOG_CACHE, 1, "coarse_unit_calculate_persist_info %s "PFX"-"PFX"\n", |
| info->module, info->base_pc, info->end_pc); |
| ASSERT(info->frozen && !info->persisted && !info->has_persist_info); |
| |
| if (DYNAMO_OPTION(coarse_freeze_elide_ubr)) |
| info->flags |= PERSCACHE_ELIDED_UBR; |
| #if defined(RETURN_AFTER_CALL) && defined(WINDOWS) |
| if (seen_Borland_SEH) |
| info->flags |= PERSCACHE_SEEN_BORLAND_SEH; |
| #endif |
| if (!DYNAMO_OPTION(disable_traces)) |
| info->flags |= PERSCACHE_SUPPORT_TRACES; |
| |
| #ifdef RCT_IND_BRANCH |
| ASSERT(info->rct_table == NULL); |
| if ((TEST(OPTION_ENABLED, DYNAMO_OPTION(rct_ind_call)) || |
| TEST(OPTION_ENABLED, DYNAMO_OPTION(rct_ind_jump))) && |
| (DYNAMO_OPTION(persist_rct) |
| # if defined(RETURN_AFTER_CALL) && defined(WINDOWS) |
| /* case 8648: we can have RCT entries that come from code patterns |
| * not derivable from relocation entries that we must persist. |
| */ |
| || os_module_get_flag(info->base_pc, MODULE_HAS_BORLAND_SEH) |
| # endif |
| )) { |
| app_pc limit_start = info->base_pc; |
| app_pc limit_end = info->end_pc; |
| if (DYNAMO_OPTION(persist_rct) && DYNAMO_OPTION(persist_rct_entire)) { |
| limit_start = 0; |
| limit_end = (app_pc) POINTER_MAX; |
| } |
| info->flags |= PERSCACHE_SUPPORT_RCT; |
| /* We don't support pulling out just entries from this sub-module-region */ |
| ASSERT(DYNAMO_OPTION(persist_rct_entire)); |
| info->flags |= PERSCACHE_ENTIRE_MODULE_RCT; |
| /* Get a copy of the live + persisted tables merged together */ |
| info->rct_table = rct_module_table_copy(dcontext, info->base_pc, RCT_RCT, |
| limit_start, limit_end); |
| } |
| #endif |
| #ifdef RETURN_AFTER_CALL |
| if (DYNAMO_OPTION(ret_after_call)) { |
| ASSERT(info->rac_table == NULL); |
| info->flags |= PERSCACHE_SUPPORT_RAC; |
        /* If we don't persist RAC we'll get violations when using the pcache! */
| /* Get a copy of the live + persisted tables merged together, |
| * but only the entries in this region. |
| * To make sure we have entries for call sites in the region, not targets, |
| * we ignore target just over start and include just over end: thus the |
| * +1 on both sides. |
| */ |
| info->rac_table = rct_module_table_copy(dcontext, info->base_pc, RCT_RAC, |
| info->base_pc+1, info->end_pc+1); |
| } |
| #endif |
| |
| #ifdef HOT_PATCHING_INTERFACE |
| if (DYNAMO_OPTION(hot_patching)) { |
| /* We expect 2 patch points per hotpatch on average; most have 1, |
| * some have up to 5. So we could hardcode a max length; instead |
| * we have a separate pass to get the size. |
| */ |
| /* FIXME: we could include the image entry point to avoid flushing the |
| * whole exe, but that only happens when we inject into secondary thread. |
| */ |
| /* FIXME: when merging live with in-use-persisted we don't need to |
| * re-calculate this and can just use the persisted array, since we would |
| * have flushed the unit on any new hotp |
| */ |
| ASSERT_OWN_READ_LOCK(DYNAMO_OPTION(hot_patching), hotp_get_lock()); |
| info->hotp_ppoint_vec_num = |
| hotp_num_matched_patch_points(info->base_pc, info->end_pc); |
| if (info->hotp_ppoint_vec_num > 0) { |
| info->hotp_ppoint_vec = |
| HEAP_ARRAY_ALLOC(dcontext, app_rva_t, info->hotp_ppoint_vec_num, |
| ACCT_HOT_PATCHING, PROTECTED); |
| len = hotp_get_matched_patch_points(info->base_pc, info->end_pc, |
| info->hotp_ppoint_vec, |
| info->hotp_ppoint_vec_num); |
| /* Should never have mismatch, as we're holding the hotp lock. |
| * Even if len < 0 the routine still fills up the array. */ |
| ASSERT(len == (int)info->hotp_ppoint_vec_num); |
| if (len != (int)info->hotp_ppoint_vec_num) { |
| /* abort writing out hotp points */ |
| info->hotp_ppoint_vec_num = 0; |
| } |
| LOG(THREAD, LOG_CACHE, 2, "hotp points for %s "PFX"-"PFX":\n", |
| info->module, info->base_pc, info->end_pc); |
| DODEBUG({ |
| uint i; |
| for (i = 0; i < info->hotp_ppoint_vec_num; i++) |
| LOG(THREAD, LOG_CACHE, 2, "\t"PFX"\n", info->hotp_ppoint_vec[i]); |
| }); |
| } else |
| ASSERT(info->hotp_ppoint_vec == NULL); |
| } else |
| ASSERT(info->hotp_ppoint_vec == NULL); |
| #endif |
| |
| info->has_persist_info = true; |
| } |
| |
| /* Merges the persist-calculated fields of info1 and info2 into dst, |
| * as well as non-persist-calculated fields like primary_for_module. |
| * Assumption: we've already checked for compatibility and here we just |
| * need to take the union (minus dups). |
| * Caller must hold info1's lock, and we must be dynamo_all_threads_synched. |
| */ |
| static void |
| coarse_unit_merge_persist_info(dcontext_t *dcontext, coarse_info_t *dst, |
| coarse_info_t *info1, coarse_info_t *info2) |
| { |
| ASSERT(dynamo_all_threads_synched); |
| LOG(THREAD, LOG_CACHE, 1, "coarse_unit_merge_persist_info %s "PFX"-"PFX"\n", |
| info1->module, info1->base_pc, info1->end_pc); |
| /* We can't grab both locks due to deadlock potential. Currently we are |
| * always fully synched, so we rely on that. |
| */ |
| ASSERT(dynamo_all_threads_synched); |
| ASSERT_OWN_MUTEX(true, &info1->lock); |
| |
    /* We need to incorporate flags from persisted units (e.g., Borland) */
| dst->flags |= info1->flags; |
| dst->flags |= info2->flags; |
| /* Some flags are intersection rather than union (but those like |
| * PERSCACHE_SUPPORT_TRACES are weeded out at load time and should |
| * not differ here) |
| */ |
| if (!TEST(PERSCACHE_MAP_RW_SEPARATE, info1->flags) || |
| !TEST(PERSCACHE_MAP_RW_SEPARATE, info2->flags)) |
| dst->flags &= ~PERSCACHE_MAP_RW_SEPARATE; |
| /* Same bounds, so same persistence privileges */ |
| dst->primary_for_module = info1->primary_for_module || info2->primary_for_module; |
| |
| ASSERT(!info2->persisted || !info2->in_use || |
           info2->has_persist_info); /* must have persist info to be in use */
| |
| /* Everything else is only necessary if we've already calculated persist |
| * info; if we haven't, then if we do persist later we'll calculate it |
| * fresh, so no need to calculate and merge here |
| */ |
| if (!info1->has_persist_info && !info2->has_persist_info) |
| return; |
| |
| if (!info1->has_persist_info) |
| coarse_unit_calculate_persist_info(dcontext, info1); |
| if (!info2->has_persist_info) |
| coarse_unit_calculate_persist_info(dcontext, info2); |
| ASSERT(info1->has_persist_info && info2->has_persist_info); |
| ASSERT(!dst->has_persist_info); |
| |
| ASSERT((info1->flags & |
| (PERSCACHE_SUPPORT_TRACES|PERSCACHE_SUPPORT_RAC|PERSCACHE_SUPPORT_RCT)) == |
| (info2->flags & |
| (PERSCACHE_SUPPORT_TRACES|PERSCACHE_SUPPORT_RAC|PERSCACHE_SUPPORT_RCT))); |
| |
| #ifdef RCT_IND_BRANCH |
| ASSERT(dst->rct_table == NULL); |
| if (TEST(OPTION_ENABLED, DYNAMO_OPTION(rct_ind_call)) || |
| TEST(OPTION_ENABLED, DYNAMO_OPTION(rct_ind_jump))) { |
| if (info2->persisted && info2->in_use && !info1->persisted) { |
| /* coarse_unit_calculate_persist_info already merged the new entries |
| * with the in-use persisted entries |
| */ |
| dst->rct_table = rct_table_copy(dcontext, info1->rct_table); |
| } else { |
| /* Note that we can't simply take one source if it has |
| * PERSCACHE_ENTIRE_MODULE_RCT as Borland can add entries: we must |
| * merge every time |
| */ |
| dst->rct_table = rct_table_merge(dcontext, info1->rct_table, |
| info2->rct_table); |
| } |
| } |
| #endif |
| #ifdef RETURN_AFTER_CALL |
| ASSERT(dst->rac_table == NULL); |
| if (DYNAMO_OPTION(ret_after_call)) { |
| if (info2->persisted && info2->in_use && !info1->persisted) { |
| /* coarse_unit_calculate_persist_info already merged the new entries |
| * with the in-use persisted entries |
| */ |
| dst->rac_table = rct_table_copy(dcontext, info1->rac_table); |
| } else { |
| dst->rac_table = rct_table_merge(dcontext, info1->rac_table, |
| info2->rac_table); |
| } |
| } |
| #endif |
| |
| #ifdef HOT_PATCHING_INTERFACE |
| ASSERT(dst->hotp_ppoint_vec == NULL); |
| if (info2->persisted && info2->in_use && !info1->persisted) { |
| /* We would have flushed info2 if a new hotp overlapped it so just |
| * take its hotp list. info1 may have a subset. |
| */ |
| dst->hotp_ppoint_vec_num = info2->hotp_ppoint_vec_num; |
| if (dst->hotp_ppoint_vec_num > 0) { |
| dst->hotp_ppoint_vec = |
| HEAP_ARRAY_ALLOC(dcontext, app_rva_t, dst->hotp_ppoint_vec_num, |
| ACCT_HOT_PATCHING, PROTECTED); |
| memcpy(dst->hotp_ppoint_vec, info2->hotp_ppoint_vec, |
| dst->hotp_ppoint_vec_num*sizeof(app_rva_t)); |
| } |
| } else { |
| /* We expect <= 5 entries in each array so quadratic and two passes |
| * shouldn't be a perf issue. |
| */ |
| ASSERT(dst->hotp_ppoint_vec_num == 0); |
| ASSERT(sizeof(app_rva_t) == sizeof(void *)); |
| array_merge(dcontext, true /* intersect */, |
| (void **) info1->hotp_ppoint_vec, info1->hotp_ppoint_vec_num, |
| (void **) info2->hotp_ppoint_vec, info2->hotp_ppoint_vec_num, |
| (void ***) &dst->hotp_ppoint_vec, |