/* blob: 9ffe341dd441529f862a1a47ee94f5c518e47047 [file] [log] [blame] */
/* **********************************************************
* Copyright (c) 2011-2014 Google, Inc. All rights reserved.
* Copyright (c) 2006-2010 VMware, Inc. All rights reserved.
* **********************************************************/
/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/* Copyright (c) 2006-2007 Determina Corp. */
/*
* perscache.c - coarse-grain units and persistent cache management
*/
#include "globals.h"
#include "link.h"
#include "fragment.h"
#include "fcache.h"
#include "monitor.h"
#include "perscache.h"
#include "instr.h"
#include "decode_fast.h"
#include "hotpatch.h"
#include "synch.h"
#include "module_shared.h"
#include <string.h> /* for memset */
#include <stddef.h> /* for offsetof */
#ifdef CLIENT_INTERFACE
# include "instrument.h"
#endif
#ifdef DEBUG
# include "disassemble.h"
#endif
/* Half of MAX_OPTIONS_STRING.
 * NOTE(review): presumably the size limit for the options string kept with a
 * persisted cache -- confirm against the users of this macro.
 */
#define MAX_PCACHE_OPTIONS_STRING (MAX_OPTIONS_STRING/2)
/* case 10823: align the option string to pointer size to keep the hashtable
 * data aligned.  We are not using a cache-line-aligned lookup table.
 */
#define OPTION_STRING_ALIGNMENT (sizeof(app_pc))
/* In general we want new data sections aligned to pointer size as well, to
 * keep the hashtable aligned.
 */
#define CLIENT_ALIGNMENT (sizeof(app_pc))
/* Singly-linked list node used while merging.
 * NOTE(review): the per-field descriptions below are inferred from the field
 * names only; confirm against the merge code that uses this type.
 */
typedef struct _jmp_tgt_list_t {
app_pc tag; /* presumably the application tag that the jump targets */
cache_pc jmp_end_pc; /* presumably the cache pc at the end of the jump */
struct _jmp_tgt_list_t *next; /* next node in the list */
} jmp_tgt_list_t;
/* Forward declarations of static routines that are referenced before they
 * are defined.
 */
/* Computes a module digest into *digest.  coarse_unit_create() calls this
 * with the module's base and size, the unit's own code bounds, and the
 * persist_gen_validation option value.
 */
static void
persist_calculate_module_digest(module_digest_t *digest, app_pc modbase, size_t modsize,
app_pc code_start, app_pc code_end,
uint validation_option);
/* Writes the persistent cache directory into the directory buffer (OUT) and
 * returns whether that succeeded.  perscache_init() passes create=true.
 */
static bool
get_persist_dir(char *directory /* OUT */,
uint directory_len,
bool create);
#if defined(DEBUG) && defined(INTERNAL)
/* Only declared for DEBUG+INTERNAL builds; coarse_unit_create() uses it to
 * log a freshly computed digest with a prefix string.
 */
static void
print_module_digest(file_t f, module_digest_t *digest, const char *prefix);
#endif
static void
coarse_unit_shift_jmps(dcontext_t *dcontext, coarse_info_t *info,
ssize_t cache_shift, ssize_t stubs_shift, size_t old_mapsz);
static void
coarse_unit_merge_persist_info(dcontext_t *dcontext, coarse_info_t *dst,
coarse_info_t *info1, coarse_info_t *info2);
#ifdef DEBUG
/* Debug-build-only lock, used below for pcache_dir_check_permissions().
 * It is deleted in perscache_slow_exit().
 */
DECLARE_CXTSWPROT_VAR(static mutex_t pcache_dir_check_lock,
INIT_LOCK_FREE(pcache_dir_check_lock));
#endif
/***************************************************************************
* COARSE-GRAIN UNITS
*/
/* Tries to make info the primary coarse unit of its module.
 * case 9653/10380: only one coarse unit in a module's +x region(s) is
 * persisted, and that unit is the one flagged as primary.
 * Units that are not in use are never marked.
 * On Windows the module's MODULE_HAS_PRIMARY_COARSE flag arbitrates: info
 * only becomes primary if the flag was not already set.  On other platforms
 * info is marked primary unconditionally.
 */
static void
coarse_unit_mark_primary(coarse_info_t *info)
{
    if (info->in_use) {
#ifdef WINDOWS
        /* FIXME PR 295529: put in for Linux once we have per-module flags */
        /* We take the write lock from the start so that the test and the set
         * happen under one lock hold, instead of having to check a second
         * time; this is not frequently called so no perf opt is needed here.
         */
        os_get_module_info_write_lock();
        if (!os_module_get_flag(info->base_pc, MODULE_HAS_PRIMARY_COARSE)) {
            os_module_set_flag(info->base_pc, MODULE_HAS_PRIMARY_COARSE);
            ASSERT(os_module_get_flag(info->base_pc, MODULE_HAS_PRIMARY_COARSE));
            info->primary_for_module = true;
            LOG(GLOBAL, LOG_CACHE, 1, "marking "PFX"-"PFX" as primary coarse for %s\n",
                info->base_pc, info->end_pc, info->module);
        }
        os_get_module_info_write_unlock();
#else
        info->primary_for_module = true;
#endif
    }
}
/* Gives up info's status as the primary coarse unit of its module.
 * On Windows this only acts when info is both primary and in use, in which
 * case the module's MODULE_HAS_PRIMARY_COARSE flag is cleared as well.
 * On other platforms the primary_for_module field is simply cleared.
 */
static void
coarse_unit_unmark_primary(coarse_info_t *info)
{
#ifndef WINDOWS
    info->primary_for_module = false;
#else
    /* FIXME PR 295529: put in for Linux once we have per-module flags */
    if (!info->primary_for_module || !info->in_use)
        return;
    ASSERT(os_module_get_flag(info->base_pc, MODULE_HAS_PRIMARY_COARSE));
    os_module_clear_flag(info->base_pc, MODULE_HAS_PRIMARY_COARSE);
    info->primary_for_module = false;
#endif
}
/* Flags info as in use and then attempts to make it the primary coarse unit
 * for its module via coarse_unit_mark_primary().
 */
void
coarse_unit_mark_in_use(coarse_info_t *info)
{
/* in_use must be set first: coarse_unit_mark_primary() ignores unused units */
info->in_use = true;
coarse_unit_mark_primary(info);
}
/* Helper for coarse_unit_create(): fills in unit->module_md5.
 * A caller-supplied digest is copied verbatim.  Otherwise, if
 * -persist_gen_validation contains PERSCACHE_MODULE_MD5_AT_LOAD, the digest
 * is calculated now; if the module's info cannot be obtained the field is
 * left as is.
 */
static void
coarse_unit_create_set_md5(coarse_info_t *unit, module_digest_t *digest)
{
    app_pc modbase;
    size_t modsize;
    bool found;

    if (digest != NULL) {
        memcpy(&unit->module_md5, digest, sizeof(unit->module_md5));
        return;
    }
    if (!TEST(PERSCACHE_MODULE_MD5_AT_LOAD, DYNAMO_OPTION(persist_gen_validation)))
        return;
    /* case 9735: calculate the module md5 at load time so we have a consistent
     * point at which to compare it when loading in a persisted cache file.
     * If we inject at different points we may see different views of
     * post-loader vs pre-loader module changes but we'll live with that.
     * Should have consistent injection points in steady state usage.
     * FIXME PR 215036: for 4.4 we'll want to not record the at-mmap md5, but
     * rather the 1st-execution-time post-rebase md5.
     */
    modbase = get_module_base(unit->base_pc);
    os_get_module_info_lock();
    /* For linux we can't do module segment walking at initial mmap time
     * b/c the segments are not set up: we hit SIGBUS!
     */
    IF_UNIX(ASSERT_BUG_NUM(215036, true));
    found = os_get_module_info(modbase, NULL, NULL, &modsize, NULL, NULL, NULL);
    /* the lock is only needed for the query itself */
    os_get_module_info_unlock();
    if (!found)
        return;
    persist_calculate_module_digest(&unit->module_md5, modbase, modsize,
                                    unit->base_pc, unit->end_pc,
                                    DYNAMO_OPTION(persist_gen_validation));
    DOLOG(1, LOG_CACHE, {
        print_module_digest(GLOBAL, &unit->module_md5, "md5 at load time: ");
    });
}

/* Allocates a zeroed coarse_info_t from the global heap for the region
 * bounded by base_pc and end_pc, initializes its two locks, and returns it.
 * If for_execution, the unit is immediately marked as in use.
 * If digest is non-NULL it becomes the unit's module md5; otherwise the md5
 * may be computed here depending on -persist_gen_validation.
 * Everything else is initialized lazily in coarse_unit_init().
 */
coarse_info_t *
coarse_unit_create(app_pc base_pc, app_pc end_pc, module_digest_t *digest,
                   bool for_execution)
{
    coarse_info_t *unit = HEAP_TYPE_ALLOC(GLOBAL_DCONTEXT, coarse_info_t,
                                          /* FIXME: have separate heap acct? */
                                          ACCT_VMAREAS, PROTECTED);
    memset(unit, 0, sizeof(*unit));
    ASSIGN_INIT_LOCK_FREE(unit->lock, coarse_info_lock);
    ASSIGN_INIT_LOCK_FREE(unit->incoming_lock, coarse_info_incoming_lock);
    /* XXX i#704: handle overflow: better to store size */
    unit->end_pc = end_pc;
    unit->base_pc = base_pc;
    /* FIXME: set PERSCACHE_X86_{32,64} here since for x64 the live
     * unit's flags are used for 32-bit code in 64-bit processes.
     * app_memory_allocation() may need a "bool x86_mode" param that is at
     * least passed in to here: not clear if it should be stored in the
     * vmarea.
     */
    DODEBUG({
        unit->is_local = false;
        unit->module = os_get_module_name_strdup(unit->base_pc HEAPACCT(ACCT_VMAREAS));
        if (unit->module == NULL) {
            /* a NULL name would crash our LOG statements, so use "" instead */
            unit->module = dr_strdup("" HEAPACCT(ACCT_VMAREAS));
        }
    });
    if (for_execution)
        coarse_unit_mark_in_use(unit);
    coarse_unit_create_set_md5(unit, digest);
    /* the rest is initialized lazily in coarse_unit_init() */
    RSTATS_ADD_PEAK(num_coarse_units, 1);
    return unit;
}
/* Frees the coarse_info_t struct itself, together with its two locks and, in
 * debug builds, its module-name string.
 * The unit's contents must already be gone: elements should have been freed
 * in coarse_unit_reset_free(), which the asserts below verify.
 * The struct is returned to the GLOBAL_DCONTEXT heap; the dcontext parameter
 * is not referenced directly in this routine.
 */
void
coarse_unit_free(dcontext_t *dcontext, coarse_info_t *info)
{
ASSERT(info != NULL);
/* Elements should have been freed in coarse_unit_reset_free() */
ASSERT(info->htable == NULL);
ASSERT(info->th_htable == NULL);
ASSERT(info->pclookup_htable == NULL);
ASSERT(info->cache == NULL);
ASSERT(info->incoming == NULL);
ASSERT(info->stubs == NULL);
ASSERT(info->cache_start_pc == NULL);
ASSERT(info->stubs_start_pc == NULL);
/* module is a debug-only string allocated in coarse_unit_create() */
DODEBUG({
if (info->module != NULL)
dr_strfree(info->module HEAPACCT(ACCT_VMAREAS));
});
DELETE_LOCK(info->lock);
DELETE_LOCK(info->incoming_lock);
HEAP_TYPE_FREE(GLOBAL_DCONTEXT, info, coarse_info_t,
ACCT_VMAREAS, PROTECTED);
/* balances the RSTATS_ADD_PEAK in coarse_unit_create() */
RSTATS_DEC(num_coarse_units);
}
/* Performs the lazy part of unit initialization that coarse_unit_create()
 * leaves undone: creates the unit's hashtable(s) and stubs and installs the
 * supplied cache.
 * The caller must hold info->lock and must pass in a non-NULL,
 * already-created cache.
 */
void
coarse_unit_init(coarse_info_t *info, void *cache)
{
ASSERT(info != NULL);
ASSERT(cache != NULL);
ASSERT_OWN_MUTEX(true, &info->lock);
fragment_coarse_htable_create(info, 0, 0);
coarse_stubs_create(info, NULL, 0);
/* cache is passed in since it can't be created while holding info->lock */
info->cache = cache;
}
/* Releases everything a coarse unit owns (hashtables, stubs, cache, and for
 * frozen units the mmapped or heap-mmapped cache+stub region and any
 * persist-info tables) and zeroes all of info's fields that precede its lock,
 * leaving the struct itself allocated for re-use or for coarse_unit_free().
 *
 * If caller holds change_linking_lock and info->lock, have_locks should be true.
 * If !need_info_lock, info must be a thread-local, unlinked, private pointer!
 *
 * unlink: whether to call coarse_unit_unlink() first; requires
 *   need_info_lock (else we would deadlock).
 * abdicate_primary: whether an in-use unit gives up its primary status.
 */
static void
coarse_unit_reset_free_internal(dcontext_t *dcontext, coarse_info_t *info,
bool have_locks, bool unlink, bool abdicate_primary,
bool need_info_lock)
{
DEBUG_DECLARE(bool ok;)
ASSERT(info != NULL);
LOG(THREAD, LOG_CACHE, 2, "coarse_unit_reset_free %s\n", info->module);
if (!have_locks) {
/* Though only called during all-threads-synch, we still grab our lock here */
/* change_linking_lock is higher rank than info->lock and is needed for
 * unlink, so it is acquired first
 */
if (unlink)
acquire_recursive_lock(&change_linking_lock);
if (need_info_lock)
mutex_lock(&info->lock);
}
ASSERT(!unlink || self_owns_recursive_lock(&change_linking_lock));
ASSERT_OWN_MUTEX(need_info_lock, &info->lock);
ASSERT(need_info_lock || !unlink); /* else will get deadlock */
/* case 11064: avoid rank order */
DODEBUG({
if (!need_info_lock)
info->is_local = true;
});
if (unlink)
coarse_unit_unlink(dcontext, info);
fragment_coarse_htable_free(info);
coarse_stubs_delete(info);
fcache_coarse_cache_delete(dcontext, info);
if (info->in_use && abdicate_primary)
coarse_unit_unmark_primary(info);
if (info->frozen) {
ASSERT(info->mmap_size > 0);
if (info->persisted) {
/* Persisted unit: its memory is a file mapping */
#if defined(RETURN_AFTER_CALL) || defined(RCT_IND_BRANCH)
if (info->in_use) {
rct_module_table_persisted_invalidate(dcontext, info->base_pc);
}
#endif
/* We use GLOBAL_DCONTEXT always for these in case in use */
#ifdef RCT_IND_BRANCH
if (info->rct_table != NULL)
rct_table_free(GLOBAL_DCONTEXT, info->rct_table, false/*data mmapped*/);
#endif
#ifdef RETURN_AFTER_CALL
if (info->rac_table != NULL)
rct_table_free(GLOBAL_DCONTEXT, info->rac_table, false/*data mmapped*/);
#endif
ASSERT(info->mmap_pc != NULL);
if (info->mmap_ro_size > 0) {
/* two views: the first mmap_ro_size bytes and the remainder are
 * unmapped separately
 */
DEBUG_DECLARE(ok =) unmap_file(info->mmap_pc, info->mmap_ro_size);
ASSERT(ok);
DEBUG_DECLARE(ok =) unmap_file(info->mmap_pc+info->mmap_ro_size,
info->mmap_size-info->mmap_ro_size);
ASSERT(ok);
info->mmap_ro_size = 0;
} else {
/* single view covering the whole mapping */
DEBUG_DECLARE(ok =) unmap_file(info->mmap_pc, info->mmap_size);
ASSERT(ok);
}
if (DYNAMO_OPTION(persist_lock_file)) {
/* with -persist_lock_file the unit holds a file handle that is
 * closed here
 */
ASSERT(info->fd != INVALID_FILE);
os_close(info->fd);
info->fd = INVALID_FILE;
}
} else {
/* Frozen but not persisted: the cache+stub region came from heap_mmap
 * and starts at cache_start_pc
 */
ASSERT(info->cache_start_pc != NULL);
ASSERT(info->stubs_start_pc != NULL);
ASSERT(info->mmap_ro_size == 0);
heap_munmap(info->cache_start_pc, info->mmap_size);
if (info->has_persist_info) {
/* Persisted units point at their mmaps for these structures;
 * non-persisted dynamically allocate them from DR heap.
 */
#ifdef RCT_IND_BRANCH
if (info->rct_table != NULL)
rct_table_free(dcontext, info->rct_table, true);
#endif
#ifdef RETURN_AFTER_CALL
if (info->rac_table != NULL)
rct_table_free(dcontext, info->rac_table, true);
#endif
#ifdef HOT_PATCHING_INTERFACE
if (info->hotp_ppoint_vec != NULL) {
HEAP_ARRAY_FREE(dcontext, info->hotp_ppoint_vec, app_rva_t,
info->hotp_ppoint_vec_num, ACCT_HOT_PATCHING, PROTECTED);
}
#endif
}
}
} else {
/* Never frozen: there must be no mapping and no persist info to release */
ASSERT(info->mmap_size == 0);
ASSERT(info->cache_start_pc == NULL);
ASSERT(info->stubs_start_pc == NULL);
ASSERT(!info->has_persist_info);
}
/* This struct may be re-used for a non-frozen/persisted unit if it was reset due
 * to a non-cache-consistency reason. Thus we want to preserve the locks, vm
 * region, and md5, but clear everything else (case 10119).
 * Only the fields laid out before the lock field are zeroed.
 */
memset(info, 0, offsetof(coarse_info_t, lock));
if (!have_locks) {
/* release in the reverse order of acquisition */
if (need_info_lock)
mutex_unlock(&info->lock);
if (unlink)
release_recursive_lock(&change_linking_lock);
}
}
/* Public entry point for resetting a coarse unit: frees the unit's contents
 * via coarse_unit_reset_free_internal(), always operating under info->lock
 * (need_info_lock is passed as true).
 * If caller holds change_linking_lock and info->lock, have_locks should be true.
 * unlink and abdicate_primary are forwarded unchanged.
 */
void
coarse_unit_reset_free(dcontext_t *dcontext, coarse_info_t *info,
bool have_locks, bool unlink, bool abdicate_primary)
{
coarse_unit_reset_free_internal(dcontext, info,
have_locks, unlink, abdicate_primary,
true/*need_info_lock*/);
}
/* Handle to the per-user persistent cache directory.  It is opened in
 * perscache_init() and closed in perscache_fast_exit(), and holds
 * INVALID_FILE whenever no directory is open.
 * Currently only one such directory is expected, matching the primary user
 * token; see case 8812.
 */
static file_t perscache_user_directory = INVALID_FILE;
/* Startup hook for persistent caches.
 * When -use_persisted, -persist_per_user and -validate_owner_dir are all set,
 * this opens (creating it if needed) the per-user cache directory, keeps the
 * handle in perscache_user_directory, and validates the directory's owner.
 * If validation fails the handle is closed again and reset to INVALID_FILE.
 */
void
perscache_init(void)
{
    char dir[MAXIMUM_PATH];

    if (!DYNAMO_OPTION(use_persisted) ||
        !DYNAMO_OPTION(persist_per_user) ||
        !DYNAMO_OPTION(validate_owner_dir))
        return;
    /* case 8812 we need to hold a handle to the user directory
     * from startup (we could delay until we open our first pcache file)
     */
    if (!get_persist_dir(dir, BUFFER_SIZE_ELEMENTS(dir),
                         true /* note we MUST always create directory
                               * even if never persisting */
                         ))
        return;
    /* we just need READ_CONTROL (on Windows) to check
     * ownership, and we are NOT OK with the directory being
     * renamed (or deleted and recreated by a malicious actor) while
     * we still have a handle to it.
     */
    perscache_user_directory = os_open_directory(dir, 0);
    ASSERT(perscache_user_directory != INVALID_FILE);
    if (perscache_user_directory == INVALID_FILE)
        return;
    /* note that now that we have the actual handle open, we can validate */
    /* see os_current_user_directory() for details */
    if (os_validate_user_owned(perscache_user_directory)) {
        /* either FAT32 or we are the proper owner */
        /* FIXME: we have to verify that the final permissions and
         * sharing attributes for cache/ and for the current
         * directory, do NOT allow anyone to rename our directory
         * while in use, and replace it.  Otherwise we'd still
         * have to verify owner for each file as well with
         * -validate_owner_file.  See duplicate comment in
         * open_relocated_dlls_filecache_directory()
         */
        return;
    }
    SYSLOG_INTERNAL_ERROR("%s is OWNED by an impostor!"
                          " Persistent cache use is disabled.",
                          dir);
    os_close(perscache_user_directory);
    perscache_user_directory = INVALID_FILE;
    /* we could also turn off use_persisted */
}
/* Exit-time hook: with -coarse_freeze_at_exit, freezes all coarse units (not
 * in place, i.e., the freeze-and-persist flavor), and then closes the
 * per-user directory handle if perscache_init() left one open.
 */
void
perscache_fast_exit(void)
{
if (DYNAMO_OPTION(coarse_freeze_at_exit)) {
coarse_units_freeze_all(false/*!in place*/);
}
if (perscache_user_directory != INVALID_FILE) {
/* the handle is only opened when -validate_owner_dir is set */
ASSERT_CURIOSITY(DYNAMO_OPTION(validate_owner_dir));
os_close(perscache_user_directory);
perscache_user_directory = INVALID_FILE;
}
ASSERT(perscache_user_directory == INVALID_FILE);
}
/* Exit-time cleanup of this file's remaining state: in debug builds, deletes
 * pcache_dir_check_lock.  Does nothing in non-debug builds.
 */
void
perscache_slow_exit(void)
{
DODEBUG(DELETE_LOCK(pcache_dir_check_lock););
}
/***************************************************************************
* FROZEN UNITS
*/
/* Translates a suspended thread for a coarse freeze, unless the thread is
 * known to be executing in a fine-grained cache.
 * This lives in its own routine to keep priv_mcontext_t out of the critical
 * stack path of coarse_units_freeze_all().
 */
static void
coarse_units_freeze_translate(thread_record_t *tr,
                              const thread_synch_state_t desired_state)
{
    priv_mcontext_t mc;
    bool got_mc = thread_get_mcontext(tr, &mc);
    ASSERT(got_mc);
    /* We're freeing coarse fragments so we must translate all
     * threads who are currently in a coarse unit, or about
     * to enter one (case 10030).  We don't translate threads
     * in fine-grained caches as an optimization.
     * If we did one unit at a time, could compare to just that unit.
     */
    if (got_mc && in_fcache((cache_pc)mc.pc) &&
        get_fcache_coarse_info((cache_pc)mc.pc) == NULL) {
        LOG(GLOBAL, LOG_FRAGMENT, 2,
            "\tin fine-grained cache so no translation needed\n");
        return;
    }
    /* FIXME optimization: pass cxt for translation */
    translate_from_synchall_to_dispatch(tr, desired_state);
}
/* Freezes all coarse units by synching with all threads (unless the caller
 * already has them synched) and delegating to vm_area_coarse_units_freeze().
 * If !in_place this routine freezes (if not already) and persists.
 * If in_place and we did our own synch, every other thread is afterward
 * translated where needed, has any in-progress trace aborted, and (with
 * -bb_ibl_targets) has its ibl tables flushed.
 * Returns without doing anything if coarse units or freezing are disabled,
 * if running without a code cache, or if the thread synch fails.
 * FIXME case 9975: provide support for freezing in place and
 * persisting in one call?  Should we support loading in a newly
 * persisted version to replace the in-memory unit?
 */
void
coarse_units_freeze_all(bool in_place)
{
    thread_record_t **threads = NULL;
    int i, num_threads = 0;
    bool own_synch;
    dcontext_t *my_dcontext = get_thread_private_dcontext();
    const thread_synch_state_t desired_state =
        THREAD_SYNCH_SUSPENDED_VALID_MCONTEXT_OR_NO_XFER;
    if (!DYNAMO_OPTION(coarse_units) || !DYNAMO_OPTION(coarse_enable_freeze) ||
        RUNNING_WITHOUT_CODE_CACHE())
        return;
    KSTART(coarse_freeze_all);
    /* on a detach we don't need to synch or xlate the threads */
    own_synch = !dynamo_all_threads_synched;
    ASSERT(own_synch IF_WINDOWS(|| doing_detach));
    STATS_INC(coarse_freezes);
    if (own_synch) {
        /* Called from nudge threads from the code cache, so
         * if the calling fragment could be coarse, we have to
         * terminate this thread.  Case 8711 does not allow calls from
         * coarse fragments so we're fine for now.
         */
        if (!synch_with_all_threads(desired_state, &threads, &num_threads,
                                    /* FIXME: can we set mcontext->pc to next_tag and
                                     * use THREAD_SYNCH_VALID_MCONTEXT?  not if nudge
                                     * comes here */
                                    THREAD_SYNCH_NO_LOCKS_NO_XFER, /* Case 6821 */
                                    /* if we fail to suspend a thread (e.g., for
                                     * privilege reasons) just abort */
                                    THREAD_SYNCH_SUSPEND_FAILURE_ABORT
                                    /* if we get in a race with detach, or are having
                                     * synch issues for whatever reason, bail out sooner
                                     * rather than later */
                                    | THREAD_SYNCH_SMALL_LOOP_MAX)) {
            /* just give up */
            ASSERT(!OWN_MUTEX(&all_threads_synch_lock) &&
                   !OWN_MUTEX(&thread_initexit_lock));
            ASSERT(threads == NULL);
            ASSERT(!dynamo_all_threads_synched);
            STATS_INC(coarse_freeze_abort);
            LOG(GLOBAL, LOG_CACHE, 2,
                "coarse_unit_freeze: aborting due to thread synch failure\n");
            SYSLOG_INTERNAL_WARNING("coarse freeze aborted due to thread synch failure");
            KSTOP(coarse_freeze_all);
            return;
        }
    }
    ASSERT(dynamo_all_threads_synched);
    ASSERT(OWN_MUTEX(&all_threads_synch_lock) && OWN_MUTEX(&thread_initexit_lock));
    DOSTATS({
        SYSLOG_INTERNAL_INFO("freezing all coarse units @ "SSZFMT" fragments",
                             GLOBAL_STAT(num_fragments));
    });
    /* This routine does the actual freeze and persist calls
     * FIXME case 9641: should we end the synch after freezing
     * so other threads can make progress while we persist?
     */
    vm_area_coarse_units_freeze(in_place);
    if (in_place && own_synch) {
        DEBUG_DECLARE(uint removed;)
        for (i = 0; i < num_threads; i++) {
            dcontext_t *dcontext = threads[i]->dcontext;
            if (dcontext != NULL && dcontext != my_dcontext) {
                /* FIXME: share these checks w/ other synchall-and-abort users
                 * (reset) and synchall-and-don't-abort (flush).
                 */
                /* Should have aborted if we had any synch failures */
                ASSERT(thread_synch_successful(threads[i]));
                if (is_thread_currently_native(threads[i])) {
                    /* Whether in native_exec or we lost control, since we're not
                     * freeing the interception buffer, no state to worry about.
                     */
                    LOG(GLOBAL, LOG_FRAGMENT, 2,
                        "\tcurrently native so no translation needed\n");
                } else if (thread_synch_state_no_xfer(dcontext)) {
                    /* Case 6821: do not translate other synch-all-thread users. */
                    LOG(GLOBAL, LOG_FRAGMENT, 2,
                        "\tat THREAD_SYNCH_NO_LOCKS_NO_XFER so no translation needed\n");
                } else {
                    /* subroutine to avoid priv_mcontext_t on our stack when we
                     * freeze + merge&load */
                    coarse_units_freeze_translate(threads[i], desired_state);
                }
                last_exit_deleted(dcontext);
                if (is_building_trace(dcontext)) {
                    /* Report the thread's id, not its index in threads[]: the
                     * index is meaningless to a log reader and need not match
                     * the type that TIDFMT expects.
                     */
                    LOG(THREAD, LOG_FRAGMENT, 2,
                        "\tsquashing trace of thread "TIDFMT"\n", threads[i]->id);
                    trace_abort(dcontext);
                }
                if (DYNAMO_OPTION(bb_ibl_targets)) {
                    /* FIXME: we could just remove the coarse ibl entries */
                    DEBUG_DECLARE(removed =)
                        fragment_remove_all_ibl_in_region(dcontext,
                                                          UNIVERSAL_REGION_BASE,
                                                          UNIVERSAL_REGION_END);
                    LOG(THREAD, LOG_FRAGMENT, 2, "\tremoved %d ibl entries\n", removed);
                }
            }
        }
        /* the shared (GLOBAL_DCONTEXT) ibl tables are flushed as well */
        if (DYNAMO_OPTION(bb_ibl_targets)) {
            /* FIXME: we could just remove the coarse ibl entries */
            DEBUG_DECLARE(removed =)
                fragment_remove_all_ibl_in_region(GLOBAL_DCONTEXT,
                                                  UNIVERSAL_REGION_BASE,
                                                  UNIVERSAL_REGION_END);
            LOG(GLOBAL, LOG_FRAGMENT, 2, "\tremoved %d ibl entries\n", removed);
        }
    }
    if (own_synch)
        end_synch_with_all_threads(threads, num_threads, true/*resume*/);
    KSTOP(coarse_freeze_all);
}
/* Removes dst's data and replaces it with src's data. Frees src.
 * Assumes that src is thread-local and not reachable by any other thread,
 * and that dst's lock is held.
 * Across the replacement dst keeps its own lock, incoming_lock, non_frozen
 * pointer, incoming list and (in debug builds) module-name string; every
 * other field is taken from src.
 */
static void
coarse_replace_unit(dcontext_t *dcontext, coarse_info_t *dst, coarse_info_t *src)
{
/* Perhaps we should separately allocate the locks to avoid this copying
 * for preservation? Or memcpy all but the lock fields? Or delete and
 * re-init them? If we move to a model where the world isn't suspended
 * we have to ensure no other thread is trying to lock.
 */
coarse_info_t *non_frozen;
mutex_t temp_lock, temp_incoming_lock;
DEBUG_DECLARE(const char *modname;)
ASSERT_OWN_MUTEX(true, &dst->lock);
/* Hand dst's incoming list to src so that it survives the reset of dst
 * below and comes back to dst with the memcpy.
 */
mutex_lock(&dst->incoming_lock);
ASSERT(src->incoming == NULL); /* else we leak */
src->incoming = dst->incoming;
dst->incoming = NULL; /* do not free incoming */
mutex_unlock(&dst->incoming_lock);
/* saved here and restored after the memcpy below overwrites dst */
non_frozen = dst->non_frozen;
coarse_unit_reset_free(dcontext, dst, true/*have locks*/,
false/*do not unlink*/, false/*keep primary*/);
/* Save the fields that must survive the wholesale copy from src */
temp_lock = dst->lock;
temp_incoming_lock = dst->incoming_lock;
DODEBUG({ modname = dst->module; });
memcpy(dst, src, sizeof(*dst));
dst->lock = temp_lock;
dst->incoming_lock = temp_incoming_lock;
dst->non_frozen = non_frozen;
DODEBUG({ dst->module = modname; });
ASSERT(dst->incoming == src->incoming);
/* update pointers from src to dst */
fcache_coarse_set_info(dcontext, dst);
patch_coarse_exit_prefix(dcontext, dst);
coarse_stubs_set_info(dst);
/* dst now owns everything src pointed at.  In debug builds, clear src's
 * pointers so that the asserts in coarse_unit_free() do not fire.
 */
DODEBUG({
/* avoid asserts */
src->htable = NULL;
src->th_htable = NULL;
src->pclookup_htable = NULL;
src->cache = NULL;
src->incoming = NULL;
src->stubs = NULL;
src->cache_start_pc = NULL;
src->stubs_start_pc = NULL;
});
coarse_unit_free(dcontext, src);
}
/* Freezes the coarse unit info into a single contiguous cache+stubs region.
 * In-place freezing replaces info with a frozen copy.
 * Otherwise, a new copy is created for persisting, while the original
 * copy is undisturbed and unfrozen.
 * Only purpose of !in_place is to write out to disk a
 * snapshot while letting coarse unit creation continue.  Can
 * write in_place out to disk as well, so we leave that up to
 * caller.
 * Caller must hold change_linking_lock, and all threads must be synched.
 * If in_place, caller is responsible for flushing the ibl tables (case 11057).
 *
 * Returns the frozen unit: info itself if in_place, else the newly created
 * copy.  Returns NULL, having frozen nothing, if info has no cache, is
 * already frozen, or is flagged PERSCACHE_CODE_INVALID.
 */
coarse_info_t *
coarse_unit_freeze(dcontext_t *dcontext, coarse_info_t *info, bool in_place)
{
    coarse_info_t *frozen = NULL;
    coarse_info_t *res = NULL;
    size_t frozen_stub_size, frozen_cache_size;
    uint num_fragments, num_stubs;
    coarse_freeze_info_t *freeze_info =
        HEAP_TYPE_ALLOC(dcontext, coarse_freeze_info_t,
                        ACCT_MEM_MGT/*appropriate?*/, PROTECTED);
    LOG(THREAD, LOG_CACHE, 2, "coarse_unit_freeze %s\n", info->module);
    STATS_INC(coarse_units_frozen);
    /* FIXME: Suspend world needed if not in-place?  Even though unit lock
     * is not held when changing unit links (e.g., unit flush holds only
     * target unit lock when unlinking incoming), the change_linking_lock
     * should give us guarantees.
     * But if we don't suspend-all we'll have other issues:
     * - fcache_coarse_init_frozen() will need to
     *   grab the shared cache lock, which is higher rank than coarse unit lock!
     * - same issue w/ fcache_coarse_cache_delete (via coarse_unit_reset_free)
     */
    /* FIXME: support single-unit freeze by having this routine itself
     * do a synch-all?
     */
    ASSERT(dynamo_all_threads_synched);
    ASSERT(info != NULL);
    ASSERT_OWN_RECURSIVE_LOCK(true, &change_linking_lock);
    /* trigger lazy initialize to avoid deadlock on calling
     * coarse_cti_is_intra_fragment() during shifting
     */
    fragment_coarse_create_entry_pclookup_table(dcontext, info);
    mutex_lock(&info->lock);
    ASSERT(info->cache != NULL); /* don't freeze empty units */
    ASSERT(!info->frozen); /* don't freeze already frozen units */
    /* release builds bail out gracefully where debug builds assert */
    if (info->cache == NULL || info->frozen)
        goto coarse_unit_freeze_exit;
    /* invalid unit shouldn't get this far */
    ASSERT(!TEST(PERSCACHE_CODE_INVALID, info->flags));
    if (TEST(PERSCACHE_CODE_INVALID, info->flags)) /* paranoid */
        goto coarse_unit_freeze_exit;
    memset(freeze_info, 0, sizeof(*freeze_info));
    freeze_info->src_info = info;
    /* Tasks:
     * 1) Calculate final size of cache and stub space:
     *    Walk entrance stubs and count how many are intra-unit links
     *    that can be changed to direct jmps
     * 2) Create single contiguous region to hold both cache and stubs,
     *    rounding up to a page boundary in the middle for +r->+rw
     * 3) Copy each fragment and stub over
     *
     * FIXME case 9428: shrink the cache to take advantage of elided jmps!
     * Requires a separate pass to touch up jmps to stubs/prefixes, or
     * re-ordering w/ stubs on top and cache on bottom.  That would also
     * put a read-only page at the end, so no guard page needed -- unless we
     * hook our own cache (case 9673) and are worried about brief periods of +w.
     */
    frozen_stub_size = coarse_frozen_stub_size(dcontext, info, &num_fragments,
                                               &num_stubs);
    frozen_cache_size = coarse_frozen_cache_size(dcontext, info);
    /* we need the stubs to start on a new page since will be +rw vs cache +r */
    frozen_cache_size = ALIGN_FORWARD(frozen_cache_size, PAGE_SIZE);
    freeze_info->cache_start_pc =
        (cache_pc) heap_mmap(frozen_stub_size + frozen_cache_size);
    /* FIXME: should show full non-frozen size as well */
    LOG(THREAD, LOG_CACHE, 2,
        "%d frozen stubs @ "SZFMT" bytes + %d fragments @ "SZFMT" bytes => "PFX"\n",
        num_stubs, frozen_stub_size, num_fragments,
        frozen_cache_size, freeze_info->cache_start_pc);
    STATS_ADD(coarse_fragments_frozen, num_fragments);
    /* We use raw pcs to build up our cache and stubs, and later we impose
     * our regular data structures on them
     */
    /* Whether freezing in-place or not, we create a new coarse_info_t.
     * If in-place we delete the old one afterward.
     */
    frozen = coarse_unit_create(info->base_pc, info->end_pc, &info->module_md5,
                                in_place && info->in_use);
    freeze_info->dst_info = frozen;
    frozen->frozen = true;
    frozen->cache_start_pc = freeze_info->cache_start_pc;
    frozen->mmap_size = frozen_stub_size + frozen_cache_size;
    /* Our relative jmps require that we do not exceed 32-bit reachability */
    IF_X64(ASSERT(CHECK_TRUNCATE_TYPE_int(frozen->mmap_size)));
    /* Same bounds, so same persistence privileges */
    frozen->primary_for_module = info->primary_for_module;
    /* the stubs live right after the (page-aligned) cache in the same region */
    freeze_info->stubs_start_pc =
        coarse_stubs_create(frozen, freeze_info->cache_start_pc + frozen_cache_size,
                            frozen_stub_size);
    ASSERT(freeze_info->stubs_start_pc != NULL);
    ASSERT(ALIGNED(freeze_info->stubs_start_pc, coarse_stub_alignment(info)));
    frozen->stubs_start_pc = freeze_info->stubs_start_pc;
    ASSERT(frozen->fcache_return_prefix ==
           freeze_info->cache_start_pc + frozen_cache_size);
#if 0
    ASSERT(frozen->trace_head_return_prefix == frozen->fcache_return_prefix +
           (info->trace_head_return_prefix - info->fcache_return_prefix));
    ASSERT(frozen->ibl_ret_prefix == frozen->fcache_return_prefix +
           (info->ibl_ret_prefix - info->fcache_return_prefix));
    ASSERT(frozen->ibl_call_prefix == frozen->fcache_return_prefix +
           (info->ibl_call_prefix - info->fcache_return_prefix));
    ASSERT(frozen->ibl_jmp_prefix == frozen->fcache_return_prefix +
           (info->ibl_jmp_prefix - info->fcache_return_prefix));
#endif
    fragment_coarse_htable_create(frozen, num_fragments, num_stubs);
    fcache_coarse_init_frozen(dcontext, frozen, freeze_info->cache_start_pc,
                              frozen_cache_size);
    /* assumption: leave inter-unit links intact for in_place, but not (for
     * persisting) otherwise
     */
    freeze_info->unlink = !in_place;
    freeze_info->cache_cur_pc = freeze_info->cache_start_pc;
    freeze_info->stubs_cur_pc = freeze_info->stubs_start_pc;
    fragment_coarse_unit_freeze(dcontext, freeze_info);
    ASSERT(freeze_info->pending == NULL);
    ASSERT(freeze_info->cache_cur_pc <=
           freeze_info->cache_start_pc + frozen_cache_size);
    ASSERT(freeze_info->stubs_cur_pc <=
           freeze_info->stubs_start_pc + frozen_stub_size);
    if (frozen->fcache_return_prefix + frozen_stub_size ==
        freeze_info->stubs_cur_pc)
        frozen->stubs_end_pc = freeze_info->stubs_cur_pc;
    else {
        /* FIXME case 9428: strange history here: I don't see a problem now,
         * but leaving some release-build code just in case.
         */
        ASSERT_NOT_REACHED();
        coarse_stubs_set_end_pc(frozen, freeze_info->stubs_cur_pc);
    }
    frozen->cache_end_pc = freeze_info->cache_cur_pc;
    LOG(THREAD, LOG_CACHE, 2, "frozen code stats for %s:\n  %6d app code\n",
        info->module, freeze_info->app_code_size);
    LOG(THREAD, LOG_CACHE, 2, "  %6d fallthrough\n", freeze_info->added_fallthrough);
    LOG(THREAD, LOG_CACHE, 2, "  %6d ind br mangle\n", freeze_info->added_indbr_mangle);
    LOG(THREAD, LOG_CACHE, 2, "  %6d indr br stubs\n", freeze_info->added_indbr_stub);
    LOG(THREAD, LOG_CACHE, 2, "  %6d jecxz mangle\n", freeze_info->added_jecxz_mangle);
    LOG(THREAD, LOG_CACHE, 2, " -%6d = 5 x %d elisions\n",
        freeze_info->num_elisions*5, freeze_info->num_elisions);
    LOG(THREAD, LOG_CACHE, 2, "ctis: %5d cbr, %5d jmp, %5d call, %5d ind\n",
        freeze_info->num_cbr, freeze_info->num_jmp, freeze_info->num_call,
        freeze_info->num_indbr);
    LOG(THREAD, LOG_CACHE, 2,
        "frozen final size: stubs "SZFMT" bytes + cache "SZFMT" bytes\n",
        freeze_info->stubs_cur_pc - freeze_info->stubs_start_pc,
        freeze_info->cache_cur_pc - freeze_info->cache_start_pc);
    /* FIXME case 9687: mark cache as read-only */
    if (in_place) {
        /* info takes over frozen's contents and frozen itself is freed */
        coarse_replace_unit(dcontext, info, frozen);
        frozen = NULL;
        mark_executable_area_coarse_frozen(info);
        coarse_unit_shift_links(dcontext, info);
        res = info;
    } else {
        /* we made separate copy that has no outgoing or incoming links */
        res = frozen;
    }
 coarse_unit_freeze_exit:
    HEAP_TYPE_FREE(dcontext, freeze_info, coarse_freeze_info_t,
                   ACCT_MEM_MGT/*appropriate?*/, PROTECTED);
    mutex_unlock(&info->lock);
    /* be sure to free to avoid missing entries if we add to info later */
    fragment_coarse_free_entry_pclookup_table(dcontext, info);
    DOLOG(3, LOG_CACHE, {
        if (res != NULL) {
            /* Walk res rather than frozen: on the in_place path frozen was
             * consumed by coarse_replace_unit() and set to NULL, and res
             * (which is info there) now holds the frozen cache.
             */
            byte *pc = res->cache_start_pc;
            LOG(THREAD, LOG_CACHE, 1, "frozen cache for %s:\n", info->module);
            do {
                app_pc tag = fragment_coarse_entry_pclookup(dcontext, res, pc);
                if (tag != NULL)
                    LOG(THREAD, LOG_CACHE, 1, "tag "PFX":\n", tag);
                pc = disassemble_with_bytes(dcontext, pc, THREAD);
            } while (pc < res->cache_end_pc);
        }
    });
    return res;
}
/* These decode-and-instr-using routines could go in arch/ as they assume that direct
* jump operands are 4 bytes and are at the end of the instruction.
*/
/* Copies the coarse entrance stub at "stub" to freeze_info->stubs_cur_pc,
 * re-targets its final jmp, and advances freeze_info->stubs_cur_pc past the
 * new stub (rounded up to the stub alignment).
 * Target selection:
 * - A stub that currently targets one of src_info's return prefixes is
 *   pointed at the matching prefix in dst_info when dst_info is non-NULL,
 *   and otherwise keeps its target.
 * - Any other (i.e., linked) stub is, if freeze_info->unlink is true,
 *   pointed at the fcache_return_prefix -- or the trace_head_return_prefix
 *   if trace_head -- of dst_info if that is non-NULL, else of src_info.
 * - Otherwise the link is kept and the outgoing-link bookkeeping is updated.
 *   replace_outgoing really only applies if in_place: should we replace
 *   outgoing link incoming entries (true), or add new ones (false).
 */
void
transfer_coarse_stub(dcontext_t *dcontext, coarse_freeze_info_t *freeze_info,
                     cache_pc stub, bool trace_head, bool replace_outgoing)
{
    coarse_info_t *src = freeze_info->src_info;
    coarse_info_t *dst = freeze_info->dst_info;
    cache_pc new_stub = freeze_info->stubs_cur_pc; /* where the copy goes */
    cache_pc tgt = entrance_stub_jmp_target(stub);
    cache_pc cur;
    bool keep_link = false;
    uint copy_len;

    /* Should not be targeting the cache, else our later shift will be wrong */
    ASSERT(tgt < src->cache_start_pc || tgt >= src->cache_end_pc);
    if (tgt == src->fcache_return_prefix) {
        ASSERT(!trace_head);
        if (dst != NULL)
            tgt = dst->fcache_return_prefix;
        LOG(THREAD, LOG_FRAGMENT, 4,
            "    transfer_coarse_stub "PFX": tgt is fcache_return_prefix\n", stub);
    } else if (tgt == src->trace_head_return_prefix) {
        ASSERT(trace_head);
        if (dst != NULL)
            tgt = dst->trace_head_return_prefix;
        LOG(THREAD, LOG_FRAGMENT, 4,
            "    transfer_coarse_stub "PFX": tgt is trace_head_return_prefix\n", stub);
    } else if (!freeze_info->unlink) {
        /* a linked stub whose link we preserve */
        keep_link = true;
    } else {
        coarse_info_t *prefix_owner = (dst == NULL) ? src : dst;
        if (trace_head) {
            tgt = prefix_owner->trace_head_return_prefix;
            LOG(THREAD, LOG_FRAGMENT, 4,
                "    transfer_coarse_stub "PFX": unlinking as trace head\n", stub);
        } else {
            tgt = prefix_owner->fcache_return_prefix;
            LOG(THREAD, LOG_FRAGMENT, 4,
                "    transfer_coarse_stub "PFX": unlinking as non-trace head\n", stub);
        }
    }
    /* Copy everything up to and including the jmp opcode; the jmp's operand
     * is written separately below.
     */
    copy_len = exit_stub_size(dcontext, tgt, FRAG_COARSE_GRAIN)
        - (JMP_LONG_LENGTH - 1/*get opcode*/);
    memcpy(new_stub, stub, copy_len);
    cur = new_stub + copy_len;
    ASSERT(cur == entrance_stub_jmp(new_stub) + 1/*skip opcode*/);
#ifdef X86
    ASSERT(*(cur-1) == JMP_OPCODE);
#elif defined(ARM)
    /* FIXME i#1551: NYI on ARM */
    ASSERT_NOT_IMPLEMENTED(false);
#endif
    /* if tgt unchanged we still need to re-relativize it */
    ASSERT(dynamo_all_threads_synched); /* thus NOT_HOT_PATCHABLE */
    cur = insert_relative_target(cur, tgt, NOT_HOT_PATCHABLE);
    if (keep_link) {
        coarse_update_outgoing(dcontext, stub, new_stub, src, replace_outgoing);
    }
    freeze_info->stubs_cur_pc =
        (cache_pc) ALIGN_FORWARD(cur, coarse_stub_alignment(src));
}
/* Fixes up an already-transferred stub that was unlinked as a non-trace-head
 * so that it is instead unlinked as a trace head.  Requires
 * freeze_info->unlink.  The prefixes consulted are those of
 * freeze_info->dst_info if non-NULL, else freeze_info->src_info.
 */
void
transfer_coarse_stub_fix_trace_head(dcontext_t *dcontext,
                                    coarse_freeze_info_t *freeze_info,
                                    cache_pc stub)
{
    /* We don't know body pc at fragment exit processing time and so can
     * add a stub and unlink it as a non-trace head if it was linked to
     * a trace, so we fix it up later
     */
    coarse_info_t *prefix_unit = freeze_info->src_info;
    cache_pc cur_tgt;

    if (freeze_info->dst_info != NULL)
        prefix_unit = freeze_info->dst_info;
    ASSERT(freeze_info->unlink);

    cur_tgt = entrance_stub_jmp_target(stub);
    if (cur_tgt != prefix_unit->fcache_return_prefix) {
        /* Nothing to fix: must already be unlinked as a trace head */
        ASSERT(cur_tgt == prefix_unit->trace_head_return_prefix);
        return;
    }

    ASSERT(dynamo_all_threads_synched); /* thus NOT_HOT_PATCHABLE */
    insert_relative_target(entrance_stub_jmp(stub) + 1/*skip opcode*/,
                           prefix_unit->trace_head_return_prefix,
                           NOT_HOT_PATCHABLE);
    LOG(THREAD, LOG_FRAGMENT, 4,
        " fixing up stub "PFX" to be unlinked as a trace head\n", stub);
}
/* Copies the opcode portion of the exit cti at cti_pc (all but its final 4
 * bytes) to freeze_info->cache_cur_pc, advances cache_cur_pc by cti_len, and
 * pushes a pending_freeze_t describing the cti's target (the entrance stub
 * exit_tgt) onto the front of freeze_info->pending.
 * body_start_pc is the start of the fragment containing the cti; it is used
 * only to decide whether a later ubr elision is permitted.
 */
static void
push_pending_freeze(dcontext_t *dcontext, coarse_freeze_info_t *freeze_info,
                    cache_pc exit_tgt, uint cti_len, cache_pc cti_pc,
                    cache_pc body_start_pc)
{
    coarse_info_t *src_unit = freeze_info->src_info;
    cache_pc linked_tgt;
    uint opcode_len;
    pending_freeze_t *entry =
        HEAP_TYPE_ALLOC(dcontext, pending_freeze_t,
                        ACCT_MEM_MGT/*appropriate?*/, UNPROTECTED);

    ASSERT(coarse_is_entrance_stub(exit_tgt));
    entry->tag = entrance_stub_target_tag(exit_tgt, src_unit);
    linked_tgt = entrance_stub_jmp_target(exit_tgt);

    if (!entrance_stub_linked(exit_tgt, src_unit) ||
        get_fcache_coarse_info(linked_tgt) != src_unit) {
        /* Leave stub */
        entry->entrance_stub = true;
        entry->cur_pc = exit_tgt;
        DOCHECK(1, {
            cache_pc body;
            /* A rank order violation (grabbing src htable read lock while
             * holding frozen htable write lock) prevents us from calling the
             * exported interface fragment_coarse_lookup_in_unit, so instead we
             * have a hack where we call the lower-level routine that is
             * exported only for us.
             */
            coarse_body_from_htable_entry(dcontext, freeze_info->src_info,
                                          entry->tag, exit_tgt, NULL, &body);
            ASSERT(body == NULL ||
                   coarse_is_trace_head(exit_tgt) ||
                   fragment_lookup_trace(dcontext, entry->tag) != NULL);
        });
        /* We do not look up body pc to see if a trace head stub linked to
         * a trace; instead we fix up the unlinked target (for freeze_info->unlink)
         * when we proactively add the stub when processing the head body
         */
        entry->trace_head = coarse_is_trace_head(exit_tgt);
        /* If target is trace head in same unit, we could add to pending,
         * but don't have body pc handy, so we let htable walk find it
         */
    } else {
        /* Intra-unit non-trace-head target: eliminate stub */
        entry->entrance_stub = false;
        entry->cur_pc = linked_tgt;
        /* trace heads are discovered in the htable walk, never pushed here */
        entry->trace_head = false;
    }

    /* Copy the cti minus its 4-byte operand and remember where the operand
     * will live in the new cache.
     */
    ASSERT(cti_len > 4);
    opcode_len = cti_len - 4;
    entry->link_cti_opnd = freeze_info->cache_cur_pc + opcode_len;
    memcpy(freeze_info->cache_cur_pc, cti_pc, opcode_len);

    /* case 9677: unsafe to elide entire-bb-ubr; otherwise elide if possible */
    entry->elide_ubr = (body_start_pc != cti_pc ||
                        DYNAMO_OPTION(unsafe_freeze_elide_sole_ubr));

    freeze_info->cache_cur_pc += cti_len;

    /* Push onto the front of the pending stack */
    entry->next = freeze_info->pending;
    freeze_info->pending = entry;
}
/* Translates tgt, expected to be one of freeze_info->src_info's ibl prefixes
 * (ret, call, or jmp), into the matching prefix of freeze_info->dst_info.
 * Asserts if tgt is none of those and then returns tgt unchanged.
 */
static cache_pc
redirect_to_tgt_ibl_prefix(dcontext_t *dcontext, coarse_freeze_info_t *freeze_info,
                           cache_pc tgt)
{
    coarse_info_t *from;
    coarse_info_t *to;

    ASSERT(freeze_info != NULL && freeze_info->src_info != NULL &&
           freeze_info->dst_info != NULL);
    from = freeze_info->src_info;
    to = freeze_info->dst_info;

    if (tgt == from->ibl_ret_prefix)
        return to->ibl_ret_prefix;
    if (tgt == from->ibl_call_prefix)
        return to->ibl_call_prefix;
    if (tgt == from->ibl_jmp_prefix)
        return to->ibl_jmp_prefix;

    ASSERT_NOT_REACHED();
    return tgt; /* best chance of continuing on */
}
/* Transfers a coarse fragment to a new location.  Queues up all of its
 * exit targets for transfer as well, scheduling ubr last to enable ubr elision.
 *
 * body is the start of the fragment in the source cache.  The fragment is
 * copied to freeze_info->cache_cur_pc, which is advanced past the copy.
 * Direct exits are recorded via push_pending_freeze(); an indirect exit has
 * its inlined stub copied and its prefix target redirected to the
 * destination unit's ibl prefix.
 * The DODEBUG blocks only maintain size/count statistics in freeze_info.
 */
void
transfer_coarse_fragment(dcontext_t *dcontext, coarse_freeze_info_t *freeze_info,
                         cache_pc body)
{
    /* FIXME: for maximum code re-use, use decode_fragment instead of trying
     * to be efficient?
     * FIXME case 9428: 8-bit conversion
     */
    cache_pc pc = body, next_pc = pc; /* source pcs */
    app_pc tgt;
    size_t sz;
    bool intra_fragment = false;
    instr_t *instr;
    instr = instr_create(dcontext);

    /* Decode forward until the first cti that is not intra-fragment */
    do {
        instr_reset(dcontext, instr);
        pc = next_pc;
        ASSERT(pc - body <= MAX_FRAGMENT_SIZE);
        next_pc = decode_cti(dcontext, pc, instr);
        /* Case 8711: we can't distinguish exit ctis from others,
         * so we must assume that any cti is an exit cti, although
         * we do now support intra-fragment ctis (i#665).
         * Assumption: coarse-grain bbs have 1 ind exit or 2 direct,
         * and no code beyond the last exit!
         */
        intra_fragment = false;
        if (instr_opcode_valid(instr) && instr_is_cti(instr)) {
            if (instr_is_cti_short_rewrite(instr, pc)) {
                /* Pull in the two short jmps for a "short-rewrite" instr.
                 * We must do this before asking whether it's an
                 * intra-fragment so we don't just look at the
                 * first part of the sequence.
                 */
                next_pc = remangle_short_rewrite(dcontext, instr, pc,
                                                 0/*same target*/);
            }
            if (coarse_cti_is_intra_fragment(dcontext, freeze_info->src_info,
                                             instr, body))
                intra_fragment = true;
        }
    } while (!instr_opcode_valid(instr) || !instr_is_cti(instr) || intra_fragment);

    /* copy body of fragment, up to start of cti */
    sz = pc - body;
    memcpy(freeze_info->cache_cur_pc, body, sz);
    freeze_info->cache_cur_pc += sz;
    DODEBUG({ freeze_info->app_code_size += sz; });

    /* Ensure we get proper target for short cti sequence */
    if (instr_is_cti_short_rewrite(instr, pc)) {
        /* We already remangled if a short-rewrite */
        DODEBUG({
            /* We mangled 2-byte jecxz/loop* into 9-byte sequence */
            freeze_info->app_code_size -= 7;
            freeze_info->added_jecxz_mangle += 7;
        });
    }

    tgt = opnd_get_pc(instr_get_target(instr));
    if (tgt == next_pc) {
        /* A jmp to the immediately following pc: the inlined indirect stub */
        ASSERT(instr_is_ubr(instr));
        /* indirect exit stub */
        ASSERT(coarse_is_indirect_stub(tgt));
        /* elide the jmp to the stub */
        pc += JMP_LONG_LENGTH /*ubr to stub*/;
        /* copy the stub minus the 4-byte operand of its final jmp */
        sz = coarse_indirect_stub_size(freeze_info->src_info) - 4;
        memcpy(freeze_info->cache_cur_pc, pc, sz);
        freeze_info->cache_cur_pc += sz;
        pc += sz;
        tgt = PC_RELATIVE_TARGET(pc);
        DODEBUG({
            freeze_info->num_indbr++;
            freeze_info->app_code_size -= 6; /* save ecx */
            freeze_info->added_indbr_mangle += 6 /* save ecx */;
            if (tgt == freeze_info->src_info->ibl_ret_prefix) {
                /* ret imm goes from 3 bytes to 1+4=5 bytes
                   L3 c2 18 00 ret $0x0018 %esp (%esp) -> %esp
                   =>
                   L4 67 64 89 0e e8 0e addr16 mov %ecx -> %fs:0xee8
                   L4 59 pop %esp (%esp) -> %ecx %esp
                   L4 8d 64 24 18 lea 0x18(%esp) -> %esp
                */
                /* guaranteed to be able to read 5 bytes back */
                /* Match the lea above: opcode 0x8d, ModRM 0x64 (disp8 + SIB,
                 * reg=%esp), SIB 0x24 (base=%esp).
                 */
                if (*(pc - 4) == 0x8d && *(pc - 3) == 0x64 && *(pc - 2) == 0x24) {
                    freeze_info->app_code_size -= 2;
                    freeze_info->added_indbr_mangle += 2;
                }
            } else if (tgt == freeze_info->src_info->ibl_call_prefix) {
                /* change from call* to mov is no size diff */
                freeze_info->added_indbr_mangle += 5 /* push immed */;
            } else {
                /* jmp*: change to mov is no size difference */
            }
            freeze_info->added_indbr_stub +=
                coarse_indirect_stub_size(freeze_info->src_info) ;
        });
        tgt = redirect_to_tgt_ibl_prefix(dcontext, freeze_info, tgt);
        ASSERT(dynamo_all_threads_synched); /* thus NOT_HOT_PATCHABLE */
        freeze_info->cache_cur_pc =
            insert_relative_target(freeze_info->cache_cur_pc, tgt, NOT_HOT_PATCHABLE);
    } else {
        /* FIXME: if we had profile info we could reverse the branch and
         * make our cache trace-like
         */
        DEBUG_DECLARE(bool is_cbr = false;)
        if (instr_is_cbr(instr)) {
            uint cbr_len;
            /* push cbr target on todo-stack */
            if (instr_is_cti_short_rewrite(instr, pc))
                cbr_len = CBR_SHORT_REWRITE_LENGTH;
            else
                cbr_len = CBR_LONG_LENGTH;
            push_pending_freeze(dcontext, freeze_info, tgt, cbr_len, pc, body);
            ASSERT(pc + cbr_len == next_pc);
            /* process ubr next */
            instr_reset(dcontext, instr);
            pc = next_pc;
            next_pc = decode_cti(dcontext, pc, instr);
            ASSERT(instr_opcode_valid(instr) && instr_is_ubr(instr));
            tgt = opnd_get_pc(instr_get_target(instr));
            DODEBUG({
                freeze_info->num_cbr++;
                /* FIXME: assumes 32-bit cbr! */
                freeze_info->app_code_size += cbr_len;
                freeze_info->added_fallthrough += 5;
                is_cbr = true;
            });
        }
        ASSERT(instr_is_ubr(instr));
        /* push ubr last, so we can elide the jmp if we process it next */
        push_pending_freeze(dcontext, freeze_info, tgt, JMP_LONG_LENGTH, pc, body);
        ASSERT(pc + JMP_LONG_LENGTH == next_pc);
        DODEBUG({
            if (!is_cbr) {
                if (pc >= body + 5 && *(pc - 5) == 0x68) {
                    /* FIXME: could be an app push immed followed by app jmp */
                    /* call => push immed: same size, but adding jmp */
                    freeze_info->num_call++;
                    freeze_info->added_fallthrough += 5; /* jmp */
                } else {
                    /* FIXME: assumes 32-bit jmp! */
                    freeze_info->app_code_size += 5;
                    freeze_info->num_jmp++;
                }
            }
        });
    }
    instr_destroy(dcontext, instr);
}
/* Walks the code in [start, end) of the frozen unit info and re-relativizes
 * every cti whose target lies outside [bounds_start, bounds_end).
 * If is_cache, the range is cache code: jmps to stubs are shifted by
 * stubs_shift and all other out-of-bounds jmps by cache_shift.
 * If !is_cache, the range is assumed to hold stubs: each out-of-bounds jmp is
 * shifted by cache_shift - stubs_shift, the outgoing link info is updated,
 * and the padding after each stub is skipped.
 * old_mapsz is used to compute the end of the region considered "stubs" when
 * classifying a cache jmp's target.
 */
static void
coarse_unit_shift_jmps_internal(dcontext_t *dcontext, coarse_info_t *info,
                                ssize_t cache_shift, ssize_t stubs_shift,
                                size_t old_mapsz,
                                cache_pc start, cache_pc end,
                                cache_pc bounds_start, cache_pc bounds_end,
                                bool is_cache)
{
    /* We must patch up indirect and direct stub jmps to prefixes */
    cache_pc cur_pc = start;
    cache_pc following_pc = start;
    instr_t *cti;

    ASSERT(dynamo_all_threads_synched); /* thus NOT_HOT_PATCHABLE */
    ASSERT(info->frozen);
    cti = instr_create(dcontext);

    while (following_pc < end) {
        app_pc target;

        instr_reset(dcontext, cti);
        cur_pc = following_pc;
        following_pc = decode_cti(dcontext, cur_pc, cti);
        /* Case 8711: we can't distinguish exit ctis from others.
         * Note that we don't need to distinguish intra-fragment ctis here
         * b/c we want to shift them by the same amount (xref i#665).
         */
        if (!instr_opcode_valid(cti) || !instr_is_cti(cti))
            continue;

        if (instr_is_cti_short_rewrite(cti, cur_pc)) {
            following_pc = remangle_short_rewrite(dcontext, cti, cur_pc,
                                                  0/*same target*/);
        }
        target = opnd_get_pc(instr_get_target(cti));

        if (target < bounds_start || target >= bounds_end) {
            ssize_t delta;
            /* the cti's 4-byte relative operand is its final 4 bytes */
            cache_pc opnd_pc = following_pc - 4;

            if (!is_cache) {
                /* Shifting jmps from stubs
                 * We started with [cache | padding | stubs | padding]
                 * We then allocate new memory and copy there [cache | stubs]
                 * Thus, the stubs have a double shift: once for padding bet
                 * cache and stubs, and once for shift of whole alloc.
                 * This doesn't work if stubs target cache, but we assert on
                 * that in transfer_coarse_stub().
                 */
                delta = cache_shift - stubs_shift;
            } else if (target >= info->fcache_return_prefix &&
                       target < info->cache_start_pc + old_mapsz) {
                /* break down into whether targeting stubs or not
                 * ok to use new prefix start, which is where old padding was
                 */
                delta = stubs_shift;
            } else {
                delta = cache_shift;
            }

            LOG(THREAD, LOG_FRAGMENT, 4,
                "\tshifting jmp @"PFX" "PFX" from "PFX" to "PFX"\n",
                cur_pc, following_pc - 4, target, target + delta);
            insert_relative_target(opnd_pc, target + delta, NOT_HOT_PATCHABLE);

            if (!is_cache) {
                /* we must update incoming after fixing target, since old_stub is
                 * inconsistent and we need a complete stub to dereference
                 */
                cache_pc stub_before, stub_after;
                /* double-check post-shift: no prefix targets */
                ASSERT(target + delta < bounds_start || target + delta >= bounds_end);
                stub_after = (cache_pc)
                    ALIGN_BACKWARD(cur_pc, coarse_stub_alignment(info));
                stub_before = stub_after + delta;
                /* we can't assert that old_stub or new_stub are entrance_stubs
                 * since targets are currently inconsistent wrt info
                 */
                /* must update incoming stub for target */
                coarse_update_outgoing(dcontext, stub_before, stub_after,
                                       info, true/*replace*/);
            }
        }

        if (!is_cache) {
            /* for stubs, skip the padding (which we'd otherwise decode as garbage) */
            ASSERT(following_pc + IF_X64_ELSE(3, 1) ==
                   (cache_pc) ALIGN_FORWARD(following_pc, coarse_stub_alignment(info)));
            following_pc = (cache_pc)
                ALIGN_FORWARD(following_pc, coarse_stub_alignment(info));
        }
    }
    instr_destroy(dcontext, cti);
}
/* Re-relativizes jmps in a frozen unit in two passes:
 * 1) walks info's cache, shifting extra-cache jmp targets by cache_shift
 *    and jmps to stubs by stubs_shift;
 * 2) walks info's stubs, updating targets that are not prefixes
 *    (i.e., in other coarse units or in fine-grained fragment caches).
 */
static void
coarse_unit_shift_jmps(dcontext_t *dcontext, coarse_info_t *info,
                       ssize_t cache_shift, ssize_t stubs_shift, size_t old_mapsz)
{
    cache_pc walk_start, walk_end;

    /* Pass 1: the cache; anything outside the cache is out of bounds */
    walk_start = info->cache_start_pc;
    walk_end = info->cache_end_pc;
    LOG(THREAD, LOG_FRAGMENT, 4,
        "shifting jmps for cache "PFX"-"PFX"\n",
        walk_start, walk_end);
    coarse_unit_shift_jmps_internal(dcontext, info,
                                    cache_shift, stubs_shift, old_mapsz,
                                    walk_start, walk_end,
                                    walk_start, walk_end,
                                    true/*cache*/);

    /* Pass 2: the stubs */
    walk_start = info->stubs_start_pc;
    walk_end = info->stubs_end_pc;
    LOG(THREAD, LOG_FRAGMENT, 4,
        "shifting jmps for stubs "PFX"-"PFX"\n",
        walk_start, walk_end);
    coarse_unit_shift_jmps_internal(dcontext, info,
                                    cache_shift, stubs_shift, old_mapsz,
                                    walk_start, walk_end,
                                    /* do not re-relativize prefix targets */
                                    info->fcache_return_prefix, walk_end,
                                    false/*stubs*/);
}
/***************************************************************************
* MERGING FROZEN UNITS
*/
/* Processes a stub in the original old source unit at old_stub whose
 * targeting cti has been copied into the new being-built merged unit
 * at dst_cache_pc and has length cti_len.  Passing dst_cache_pc==NULL
 * (with cti_len==0) causes no cti patching to occur.
 *
 * Depending on what freeze_info->dst_info already holds for the stub's
 * target tag, one of three things happens:
 *  - the target body is in dst and is not a trace head: the cti is linked
 *    directly to the body and no stub is copied;
 *  - a stub for the target already exists in dst: the cti is pointed at it;
 *  - otherwise: the stub is copied via transfer_coarse_stub(), registered with
 *    fragment_coarse_th_add(), and the cti is pointed at the copy.
 * replace_outgoing is passed through to transfer_coarse_stub() and also
 * controls when outgoing entries of old_stub are removed or the stub unlinked.
 */
static void
coarse_merge_process_stub(dcontext_t *dcontext, coarse_freeze_info_t *freeze_info,
                          cache_pc old_stub, uint cti_len, cache_pc dst_cache_pc,
                          bool replace_outgoing)
{
    app_pc old_stub_tgt;
    cache_pc dst_body, dst_stub, patch_pc, src_body;
    bool trace_head;
    ASSERT(coarse_is_entrance_stub(old_stub));
    ASSERT(dynamo_all_threads_synched); /* thus NOT_HOT_PATCHABLE */
    ASSERT((dst_cache_pc == NULL && cti_len == 0) || cti_len > 4);
    /* Address of the copied cti's 4-byte operand.  Only computed when there
     * is a cti to patch: arithmetic on a NULL pointer is undefined behavior.
     * Every use below is guarded by dst_cache_pc != NULL.
     */
    patch_pc = (dst_cache_pc == NULL) ? NULL : dst_cache_pc + cti_len - 4;
    old_stub_tgt = entrance_stub_target_tag(old_stub, freeze_info->src_info);
    fragment_coarse_lookup_in_unit(dcontext, freeze_info->dst_info,
                                   old_stub_tgt, &dst_stub, &dst_body);
    /* We need to know for sure whether a trace head as we're not doing
     * a pass through the htable like we do for regular freezing
     */
    fragment_coarse_lookup_in_unit(dcontext, freeze_info->src_info, old_stub_tgt,
                                   NULL, &src_body);
    /* Consider both sources for headness */
    trace_head =
        coarse_is_trace_head_in_own_unit(dcontext, old_stub_tgt, old_stub,
                                         src_body, true, freeze_info->src_info) ||
        (dst_stub != NULL &&
         coarse_is_trace_head_in_own_unit(dcontext, old_stub_tgt, dst_stub,
                                          dst_body, true, freeze_info->dst_info));
    /* Should only be adding w/ no source cti if a trace head or for the stub
     * walk for the larger unit where we have a dup stub and aren't replacing */
    ASSERT(dst_cache_pc != NULL || trace_head ||
           (dst_body == NULL && dst_stub != NULL && !replace_outgoing));
    if (dst_body != NULL && !trace_head) {
        /* Directly link and do not copy the stub */
        LOG(THREAD, LOG_FRAGMENT, 4,
            "\ttarget "PFX" is in other cache @"PFX": directly linking\n",
            old_stub_tgt, dst_body);
        ASSERT(dst_stub == NULL);
        ASSERT(dst_body >= freeze_info->dst_info->cache_start_pc &&
               dst_body < freeze_info->dst_info->cache_end_pc);
        if (dst_cache_pc != NULL)
            insert_relative_target(patch_pc, dst_body, NOT_HOT_PATCHABLE);
        if (!freeze_info->unlink && entrance_stub_linked(old_stub,
                                                        freeze_info->src_info)) {
            /* ASSUMPTION: unlink == !in_place
             * If in-place, we must update target incoming info, whether source is
             * primary (being replaced) or secondary (probably being deleted since
             * now in merge result, but we don't want to crash while unlinking it
             * (case 10382)) source.
             */
            coarse_remove_outgoing(dcontext, old_stub, freeze_info->src_info);
        }
    } else if (dst_stub != NULL) {
        LOG(THREAD, LOG_FRAGMENT, 4,
            "\ttarget "PFX" is already in stubs @"PFX"\n", old_stub_tgt, dst_stub);
        ASSERT(dst_body == NULL || trace_head);
        /* Stub already exists: point to it */
        if (dst_cache_pc != NULL)
            insert_relative_target(patch_pc, dst_stub, NOT_HOT_PATCHABLE);
        /* Must remove incoming if one mergee had a cross link to the other */
        if ((dst_body != NULL ||
             /* If secondary merger was smaller and had a stub for the same target,
              * we need to remove our outgoing since secondary added a new one.
              * We want to do this only the 1st time we get here, and not if the
              * primary merger added the stub, so we have the primary unlink
              * the old stub (in else code below).
              */
             replace_outgoing) &&
            entrance_stub_linked(old_stub, freeze_info->src_info)) {
            coarse_remove_outgoing(dcontext, old_stub, freeze_info->src_info);
        }
    } else {
        /* Copy stub */
        cache_pc stub_pc = freeze_info->stubs_cur_pc;
        ASSERT(dst_body == NULL || trace_head);
        LOG(THREAD, LOG_FRAGMENT, 4,
            "\ttarget "PFX" is %s, adding stub @"PFX"\n",
            old_stub_tgt, trace_head ? "trace head" : "not present", stub_pc);
        transfer_coarse_stub(dcontext, freeze_info, old_stub, trace_head,
                             replace_outgoing);
        if (replace_outgoing) {
            /* Signal to later stubs that they don't need to remove the outgoing
             * entry (as opposed to new stubs added by the secondary merger,
             * for which we do need to remove).
             * Assumption: if replace_outgoing then it's ok to unlink the old stub
             * since it's going away anyway.
             */
            unlink_entrance_stub(dcontext, old_stub,
                                 trace_head ? FRAG_IS_TRACE_HEAD : 0,
                                 freeze_info->src_info);
        }
        /* transfer_coarse_stub() should have consumed exactly one stub slot */
        ASSERT(freeze_info->stubs_cur_pc == stub_pc +
               coarse_stub_alignment(freeze_info->src_info));
        /* Register the new stub by its offset from dst's stubs start */
        fragment_coarse_th_add(dcontext, freeze_info->dst_info, old_stub_tgt,
                               stub_pc - (ptr_uint_t)
                               freeze_info->dst_info->stubs_start_pc);
        if (dst_cache_pc != NULL)
            insert_relative_target(patch_pc, stub_pc, NOT_HOT_PATCHABLE);
    }
}
/* Assumption: cache has already been copied from src to dst.
 * Walks the original (source) cache to find ctis that leave it.  Jmps to
 * ibl prefixes are redirected to the destination unit's prefixes, and jmps to
 * entrance stubs are handed to coarse_merge_process_stub(), which directly
 * links them or copies the stub.  Afterward the source stubs are walked to
 * pick up linked stubs for the source's own bodies.
 * replace_outgoing really only applies if in_place: should we replace
 * outgoing link incoming entries (true), or add new ones (false)
 */
static void
coarse_merge_update_jmps(dcontext_t *dcontext, coarse_freeze_info_t *freeze_info,
                         bool replace_outgoing)
{
    /* Plan: cache has already been copied from src to dst, but we need to do
     * inter-unit links.  So we decode from the original cache to find the
     * target stubs: if a target is present in dst, we do not copy the stub and
     * we directly link; if not present, we copy the stub and re-relativize the
     * jmp to the stub.  We must also patch up indirect and direct stub jmps
     * to prefixes.
     */
    coarse_info_t *src = freeze_info->src_info;
    cache_pc cur = src->cache_start_pc;
    cache_pc following = cur;
    cache_pc src_end = src->cache_end_pc;
    /* FIXME: share code w/ decode_fragment() and transfer_coarse_fragment() */
    instr_t *cti;

    /* Since mucking with caches, though if thread-private not necessary */
    ASSERT(dynamo_all_threads_synched);
    ASSERT(src->frozen);
    LOG(THREAD, LOG_FRAGMENT, 4,
        "coarse_merge_update_jmps %s "PFX" => "PFX"\n",
        src->module, cur, freeze_info->cache_start_pc);

    cti = instr_create(dcontext);
    while (following < src_end) {
        app_pc target;

        instr_reset(dcontext, cti);
        cur = following;
        following = decode_cti(dcontext, cur, cti);
        /* Case 8711: we can't distinguish exit ctis from others,
         * so we must assume that any cti is an exit cti.
         */
        /* We don't care about fragment boundaries so we can ignore elision.
         * We only care about jmps to stubs.
         */
        if (!instr_opcode_valid(cti) || !instr_is_cti(cti))
            continue;

        /* Ensure we get proper target for short cti sequence */
        if (instr_is_cti_short_rewrite(cti, cur))
            following = remangle_short_rewrite(dcontext, cti, cur, 0/*same target*/);
        target = opnd_get_pc(instr_get_target(cti));

        if (in_coarse_stub_prefixes(target)) {
            cache_pc opnd_pc;
            /* We should not encounter prefix targets other than indirect while
             * in the body of the cache (rest are from the stubs) */
            ASSERT(coarse_is_indirect_stub
                   (following - coarse_indirect_stub_size(src)));
            /* indirect exit stub: need to update jmp to prefix */
            ASSERT(instr_is_ubr(cti));
            /* the jmp's 4-byte operand follows its opcode */
            opnd_pc = cur + (JMP_LONG_LENGTH /*ubr to stub*/ - 4);
            target = PC_RELATIVE_TARGET(opnd_pc);
            target = redirect_to_tgt_ibl_prefix(dcontext, freeze_info, target);
            ASSERT(dynamo_all_threads_synched); /* thus NOT_HOT_PATCHABLE */
            /* patch the same offset in the destination copy of the cache */
            insert_relative_target(freeze_info->cache_start_pc +
                                   (opnd_pc - src->cache_start_pc),
                                   target, NOT_HOT_PATCHABLE);
            following = opnd_pc + 4;
        } else if (target < src->cache_start_pc || target >= src_end) {
            /* Must go through a stub */
            cache_pc dst_cti_pc = freeze_info->cache_start_pc +
                (cur - src->cache_start_pc);
            uint cti_len;
            ASSERT(target >= src->stubs_start_pc &&
                   target < src->stubs_end_pc);
            if (instr_is_cbr(cti)) {
                /* If there is a ubr next (could be elided) we just hit
                 * it next time around loop */
                cti_len = instr_is_cti_short_rewrite(cti, cur) ?
                    CBR_SHORT_REWRITE_LENGTH : CBR_LONG_LENGTH;
            } else {
                ASSERT(instr_is_ubr(cti));
                cti_len = JMP_LONG_LENGTH;
            }
            ASSERT(cur + cti_len == following);
            coarse_merge_process_stub(dcontext, freeze_info, target, cti_len,
                                      dst_cti_pc, replace_outgoing);
        }
        /* else: intra-cache target */
        /* I would assert that a pclookup finds an entry but that hits
         * a recursive lock on non-recursive freeze_info->src_info->lock */
    }
    instr_destroy(dcontext, cti);

    /* Do the loop even w/o traces in debug for the assert */
    if (!DYNAMO_OPTION(disable_traces) IF_DEBUG(|| true)) {
        /* We can have trace heads with no intra-unit targeters (secondary trace
         * heads!) so we must also walk the stubs.  Rather than require an
         * iterator or helper routine in fragment or link we directly
         * walk here. */
        cache_pc stub;
        for (stub = src->stubs_start_pc;
             stub < src->stubs_end_pc;
             stub += coarse_stub_alignment(src)) {
            cache_pc own_body;
            if (in_coarse_stub_prefixes(stub))
                continue;
            ASSERT(coarse_is_entrance_stub(stub));
            if (!entrance_stub_linked(stub, src))
                continue;
            /* for non-in-place merging we don't unlink stubs targeting
             * the other mergee, so we must rule that out here.
             * the only internally-untargeted stubs we need to add are
             * those for our own bodies. */
            fragment_coarse_lookup_in_unit
                (dcontext, src,
                 entrance_stub_target_tag(stub, src),
                 NULL, &own_body);
            if (own_body != NULL) {
                ASSERT(!DYNAMO_OPTION(disable_traces));
                coarse_merge_process_stub(dcontext, freeze_info, stub,
                                          0, NULL, replace_outgoing);
            }
        }
    }
}
/* Assumption: cache to be merged with has already been copied to dst.
 * This routine walks the other src and copies over non-dup fragments,
 * directly linking inter-unit links along the way.
 * cache_offs is added to the cache offset recorded for each newly added
 * fragment entry.
 * replace_outgoing really only applies if in_place: should we replace
 * outgoing link incoming entries (true), or add new ones (false)
 *
 * Intra-cache jmps in copied fragments are recorded on a temporary list and
 * patched in a second pass once all destination bodies are known.
 * The temporary instr_t and that list are released on every exit path,
 * including the defensive early exit taken if decoding runs past the end of
 * the source cache.
 */
static void
coarse_merge_without_dups(dcontext_t *dcontext, coarse_freeze_info_t *freeze_info,
                          ssize_t cache_offs, bool replace_outgoing)
{
    /* Plan: we need to append the non-dup portions of src to the already-copied
     * other source, as well as fixing up inter-unit links: if a target is present in
     * dst, we do not copy the stub and we directly link; if not present, we copy the
     * stub and re-relativize the jmp to the stub.  We must also patch up indirect
     * and direct stub jmps to prefixes.
     * Though we need to know fragment boundaries, note that walking the htable
     * instead of the cache doesn't buy us much: due to elision we still have to do
     * pclookups, so we go ahead and walk the cache as it is already laid out.
     */
    cache_pc pc = freeze_info->src_info->cache_start_pc;
    cache_pc src_body, next_pc = pc, fallthrough_body = NULL;
    cache_pc dst_body = NULL, last_dst_body;
    cache_pc stop_pc = freeze_info->src_info->cache_end_pc;
    app_pc tag, fallthrough_tag = NULL, tgt = NULL;
    /* FIXME: share code w/ decode_fragment() and transfer_coarse_fragment() */
    instr_t *instr;
    /* stored targets for fixup */
    jmp_tgt_list_t *jmp_list = NULL;
    bool intra_fragment = false;
    /* Since mucking with caches, though if thread-private not necessary */
    ASSERT(dynamo_all_threads_synched);
    ASSERT(freeze_info->src_info->frozen);
    LOG(THREAD, LOG_FRAGMENT, 4,
        "coarse_merge_without_dups %s "PFX" => "PFX"\n",
        freeze_info->src_info->module, pc, freeze_info->cache_cur_pc);
    instr = instr_create(dcontext);
    while (next_pc < stop_pc) {
        last_dst_body = dst_body;
        if (fallthrough_tag != NULL) {
            /* still at dup fallthrough pc */
            ASSERT(fragment_coarse_entry_pclookup(dcontext, freeze_info->src_info,
                                                  next_pc) == fallthrough_tag);
            tag = fallthrough_tag;
            ASSERT(fallthrough_body != NULL);
            dst_body = fallthrough_body;
            /* do not go again through the fallthrough code below */
            instr_reset(dcontext, instr);
        } else {
            tag = fragment_coarse_entry_pclookup(dcontext, freeze_info->src_info,
                                                 next_pc);
        }
        /* We come back through the loop for fallthrough jmp of cbr */
        ASSERT(tag != NULL || (instr_opcode_valid(instr) && instr_is_cbr(instr)));
        if (tag != NULL && tag != fallthrough_tag) {
            LOG(THREAD, LOG_FRAGMENT, 4,
                "\tfragment entry point "PFX" = tag "PFX, next_pc, tag);
            fragment_coarse_lookup_in_unit(dcontext, freeze_info->dst_info,
                                           tag, NULL, &dst_body);
            if (dst_body == NULL) {
                cache_pc src_stub = NULL;
                fragment_coarse_add(dcontext, freeze_info->dst_info,
                                    tag, freeze_info->cache_cur_pc -
                                    (ptr_uint_t)freeze_info->cache_start_pc +
                                    cache_offs);
                LOG(THREAD, LOG_FRAGMENT, 4, " (new => "PFX")\n",
                    freeze_info->cache_cur_pc);
                /* this may be a trace head, in which case we need to add its stub
                 * now in case there are no intra-unit targeters of it (which
                 * means it is probably a secondary trace head) */
                fragment_coarse_lookup_in_unit(dcontext, freeze_info->src_info,
                                               tag, &src_stub, NULL);
                if (src_stub != NULL) {
                    ASSERT(!DYNAMO_OPTION(disable_traces));
                    coarse_merge_process_stub(dcontext, freeze_info, src_stub,
                                              0, NULL, replace_outgoing);
                }
            } else { /* dup */
                LOG(THREAD, LOG_FRAGMENT, 4, " (duplicate)\n");
                /* if prev is cbr, this is a fall-through, which is handled below */
            }
        } /* else carry through dst_body from last iter */
        src_body = next_pc;
        fallthrough_tag = NULL;
        fallthrough_body = NULL;
        do {
            ASSERT(next_pc < stop_pc);
            if (next_pc >= stop_pc) {
                /* paranoid: avoid infinite loop.  Release everything this
                 * routine allocated before bailing out so nothing leaks; the
                 * queued intra-cache fixups are discarded, not applied, as
                 * their targets may not have been copied yet.
                 */
                while (jmp_list != NULL) {
                    jmp_tgt_list_t *next = jmp_list->next;
                    HEAP_TYPE_FREE(dcontext, jmp_list, jmp_tgt_list_t,
                                   ACCT_VMAREAS, PROTECTED);
                    jmp_list = next;
                }
                instr_destroy(dcontext, instr);
                return;
            }
            pc = next_pc;
            if (!intra_fragment &&
                (next_pc != src_body ||
                 /* fall-through of cbr will be looked up pre-1st iter above */
                 (instr_opcode_valid(instr) && instr_is_cbr(instr)))) {
                /* We assume at least one instr in each fragment, to avoid ambiguity */
                ASSERT_NOT_IMPLEMENTED(!DYNAMO_OPTION(unsafe_freeze_elide_sole_ubr));
                if (next_pc == src_body) {
                    fallthrough_tag = tag;
                    fallthrough_body = dst_body;
                } else {
                    fallthrough_tag =
                        fragment_coarse_entry_pclookup(dcontext, freeze_info->src_info,
                                                       next_pc);
                    if (fallthrough_tag != NULL) {
                        fragment_coarse_lookup_in_unit(dcontext, freeze_info->dst_info,
                                                       fallthrough_tag, NULL,
                                                       &fallthrough_body);
                    }
                }
                if (fallthrough_tag != NULL) {
                    /* We'd rather keep fall-through elision if we can */
                    LOG(THREAD, LOG_FRAGMENT, 4,
                        "\tfall-through tag "PFX" @"PFX, fallthrough_tag, next_pc);
                    if (fallthrough_body == NULL) {
                        /* Just keep going and process the fall-through's cti */
                        LOG(THREAD, LOG_FRAGMENT, 4, " (new => "PFX")\n",
                            freeze_info->cache_cur_pc + (next_pc - src_body));
                        if (dst_body != NULL) { /* prev is a dup */
                            ASSERT_NOT_TESTED();
                            src_body = next_pc;
                            tag = fallthrough_tag;
                        }
                        if (fallthrough_tag != tag) {
                            fragment_coarse_add(dcontext, freeze_info->dst_info,
                                                fallthrough_tag,
                                                freeze_info->cache_cur_pc +
                                                (next_pc - src_body) -
                                                (ptr_uint_t)freeze_info->cache_start_pc +
                                                cache_offs);
                            DOCHECK(1, {
                                /* We should NOT need to add a stub like we might
                                 * for the entry point add above: fall-through
                                 * cannot be trace head! */
                                cache_pc src_stub = NULL;
                                fragment_coarse_lookup_in_unit(dcontext,
                                                               freeze_info->src_info,
                                                               fallthrough_tag,
                                                               &src_stub, NULL);
                                ASSERT(src_stub == NULL);
                            });
                        }
                        fallthrough_tag = NULL;
                    } else {
                        LOG(THREAD, LOG_FRAGMENT, 4, " (duplicate)\n");
                        break;
                    }
                }
            }
            instr_reset(dcontext, instr);
            next_pc = decode_cti(dcontext, pc, instr);
            ASSERT(next_pc - src_body <= MAX_FRAGMENT_SIZE);
            /* Case 8711: we can't distinguish exit ctis from others,
             * so we must assume that any cti is an exit cti.
             * Assumption: coarse-grain bbs have 1 ind exit or 2 direct,
             * and no code beyond the last exit!
             */
            intra_fragment = false;
            if (instr_opcode_valid(instr) && instr_is_cti(instr)) {
                if (instr_is_cti_short_rewrite(instr, pc)) {
                    /* Pull in the two short jmps for a "short-rewrite" instr.
                     * We must do this before asking whether it's an
                     * intra-fragment so we don't just look at the
                     * first part of the sequence.
                     */
                    next_pc = remangle_short_rewrite(dcontext, instr, pc,
                                                     0/*same target*/);
                }
                if (coarse_cti_is_intra_fragment(dcontext, freeze_info->src_info,
                                                 instr, src_body))
                    intra_fragment = true;
            }
        } while (!instr_opcode_valid(instr) || !instr_is_cti(instr) || intra_fragment);
        if (dst_body == NULL) { /* not a dup */
            /* copy body of fragment, including cti (if not ending @ fall-through) */
            size_t sz = next_pc - src_body;
            memcpy(freeze_info->cache_cur_pc, src_body, sz);
            freeze_info->cache_cur_pc += sz;
        }
        if (fallthrough_tag != NULL) {
            ASSERT(next_pc == pc); /* should have short-circuited */
            /* add intra-cache jmp if elided but fall-through a dup */
            ASSERT(fallthrough_body != NULL);
            /* If start bb not a dup, or post-cbr, must un-elide */
            if (dst_body == NULL || (next_pc == src_body && last_dst_body == NULL)) {
                LOG(THREAD, LOG_FRAGMENT, 4,
                    "\tadding jmp @"PFX" to "PFX" for fall-through tag "PFX"\n",
                    freeze_info->cache_cur_pc, fallthrough_body, fallthrough_tag);
                freeze_info->cache_cur_pc =
                    insert_relative_jump(freeze_info->cache_cur_pc, fallthrough_body,
                                         NOT_HOT_PATCHABLE);
            }
        } else {
            ASSERT(instr_opcode_valid(instr) && instr_is_cti(instr));
            /* We already remangled if a short-rewrite so no extra work here */
            tgt = opnd_get_pc(instr_get_target(instr));
            if (in_coarse_stub_prefixes(tgt)) {
                /* We should not encounter prefix targets other than indirect while
                 * in the body of the cache (rest are from the stubs) */
                ASSERT(coarse_is_indirect_stub
                       (next_pc - coarse_indirect_stub_size(freeze_info->src_info)));
                /* indirect exit stub: need to update jmp to prefix */
                ASSERT(instr_is_ubr(instr));
                if (dst_body == NULL) { /* not a dup */
                    tgt = PC_RELATIVE_TARGET(next_pc - 4);
                    tgt = redirect_to_tgt_ibl_prefix(dcontext, freeze_info, tgt);
                    ASSERT(dynamo_all_threads_synched); /* thus NOT_HOT_PATCHABLE */
                    /* we've already copied the stub as part of the body */
                    ASSERT(coarse_is_indirect_stub
                           (freeze_info->cache_cur_pc -
                            coarse_indirect_stub_size(freeze_info->src_info)));
                    /* back up over the copied operand and rewrite it */
                    freeze_info->cache_cur_pc -= 4;
                    freeze_info->cache_cur_pc =
                        insert_relative_target(freeze_info->cache_cur_pc,
                                               tgt, NOT_HOT_PATCHABLE);
                }
            } else if (tgt < freeze_info->src_info->cache_start_pc || tgt >= stop_pc) {
                if (dst_body == NULL) { /* not a dup */
                    /* currently goes through a stub */
                    ASSERT(tgt >= freeze_info->src_info->stubs_start_pc &&
                           tgt < freeze_info->src_info->stubs_end_pc);
                    if (instr_is_cbr(instr)) {
                        uint cbr_len;
                        if (instr_is_cti_short_rewrite(instr, pc))
                            cbr_len = CBR_SHORT_REWRITE_LENGTH;
                        else
                            cbr_len = CBR_LONG_LENGTH;
                        ASSERT(pc + cbr_len == next_pc);
                        coarse_merge_process_stub(dcontext, freeze_info, tgt, cbr_len,
                                                  freeze_info->cache_cur_pc - cbr_len,
                                                  replace_outgoing);
                        /* If there is a ubr next (could be elided) we just hit
                         * it next time around loop */
                    } else {
                        ASSERT(instr_is_ubr(instr));
                        ASSERT(pc + JMP_LONG_LENGTH == next_pc);
                        coarse_merge_process_stub(dcontext, freeze_info, tgt,
                                                  JMP_LONG_LENGTH,
                                                  freeze_info->cache_cur_pc -
                                                  JMP_LONG_LENGTH,
                                                  replace_outgoing);
                    }
                }
            } else if (dst_body == NULL) { /* not a dup */
                /* Intra-cache target, but we're moving things around and have to do
                 * a separate pass since don't know future locations.  Since the
                 * layout is changing and later we'd need multiple lookups to find
                 * the correspondence between src and dst, we store the target tag in
                 * the jmp and replace it w/ the body in the later pass.
                 * We can't fit a 64-bit target, so we use offs from mod base.
                 * XXX: split pcaches up if app module is over 4GB.
                 */
                jmp_tgt_list_t *entry;
                app_pc tgt_tag =
                    fragment_coarse_entry_pclookup(dcontext, freeze_info->src_info, tgt);
                ASSERT(tgt_tag != NULL);
                LOG(THREAD, LOG_FRAGMENT, 4,
                    "\tintra-cache src "PFX"->"PFX" tag "PFX" dst pre-"PFX"\n",
                    pc, tgt, tgt_tag, freeze_info->cache_cur_pc);
                entry = HEAP_TYPE_ALLOC(dcontext, jmp_tgt_list_t,
                                        ACCT_VMAREAS, PROTECTED);
                entry->tag = tgt_tag;
                entry->jmp_end_pc = freeze_info->cache_cur_pc;
                entry->next = jmp_list;
                jmp_list = entry;
            }
        }
    }
    /* Second pass to update intra-cache targets.
     * FIXME: combine w/ later coarse_unit_shift_jmps()
     */
    while (jmp_list != NULL) {
        jmp_tgt_list_t *next = jmp_list->next;
        fragment_coarse_lookup_in_unit(dcontext, freeze_info->dst_info,
                                       jmp_list->tag, NULL, &dst_body);
        ASSERT(dst_body != NULL);
        LOG(THREAD, LOG_FRAGMENT, 4,
            "\tintra-cache dst -"PFX"->"PFX" tag "PFX"\n",
            jmp_list->jmp_end_pc, dst_body, tgt); /* tgt always set here */
        /* FIXME: make 4 a named constant; used elsewhere as well */
        insert_relative_target(jmp_list->jmp_end_pc - 4, dst_body, NOT_HOT_PATCHABLE);
        HEAP_TYPE_FREE(dcontext, jmp_list, jmp_tgt_list_t, ACCT_VMAREAS, PROTECTED);
        jmp_list = next;
    }
    instr_destroy(dcontext, instr);
}
/* Returns a new coarse_info_t (or if in_place returns info1) that combines
* info1 and info2. If in_place, info1 is replaced with the result and returned;
* else, a separate coarse_info_t is created and returned.
* If either of the units is live, then info1 must be live.
* If one of the two units covers a different code range, it must be info2,
* and it must be a subset of info1's range.
* If NULL is returned, the merge failed; if in_place, info1 is unchanged on failure.
* If in_place, caller is responsible for flushing the ibl tables (case 11057).
*/
coarse_info_t *
coarse_unit_merge(dcontext_t *dcontext, coarse_info_t *info1, coarse_info_t *info2,
bool in_place)
{
coarse_info_t *merged;
coarse_info_t *res = NULL;
coarse_info_t *src_sm, *src_lg;
size_t cache1_size, cachelg_size, cache2_size, merged_cache_size;
size_t stubs1_size, stubs2_size;
coarse_freeze_info_t freeze_info;
LOG(THREAD, LOG_CACHE, 2, "coarse_unit_merge %s %s with %s\n",
info1->module, info1->persisted ? "persisted" : "non-persisted",
info2->persisted ? "persisted" : "non-persisted");
STATS_INC(coarse_units_merged);
ASSERT(info1 != NULL && info2 != NULL);
ASSERT(info1->base_pc <= info2->base_pc && info1->end_pc >= info2->end_pc);
if (info1->base_pc > info2->base_pc || info1->end_pc < info2->end_pc)
return NULL;
/* Currently we only do online merging where one unit is live */
ASSERT(!info1->persisted || !info2->persisted);
/* Much more efficient to merge smaller cache into larger */
if (fragment_coarse_num_entries(info1) > fragment_coarse_num_entries(info2)) {
src_lg = info1;
src_sm = info2;
} else {
src_lg = info2;
src_sm = info1;
}
/* Ensure the pclookup table is set up for src_sm, to avoid recursive
* lock issues
*/
if (src_sm->pclookup_htable == NULL) { /* read needs no lock */
fragment_coarse_entry_pclookup(dcontext, src_sm, NULL);
ASSERT(src_sm->pclookup_htable != NULL);
}
acquire_recursive_lock(&change_linking_lock);
#ifdef HOT_PATCHING_INTERFACE
/* we may call coarse_unit_calculate_persist_info() */
if (DYNAMO_OPTION(hot_patching))
read_lock(hotp_get_lock());
#endif
/* We can't grab both locks due to deadlock potential. Currently we are
* always fully synched, so we rely on that to synch with info2.
*/
ASSERT(dynamo_all_threads_synched);
mutex_lock(&info1->lock);
ASSERT(info1->cache != NULL && info2->cache != NULL); /* don't merge empty units */
ASSERT(info1->frozen && info2->frozen);
/* Tasks:
* 1) Merge the caches, eliminating duplicates. Various optimizations to
* preserve fall-through pairs if 2nd part is in other cache but 1st is
* not could be employed.
* 2) Turn cross-links into direct links and eliminate those entrance stubs.
* 3) Copy the rest of the entrance stubs.
* 4) Merge the RCT, RAC, and hotp fields, which may require calculating
* persist info if this unit has not been persisted; if neither unit
* has been persisted we do not need to do anything here.
*
* We will remove the extra post-stubs space since when persisting
* we only write up through stubs_end_pc and not the allocation end.
*/
/* Whether merging in-place or not, we create a new coarse_info_t.
* If in-place we delete the old one afterward.
*/
merged = coarse_unit_create(info1->base_pc, info1->end_pc, &info1->module_md5,
in_place && info1->in_use);
merged->frozen = true;
cache1_size = info1->cache_end_pc - info1->cache_start_pc;
cache2_size = info2->cache_end_pc - info2->cache_start_pc;
/* We shrink the cache to size below, after merging & removing dups */
merged_cache_size = cache1_size + cache2_size;
/* We need the stubs to start on a new page since will be +rw vs cache +r */
merged_cache_size = ALIGN_FORWARD(merged_cache_size, PAGE_SIZE);
/* We only need one set of prefixes */
stubs1_size = info1->stubs_end_pc - info1->fcache_return_prefix;
stubs2_size = info2->stubs_end_pc - info2->stubs_start_pc;
merged->mmap_size = merged_cache_size + stubs1_size + stubs2_size;
/* Our relative jmps require that we do not exceed 32-bit reachability */
IF_X64(ASSERT(CHECK_TRUNCATE_TYPE_int(merged->mmap_size)));
merged->cache_start_pc = (cache_pc) heap_mmap(merged->mmap_size);
merged->cache_end_pc = merged->cache_start_pc + cache1_size + cache2_size;
merged->stubs_start_pc =
coarse_stubs_create(merged, merged->cache_start_pc + merged_cache_size,
stubs1_size + stubs2_size);
/* will be tightened up later */
merged->stubs_end_pc = merged->cache_start_pc + merged->mmap_size;
ASSERT(merged->stubs_start_pc != NULL);
ASSERT(ALIGNED(merged->stubs_start_pc, coarse_stub_alignment(info1)));
ASSERT(merged->fcache_return_prefix ==
merged->cache_start_pc + merged_cache_size);
ASSERT(merged->trace_head_return_prefix == merged->fcache_return_prefix +
(info1->trace_head_return_prefix - info1->fcache_return_prefix));
ASSERT(merged->ibl_ret_prefix == merged->fcache_return_prefix +
(info1->ibl_ret_prefix - info1->fcache_return_prefix));
ASSERT(merged->ibl_call_prefix == merged->fcache_return_prefix +
(info1->ibl_call_prefix - info1->fcache_return_prefix));
ASSERT(merged->ibl_jmp_prefix == merged->fcache_return_prefix +
(info1->ibl_jmp_prefix - info1->fcache_return_prefix));
/* Much more efficient to put the larger cache 1st, but we have to be sure
* to use the same order for both the htable and cache.
*/
/* Try to size the dst tables to avoid collision asserts.
* Put the larger unit's entries into the dst table up front.
* FIXME: do this earlier and if one is a subset of other then do
* a simpler merge?
*/
fragment_coarse_htable_merge(dcontext, merged, src_lg, src_sm,
false/*do not add src_sm yet*/,
false/*leave th htable empty*/);
/* Copy the 1st cache intact, and bring in the non-dup portions of the second
* (since know the offsets of the 1st); then, walk the 1st and patch up its
* inter-unit links while decoding from the original, now that the 2nd is in
* place.
*/
cachelg_size = (src_lg == info2) ? cache2_size : cache1_size;
memcpy(merged->cache_start_pc, src_lg->cache_start_pc, cachelg_size);
memset(&freeze_info, 0, sizeof(freeze_info));
freeze_info.dst_info = merged;
freeze_info.stubs_start_pc = merged->stubs_start_pc;
freeze_info.stubs_cur_pc = merged->stubs_start_pc;
/* Just like for freezing: leave inter-unit links intact for in_place.
* coarse_merge_process_stub() assumes that unlink == !in_place
*/
freeze_info.unlink = !in_place;
freeze_info.src_info = src_sm;
freeze_info.cache_start_pc = merged->cache_start_pc + cachelg_size;
freeze_info.cache_cur_pc = freeze_info.cache_start_pc;
coarse_merge_without_dups(dcontext, &freeze_info, cachelg_size,
/* replace for primary unit; add for secondary */
freeze_info.src_info == info1);
merged->cache_end_pc = freeze_info.cache_cur_pc;
freeze_info.src_info = src_lg;
freeze_info.cache_start_pc = merged->cache_start_pc;
freeze_info.cache_cur_pc = freeze_info.cache_start_pc;
coarse_merge_update_jmps(dcontext, &freeze_info,
/* replace for primary unit; add for secondary */
freeze_info.src_info == info1);
ASSERT((ptr_uint_t)(freeze_info.stubs_cur_pc - merged->fcache_return_prefix) <=
stubs1_size + stubs2_size);
/* We have extra space from extra in each stub region (from case 9428),
* from duplicate prefix space, and from eliminated inter-unit stubs, so
* we must set end pc.
*/
coarse_stubs_set_end_pc(merged, freeze_info.stubs_cur_pc);
LOG(THREAD, LOG_CACHE, 2, "merged size: stubs "SZFMT" => "SZFMT" bytes, "
"cache "SZFMT" ("SZFMT" align) => "SZFMT" ("SZFMT" align) bytes\n",
stubs1_size + stubs2_size, freeze_info.stubs_cur_pc - merged->stubs_start_pc,
cache1_size + cache2_size,
(info1->fcache_return_prefix - info1->cache_start_pc) +
(info2->fcache_return_prefix - info2->cache_start_pc),
merged->cache_end_pc - merged->cache_start_pc,
merged->fcache_return_prefix - merged->cache_start_pc);
if (merged_cache_size - (merged->cache_end_pc - merged->cache_start_pc) > 0) {
/* With duplicate elimination we often have a lot of empty space, so we
* re-allocate into a proper-fitting space
*/
size_t cachesz = merged->cache_end_pc - merged->cache_start_pc;
size_t cachesz_aligned = ALIGN_FORWARD(cachesz, PAGE_SIZE);
size_t stubsz = merged->stubs_end_pc - merged->fcache_return_prefix;
size_t newsz = cachesz_aligned + stubsz;
size_t old_mapsz = merged->mmap_size;
cache_pc newmap = (cache_pc) heap_mmap(newsz);
ssize_t cache_shift = merged->cache_start_pc - newmap;
/* stubs have moved too, so a relative shift not absolute */
ssize_t stubs_shift = cachesz_aligned -
(merged->fcache_return_prefix - merged->cache_start_pc);
LOG(THREAD, LOG_CACHE, 2,
"re-allocating merged unit: "SZFMT" @"PFX" "PFX" => "
SZFMT" @"PFX" "PFX" "SZFMT" "SZFMT"\n",
merged->mmap_size, merged->cache_start_pc, merged->fcache_return_prefix,
newsz, newmap, newmap + cachesz_aligned, cache_shift, stubs_shift);
memcpy(newmap, merged->cache_start_pc, cachesz);
memcpy(newmap + cachesz_aligned, merged->fcache_return_prefix, stubsz);
heap_munmap(merged->cache_start_pc, merged->mmap_size);
coarse_stubs_delete(merged);
merged->mmap_size = newsz;
/* Our relative jmps require that we do not exceed 32-bit reachability */
IF_X64(ASSERT(CHECK_TRUNCATE_TYPE_int(merged->mmap_size)));
merged->cache_start_pc = newmap;
merged->cache_end_pc = merged->cache_start_pc + cachesz;
merged->stubs_start_pc =
coarse_stubs_create(merged, merged->cache_start_pc + cachesz_aligned,
stubsz);
ASSERT(merged->stubs_start_pc != NULL);
ASSERT(ALIGNED(merged->stubs_start_pc, coarse_stub_alignment(info1)));
ASSERT(merged->fcache_return_prefix == newmap + cachesz_aligned);
coarse_stubs_set_end_pc(merged, merged->cache_start_pc + newsz);
coarse_unit_shift_jmps(dcontext, merged, cache_shift, stubs_shift, old_mapsz);
}
/* Set cache bounds after we've potentially moved the initial cache */
fcache_coarse_init_frozen(dcontext, merged, merged->cache_start_pc,
merged->fcache_return_prefix - merged->cache_start_pc);
/* Currently we only do online merging where at least one unit is live,
* and we expect that to be info1
*/
ASSERT(!info1->persisted);
/* Store the source persisted size so we know whether we need to merge
* with that on-disk file
*/
if (info2->persisted)
merged->persisted_source_mmap_size = info2->mmap_size;
/* Merge the other fields */
coarse_unit_merge_persist_info(dcontext, merged, info1, info2);
DOLOG(5, LOG_CACHE, {
byte *pc = merged->cache_start_pc;
LOG(THREAD, LOG_CACHE, 1, "merged cache:\n");
do {
pc = disassemble_with_bytes(dcontext, pc, THREAD);
} while (pc < merged->cache_end_pc);
pc = merged->stubs_start_pc;
LOG(THREAD, LOG_CACHE, 1, "merged stubs:\n");
do {
if (((ptr_uint_t)pc) % coarse_stub_alignment(info1) ==
coarse_stub_alignment(info1)-1)
pc++;
pc = disassemble_with_bytes(dcontext, pc, THREAD);
} while (pc < merged->stubs_end_pc);
});
/* FIXME case 9687: mark cache as read-only */
if (in_place) {
coarse_incoming_t *e;
coarse_replace_unit(dcontext, info1, merged);
merged = NULL;
/* up to caller to call mark_executable_area_coarse_frozen() if necessary */
/* case 10877: must combine the incoming lists
* targets should be unique, so can just append
*/
mutex_lock(&info1->incoming_lock);
/* can't grab info2 lock, so just like for main lock we rely on synchall */
DODEBUG({
/* Make sure no inter-incoming left */
uint in1 = 0;
uint in2 = 0;
for (e = info1->incoming; e != NULL; e = e->next, in1++)
ASSERT(!e->coarse || get_stub_coarse_info(e->in.stub_pc) != info2);
for (e = info2->incoming; e != NULL; e = e->next, in2++)
ASSERT(!e->coarse || get_stub_coarse_info(e->in.stub_pc) != info1);
LOG(THREAD, LOG_CACHE, 1, "merging %d incoming into %d incoming\n",
in2, in1);
});
e = info1->incoming;
if (e == NULL) {
info1->incoming = info2->incoming;
} else {
while (e->next != NULL)
e = e->next;
e->next = info2->incoming;
}
mutex_unlock(&info1->incoming_lock);
info2->incoming = NULL; /* ensure not freed when info2 is freed */
coarse_unit_shift_links(dcontext, info1);
res = info1;
} else {
/* we made separate copy that has no outgoing or incoming links */
res = merged;
}
mutex_unlock(&info1->lock);
#ifdef HOT_PATCHING_INTERFACE
/* we may call coarse_unit_calculate_persist_info() */
if (DYNAMO_OPTION(hot_patching))
read_unlock(hotp_get_lock());
#endif
release_recursive_lock(&change_linking_lock);
return res;
}
/***************************************************************************
* PERSISTENT CODE CACHE
*/
#if defined(RETURN_AFTER_CALL) && defined(WINDOWS)
extern bool seen_Borland_SEH;
#endif
/* Fills in directory with the global or per-user persistent cache root.
 * Sources are tried in this order:
 *   1) the config parameter (per-user or shared, depending on
 *      -persist_per_user);
 *   2) the -persist_dir runtime option (only if -persist_per_user), else
 *      the -persist_shared_dir runtime option;
 *   3) the base log directory.
 * On success directory is null-terminated and true is returned.
 */
bool
perscache_dirname(char *directory /* OUT */, uint directory_len)
{
    /* Support specifying the pcache dir from either a config param (historical
     * from ASLR piggyback) or runtime option, though config param gets precedence.
     */
    const char *param_name;
    int retval;
    bool found = true;

    if (DYNAMO_OPTION(persist_per_user))
        param_name = PARAM_STR(DYNAMORIO_VAR_PERSCACHE_ROOT);
    else
        param_name = PARAM_STR(DYNAMORIO_VAR_PERSCACHE_SHARED);

    retval = get_parameter(param_name, directory, directory_len);
    if (IS_GET_PARAMETER_FAILURE(retval)) {
        /* No config param: fall back to the runtime options */
        found = false;
        string_option_read_lock();
        if (DYNAMO_OPTION(persist_per_user) && !IS_STRING_OPTION_EMPTY(persist_dir)) {
            strncpy(directory, DYNAMO_OPTION(persist_dir), directory_len);
            found = true;
        } else if (!IS_STRING_OPTION_EMPTY(persist_shared_dir)) {
            strncpy(directory, DYNAMO_OPTION(persist_shared_dir), directory_len);
            found = true;
        } else {
            /* use log dir by default
             * XXX: create subdir "logs/cache"?  default is per-user so currently
             * user dirs will be in logs/ which seems sufficient.
             */
            uint len = directory_len;
            create_log_dir(BASE_DIR);
            if (get_log_dir(BASE_DIR, directory, &len) && len <= directory_len)
                found = true;
        }
        string_option_read_unlock();
    }

    if (!found)
        return false;
    /* strncpy does not terminate on truncation, so always cap the buffer */
    directory[directory_len - 1] = '\0';
    return true;
}
/* Resolves the directory in which persisted caches live.
 * Starts from perscache_dirname() and, if -persist_per_user, descends into
 * the current user's directory (creating it if create is set).
 * Returns false if the root is missing or contains no directory separator,
 * or if the per-user directory cannot be obtained.
 */
static bool
get_persist_dir(char *directory /* OUT */,
                uint directory_len,
                bool create)
{
    bool root_ok = perscache_dirname(directory, directory_len) &&
        double_strchr(directory, DIRSEP, ALT_DIRSEP) != NULL;

    if (!root_ok) {
        SYSLOG_INTERNAL_ERROR_ONCE("Persistent cache root dir is invalid. "
                                   "Persistent cache will not operate.");
        return false;
    }
    if (!DYNAMO_OPTION(persist_per_user))
        return true;

    /* on return directory is null terminated */
    if (os_current_user_directory(directory, directory_len, create))
        return true;

    /* directory name may be set even on failure */
    LOG(THREAD_GET, LOG_CACHE, 2, "\terror opening per-user dir %s\n", directory);
    return false;
}
/* Checks for enough space on the volume where persisted caches are stored.
 * If fd_in is INVALID_FILE, the persist directory is opened (and closed again
 * before returning) to obtain a handle on the right volume; otherwise fd_in
 * is used as is and left open.
 * Returns the result of check_low_disk_threshold(), or false if no handle
 * could be obtained.
 */
bool
coarse_unit_check_persist_space(file_t fd_in/*OPTIONAL*/, size_t size_needed)
{
    /* true when we open the handle ourselves and thus must close it */
    const bool own_fd = (fd_in == INVALID_FILE);
    file_t fd = fd_in;
    bool room;

    if (own_fd) {
        /* Use directory to get handle on proper volume */
        char dir[MAXIMUM_PATH];
        /* note we MUST always create directory even if never persisting */
        if (get_persist_dir(dir, BUFFER_SIZE_ELEMENTS(dir), true))
            fd = os_open_directory(dir, 0);
        else
            LOG(THREAD_GET, LOG_CACHE, 2, "\terror finding persist dir\n");
    }

    if (fd == INVALID_FILE) {
        LOG(THREAD_GET, LOG_CACHE, 2, "\terror opening persist dir\n");
        return false;
    }

    room = check_low_disk_threshold(fd, (uint64)size_needed);
    if (own_fd) {
        /* FIXME: cache the handle, combine with -validate_owner_dir */
        os_close(fd);
    }
    return room;
}
/* Selects which set of pcache-affecting options applies to the module at pc.
 * Returns OP_PCACHE_NOP if -persist_check_options is off; OP_PCACHE_LOCAL if
 * -persist_check_local_options is on, or if -persist_check_exempted_options
 * is on, the module counts as exempted, and local options actually exist;
 * otherwise OP_PCACHE_GLOBAL.
 * If force_local, pretends module at pc has been exempted (so no effect
 * unless -persist_check_exempted_options)
 */
static inline op_pcache_t
persist_get_options_level(app_pc pc, coarse_info_t *info, bool force_local)
{
    if (!DYNAMO_OPTION(persist_check_options))
        return OP_PCACHE_NOP;
    if (DYNAMO_OPTION(persist_check_local_options))
        return OP_PCACHE_LOCAL;
    if (!DYNAMO_OPTION(persist_check_exempted_options))
        return OP_PCACHE_GLOBAL;

    /* The module is treated as exempted if the caller forces it, if its unit
     * was loaded as local (once loaded as local, must remain local, even if
     * this process never hit the exemption), or if the module is flagged.
     */
    if (!force_local &&
        !(info != NULL && TEST(PERSCACHE_EXEMPTION_OPTIONS, info->flags)) &&
        !os_module_get_flag(pc, MODULE_WAS_EXEMPTED))
        return OP_PCACHE_GLOBAL;

    /* don't use local if no such options: else when load will think
     * local when really global
     */
    if (!has_pcache_dynamo_options(&dynamo_options, OP_PCACHE_LOCAL))
        return OP_PCACHE_GLOBAL;

    return OP_PCACHE_LOCAL;
}
/* Writes the pcache-affecting options for the given level into option_buf
 * (always null-terminated) and returns option_buf.
 * For OP_PCACHE_NOP nothing is written and a static empty string is returned.
 */
static const char *
persist_get_relevant_options(dcontext_t *dcontext, char *option_buf,
                             uint buf_len, op_pcache_t level)
{
    if (level != OP_PCACHE_NOP) {
        get_pcache_dynamo_options_string(&dynamo_options, option_buf, buf_len, level);
        option_buf[buf_len - 1] = '\0';
        LOG(THREAD, LOG_CACHE, 2, "Pcache-affecting options = %s\n", option_buf);
        return option_buf;
    }
    return "";
}
/* We identify persisted caches by mapping module info into a canonical name.
 * There can be collisions (including for sub-module coarse units, such as
 * separate +x module sections (xref case 9834 and case 9653); in addition, we
 * can have different modules map to the same name), so caller must further
 * verify matches.
 * (Note that we use a different scheme than aslr's calculate_publish_name()
 * as we are not dealing with file handles here but in-memory module images).
 *
 * filename:      OUT buffer that receives
 *                "<dir><DIRSEP><name>[-dbg]-0x<hash>.<PERSCACHE_FILE_SUFFIX>"
 *                ("-dbg" only in debug builds); null-terminated on success.
 * filename_max:  capacity of filename, in chars.
 * modbase:       base of the module whose info is looked up.
 * write:         true when the name is for persisting.  It selects which
 *                exclusion stat is bumped, is passed as the create flag to
 *                get_persist_dir(), and allows creating the per-app dir.
 * modinfo:       OPTIONAL OUT; on success its base, checksum, timestamp,
 *                image_size, code_size, and file_version fields are filled in.
 * option_string: OPTIONAL; if non-NULL its chars are folded into the hash
 *                (case 9799).
 *
 * Returns false if the module info is unavailable, the module has no name,
 * the name is on -persist_exclude_list, the persist directory cannot be
 * obtained, or the per-app directory needs creating and that fails.
 * The module info lock is held from the lookup until the name has been
 * built (or until an early return).
 */
static bool
get_persist_filename(char *filename /*OUT*/, uint filename_max /* max #chars */,
                     app_pc modbase, bool write, persisted_module_info_t *modinfo,
                     const char *option_string)
{
    uint checksum, timestamp;
    size_t size, code_size;
    uint64 file_version;
    const char *name;
    uint hash;
    char dir[MAXIMUM_PATH];

    os_get_module_info_lock();
    if (!os_get_module_info(modbase, &checksum, &timestamp, &size,
                            &name, &code_size, &file_version)) {
        os_get_module_info_unlock();
        return false;
    }
    if (name == NULL) {
        /* theoretically possible but pathological, unless we came in late */
        ASSERT_CURIOSITY(IF_WINDOWS_ELSE_0(!dr_early_injected));
        LOG(GLOBAL, LOG_CACHE, 1, "\tmodule "PFX" has no name\n", modbase);
        os_get_module_info_unlock();
        return false;
    }
    /* Should not have path chars in the name */
    ASSERT(get_short_name(name) == name && name[0] != DIRSEP);
    name = get_short_name(name); /* paranoid */

    /* Exclude list applies to both read and write */
    if (!IS_STRING_OPTION_EMPTY(persist_exclude_list)) {
        bool exclude;
        string_option_read_lock();
        exclude = check_filter(DYNAMO_OPTION(persist_exclude_list), name);
        string_option_read_unlock();
        if (exclude) {
            LOG(GLOBAL, LOG_CACHE, 1, "\t%s is on exclude list\n", name);
            DOSTATS({
                if (write)
                    STATS_INC(coarse_units_persist_excluded);
                else
                    STATS_INC(perscache_load_excluded);
            });
            os_get_module_info_unlock();
            return false;
        }
    }

    /* Prepend the perscache dir.  We assume it has already been created.
     * FIXME: cache this, or better, cache the dir handle and use an
     * os_open that can take it in.  Note that the directory handle
     * doesn't help us in Linux - we can neither open files relative to it,
     * nor there is any strong chown guarantee that we depend on.
     */
    if (!get_persist_dir(dir, BUFFER_SIZE_ELEMENTS(dir), write)) {
        os_get_module_info_unlock();
        return false;
    }

    /* FIXME case 8494: version-independent names so we clobber files
     * from old versions of modules and have less need of stale file
     * cleanup?  If so should add in (hash of) full path (w/ volume)
     * to avoid name conflicts?  But if try to share across machines
     * we do not want to include path since can vary.
     */
    /* should we go to a 64-bit hash? */
    IF_X64(ASSERT(CHECK_TRUNCATE_TYPE_uint(size)));
    hash = checksum ^ timestamp ^ (uint)size;
    /* case 9799: make options part of namespace */
    if (option_string != NULL) {
        uint i;
        size_t optlen;
        ASSERT(DYNAMO_OPTION(persist_check_options));
        /* The length is loop-invariant: compute it once rather than on
         * every iteration.
         */
        optlen = strlen(option_string);
        /* XOR each char into one of the 4 bytes of the hash, round-robin.
         * The expression is deliberately left as is so that names of
         * already-persisted files do not change.
         */
        for (i = 0; i < optlen; i++)
            hash ^= option_string[i] << ((i % 4)*8);
    }
    /* hash is a 32-bit uint, so it is printed with a 32-bit specifier rather
     * than PFX, which this file uses for pointer-sized values.
     */
    LOG(GLOBAL, LOG_CACHE, 2, "\thash = 0x%08x^0x%08x^"PFX" ^ %s = 0x%08x\n",
        checksum, timestamp, size, option_string == NULL ? "" : option_string, hash);
    ASSERT_CURIOSITY(hash != 0);

    if (DYNAMO_OPTION(persist_per_app)) {
        char *dirend = dir + strlen(dir);
        /* FIXME case 9692: put tls offs instead of "dbg" here, and then
         * sqlservr can have its own set if it ends up w/ separate tls offs
         * (once we have non-per-app persisted files, that is).
         */
        snprintf(dirend, BUFFER_SIZE_ELEMENTS(dir) - (dirend - dir), "%c%s%s",
                 DIRSEP, get_application_short_name(), IF_DEBUG_ELSE("-dbg", ""));
        NULL_TERMINATE_BUFFER(dir);
        LOG(GLOBAL, LOG_CACHE, 2, "\tper-app dir is %s\n", dir);
        /* check for existence first so we can require new during creation */
        if (!os_file_exists(dir, true/*is dir*/) && write) {
            if (!os_create_dir(dir, CREATE_DIR_REQUIRE_NEW)) {
                LOG(GLOBAL, LOG_CACHE, 2, "\terror creating per-app dir %s\n", dir);
                os_get_module_info_unlock();
                return false;
            } else
                LOG(GLOBAL, LOG_CACHE, 2, "\tcreated per-app dir %s\n", dir);
        }
    }

    /* FIXME PR 214088/case 9653: should we put the section ordinal or vmarea range into
     * the name to support simultaneous sub-module files?  If sections are
     * adjacent they'll be one vmarea, so this affects very few dlls.  For now
     * we only support one file per module.  We could also support multiple
     * ranges per file.
     */
    snprintf(filename, filename_max, "%s%c%s%s-0x%08x.%s", dir, DIRSEP, name,
             IF_DEBUG_ELSE("-dbg", ""), hash, PERSCACHE_FILE_SUFFIX);
    filename[filename_max-1] = '\0';
    /* name points into module data, so only unlock once we are done with it */
    os_get_module_info_unlock();

    if (modinfo != NULL) {
        modinfo->base = modbase;
        modinfo->checksum = checksum;
        modinfo->timestamp = timestamp;
        modinfo->image_size = size;
        modinfo->code_size = code_size;
        modinfo->file_version = file_version;
    }
    return true;
}
#if defined(DEBUG) && defined(INTERNAL)
/* Debug-only helper: logs prefix followed by the short and full MD5 bytes
 * of digest to the log file f.
 * FIXME: share w/ aslr.c
 */
static void
print_module_digest(file_t f, module_digest_t *digest, const char *prefix)
{
LOG(f, LOG_CACHE, 1, "%s\n md5 short: ", prefix);
dump_buffer_as_bytes(f, digest->short_MD5, MD5_RAW_BYTES, DUMP_RAW);
LOG(f, LOG_CACHE, 1, "\n md5 long: ");
dump_buffer_as_bytes(f, digest->full_MD5, MD5_RAW_BYTES, DUMP_RAW);
LOG(f, LOG_CACHE, 1, "\n");
}
#endif
/* Computes the MD5 digest(s) of a persisted cache file itself.
 * If PERSCACHE_GENFILE_MD5_COMPLETE is set in validation_option,
 * digest->full_MD5 covers the mapped file starting at map, through header
 * and data but excluding the trailing persisted_footer_t.
 * If PERSCACHE_GENFILE_MD5_SHORT is set, digest->short_MD5 covers only the
 * header (pers->header_len bytes starting at pers).
 * Fields whose flag is not set are left untouched.
 */
static void
persist_calculate_self_digest(module_digest_t *digest, coarse_persisted_info_t *pers,
                              app_pc map, uint validation_option)
{
    struct MD5Context cxt;

    if (TEST(PERSCACHE_GENFILE_MD5_COMPLETE, validation_option)) {
        /* Even if generated w/ -persist_map_rw_separate but loaded w/o that
         * option, the md5 should match since the memory layout is the same.
         */
        size_t full_len =
            pers->header_len + pers->data_len - sizeof(persisted_footer_t);
        MD5Init(&cxt);
        MD5Update(&cxt, map, full_len);
        MD5Final(digest->full_MD5, &cxt);
    }

    if (TEST(PERSCACHE_GENFILE_MD5_SHORT, validation_option)) {
        byte *header = (byte *) pers;
        MD5Init(&cxt);
        MD5Update(&cxt, header, pers->header_len);
        MD5Final(digest->short_MD5, &cxt);
    }
}
/* Computes the MD5 digest(s) of the in-memory application module.
 * If PERSCACHE_MODULE_MD5_COMPLETE is set in validation_option,
 * digest->full_MD5 covers [code_start, code_end), with code_end clamped to
 * the end of the module's view.
 * If PERSCACHE_MODULE_MD5_SHORT is set, digest->short_MD5 is filled in by
 * module_calculate_digest() over the view, ignoring writable sections.
 */
static void
persist_calculate_module_digest(module_digest_t *digest, app_pc modbase, size_t modsize,
                                app_pc code_start, app_pc code_end,
                                uint validation_option)
{
    /* case 9717: need view size, not image size */
    size_t view_size =
        TESTANY(PERSCACHE_MODULE_MD5_COMPLETE|PERSCACHE_MODULE_MD5_SHORT,
                validation_option) ?
        os_module_get_view_size(modbase) : modsize;

    if (TEST(PERSCACHE_MODULE_MD5_COMPLETE, validation_option)) {
        /* We can't use a full md5 from module_calculate_digest() since .data
         * and other sections change between persist and load times (this is
         * in-memory image, not file).  So we do md5 of code region.  If we have
         * hooks at persist time but not at load time we will cry foul;
         * PERSCACHE_MODULE_MD5_AT_LOAD tries to get around this by using
         * the load-time md5 when persisting.
         */
        struct MD5Context cxt;
        /* Code range should be within a single memory allocation so it should
         * all be readable.  Xref case 9653.
         */
        app_pc view_end = modbase + view_size;
        app_pc hash_end = (code_end < view_end) ? code_end : view_end;
        MD5Init(&cxt);
        MD5Update(&cxt, code_start, hash_end - code_start);
        MD5Final(digest->full_MD5, &cxt);
    }

    if (TEST(PERSCACHE_MODULE_MD5_SHORT, validation_option)) {
        /* Examine only the image header and the footer (if non-writable)
         * FIXME: if view_size < modsize, better to skip the footer than have it
         * cover a data section?  Should be ok w/ PERSCACHE_MODULE_MD5_AT_LOAD.
         */
        module_calculate_digest(digest, modbase, view_size,
                                false /* not full */, true /* yes short */,
                                DYNAMO_OPTION(persist_short_digest),
                                /* do not consider writable sections */
                                ~((uint)OS_IMAGE_WRITE), OS_IMAGE_WRITE);
    }
}
/* Compares all but the module base.
 * Everything from the checksum field onward is compared.  The md5 digests
 * are compared only to the extent selected by -persist_load_validation:
 * when both the short and the complete digest are selected, one memcmp
 * covers the whole tail of the struct; otherwise the fields preceding
 * module_md5 are compared bytewise and the digests via
 * module_digests_equal().
 * Returns true if the two infos match.
 */
static bool
persist_modinfo_cmp(persisted_module_info_t *mi1, persisted_module_info_t *mi2)
{
    bool fields_same;

    /* We'd like to know if we have an md5 mismatch */
    ASSERT_CURIOSITY(module_digests_equal(&mi1->module_md5, &mi2->module_md5,
                                          TEST(PERSCACHE_MODULE_MD5_SHORT,
                                               DYNAMO_OPTION(persist_load_validation)),
                                          TEST(PERSCACHE_MODULE_MD5_COMPLETE,
                                               DYNAMO_OPTION(persist_load_validation)))
                     /* relocs => md5 diffs, until we handle relocs wrt md5 */
                     IF_WINDOWS(|| mi1->base != mi2->base)
                     || check_filter("win32.partial_map.exe",
                                     get_short_name(get_application_name())));

    if (TESTALL(PERSCACHE_MODULE_MD5_SHORT|PERSCACHE_MODULE_MD5_COMPLETE,
                DYNAMO_OPTION(persist_load_validation))) {
        /* Both digests requested: compare checksum through end of struct */
        return (memcmp(&mi1->checksum, &mi2->checksum,
                       sizeof(*mi1)-offsetof(persisted_module_info_t, checksum)) == 0);
    }

    /* Compare the plain fields, i.e. [checksum, module_md5) */
    fields_same = (memcmp(&mi1->checksum, &mi2->checksum,
                          offsetof(persisted_module_info_t, module_md5) -
                          offsetof(persisted_module_info_t, checksum)) == 0);
    /* ...and then only whichever digest(s) validation asks for */
    return fields_same &&
        module_digests_equal(&mi1->module_md5, &mi2->module_md5,
                             TEST(PERSCACHE_MODULE_MD5_SHORT,
                                  DYNAMO_OPTION(persist_load_validation)),
                             TEST(PERSCACHE_MODULE_MD5_COMPLETE,
                                  DYNAMO_OPTION(persist_load_validation)));
}
#ifdef WINDOWS
/* Called when a pcache's recorded base does not match the module's base.
 * Unless -coarse_freeze_rebased_aslr is set, a module with a dynamic base is
 * flagged MODULE_DO_NOT_PERSIST.
 */
static void
persist_record_base_mismatch(app_pc modbase)
{
    /* The idea is that we shouldn't waste our time re-persisting modules
     * whose base keeps mismatching due to ASLR (we don't support rebasing
     * pcaches yet).
     * To record whether to not persist, we can't use a VM_ flag b/c
     * no simple way to tell vmareas.c why a load failed so we use a
     * module flag
     */
    if (DYNAMO_OPTION(coarse_freeze_rebased_aslr))
        return;
    if (!os_module_has_dynamic_base(modbase))
        return;
    os_module_set_flag(modbase, MODULE_DO_NOT_PERSIST);
}
#endif
/* Builds a (probably) unique file name of the form
 * "<origname>-<pid>-<random>-<key>" in filename, where <random> is a
 * zero-padded, at least 10-digit value drawn from get_random_offset(UINT_MAX).
 * The result is always null-terminated, but may be truncated if it does not
 * fit in filename_max chars (case 10677).
 *
 * key is meant to be a short string to help identify the purpose of this name.
 * FIXME: right now up to caller to figure out if the name collided w/ an
 * existing file; maybe this routine should do that and return a file handle?
 * FIXME: combine w/ get_unique_logfile, which handles file creation race?
 * As it is this is not mkstemp, and caller must use OS_OPEN_REQUIRE_NEW.
 */
static void
get_unique_name(const char *origname, const char *key,
char *filename /*OUT*/, uint filename_max /* max #chars */)
{
/* We need unique names for:
 * 1) case 9696: a temp file to build our pcache in
 *    before renaming to the real thing
 * 2) case 9701: to rename the existing file before we replace it, as for
 *    images or mmaps with file handles open we must rename before deleting.
 */
/* FIXME: should we use full 64-bit TSC instead of pseudo-random 32-bit?
 * FIXME: if we make name w/ full path too long we'll truncate:
 * could cache dir handle and use relative name only.
 */
/* update aslr_get_unique_wide_name() with any improvements here */
/* Despite its name, this is a pseudo-random value and not a time */
size_t timestamp = get_random_offset(UINT_MAX);
/* The LOG_DECLARE prefix makes the next statement read
 * "int trunc = snprintf(...)" where the macro keeps its argument, and a bare
 * snprintf(...) call where it drops it.
 */
LOG_DECLARE(int trunc =) /* for DEBUG and INTERNAL */
snprintf(filename, filename_max,
"%s-"PIDFMT"-%010"SZFC"-%s", origname,
get_process_id(), timestamp, key);
/* Flag (but tolerate) an snprintf error or a result that did not fit */
ASSERT_CURIOSITY(trunc > 0 && trunc < (int)filename_max &&
"perscache new name truncated");
/* FIXME: case 10677 file name truncation */
filename[filename_max-1] = '\0';
}
/* Merges a given frozen unit with any new persisted cache file on disk.
* Caller must hold read lock hotp_get_lock(), if -hot_patching.
* If merge is successful, returns a new coarse_info_t, which caller is
* responsible for freeing; else returns NULL.
*/
static coarse_info_t *
coarse_unit_merge_with_disk(dcontext_t *dcontext, coarse_info_t *info,
const char *filename)
{
coarse_info_t *merge_with, *postmerge = NULL;
uint64 file_size;
size_t existing_size;
/* We may have already merged new code with an inuse persisted unit, so we
* check the stored size of that one if info is not itself persisted.
* FIXME: we could store the file handle: can we tell if two file handles
* refer to the same file?
*/
size_t inuse_size = (info->persisted) ? info->mmap_size :
info->persisted_source_mmap_size;
LOG(THREAD, LOG_CACHE, 2, "coarse_unit_merge_with_disk %s\n", info->module);
ASSERT(dynamo_all_threads_synched);
ASSERT(info != NULL && info->cache != NULL); /* don't merge empty units */
ASSERT(info->frozen);
#ifdef HOT_PATCHING_INTERFACE
ASSERT_OWN_READ_LOCK(DYNAMO_OPTION(hot_patching), hotp_get_lock());
#endif
/* Strategy: check current pcache file size (not perfect but good enough):
* if different from source size, or source was not persisted, then
* load in and merge.
* FIXME case 10356: need a better check since can have false positive
* and false negatives by only looking at size.
* Could repeat, and could also check again after writing to tmp file but
* before renaming. FIXME: should we do those things to reduce the race
* window where we lose another process's appended code?
*/
if (!os_get_file_size(filename, &file_size)) {
LOG(THREAD, LOG_CACHE, 2, " no existing file %s to merge with\n", filename);
return postmerge;
}
ASSERT_TRUNCATE(existing_size, size_t, file_size);
existing_size = (size_t) file_size;
LOG(THREAD, LOG_CACHE, 2, " size of existing %s is "SZFMT" vs our "SZFMT"\n",
filename, existing_size, inuse_size);
if (existing_size == 0)
return postmerge;
/* Merge a non-persisted (and not merged with persisted) file w/ any on-disk file
* that has appeared since startup; or, our own, if we abandoned it but stayed
* coarse due to a reset or hotp flush.
*/
if ((!info->persisted && info->persisted_source_mmap_size == 0 &&
DYNAMO_OPTION(coarse_lone_merge)) ||
/* FIXME case 10356: need a better check since can have false positive
* and false negatives by only looking at size.
*/
(existing_size != inuse_size && DYNAMO_OPTION(coarse_disk_merge))) {
merge_with = coarse_unit_load(dcontext, info->base_pc, info->end_pc,
false/*not for execution*/);
/* We rely on coarse_unit_load to reject incompatible pcaches, whether for
* tls, trace support, or other reasons. We do need to check the region
* here. FIXME: once we support relocs we need to handle appropriately.
*/
if (merge_with != NULL) {
LOG(THREAD,