/* **********************************************************
* Copyright (c) 2010-2014 Google, Inc. All rights reserved.
* Copyright (c) 2000-2010 VMware, Inc. All rights reserved.
* **********************************************************/
/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/* Copyright (c) 2003-2007 Determina Corp. */
/* Copyright (c) 2001-2003 Massachusetts Institute of Technology */
/* Copyright (c) 2000-2001 Hewlett-Packard Company */
/*
* arch.c - x86 architecture specific routines
*/
#include "../globals.h"
#include "../link.h"
#include "../fragment.h"
#include "arch.h"
#include "instr.h"
#include "instr_create.h"
#include "decode.h"
#include "decode_fast.h"
#include "../fcache.h"
#include "proc.h"
#include "instrument.h"
#include <string.h> /* for memcpy */
#if defined(DEBUG) || defined(INTERNAL)
# include "disassemble.h"
#endif
/* in interp.c */
void interp_init(void);
void interp_exit(void);
/* Thread-shared generated routines.
* We don't allocate the shared_code statically so that we can mark it
* executable.
*/
generated_code_t *shared_code = NULL;
#ifdef X64
/* PR 282576: For WOW64 processes we need context switches that swap between 64-bit
* mode and 32-bit mode when executing 32-bit code cache code, as well as
* 32-bit-targeted IBL routines for performance.
*/
generated_code_t *shared_code_x86 = NULL;
/* In x86_to_x64 we can use the extra registers as scratch space.
* The IBL routines are 64-bit and they use r8-r10 freely.
*/
generated_code_t *shared_code_x86_to_x64 = NULL;
#endif
static int syscall_method = SYSCALL_METHOD_UNINITIALIZED;
byte *app_sysenter_instr_addr = NULL;
#ifdef LINUX
static bool sysenter_hook_failed = false;
#endif
/* static functions forward references */
static byte *
emit_ibl_routines(dcontext_t *dcontext, generated_code_t *code,
byte *pc, byte *fcache_return_pc,
ibl_source_fragment_type_t source_fragment_type,
bool thread_shared,
bool target_trace_table,
ibl_code_t ibl_code[]);
static byte *
emit_syscall_routines(dcontext_t *dcontext, generated_code_t *code, byte *pc,
bool thread_shared);
int
reg_spill_tls_offs(reg_id_t reg)
{
switch (reg) {
case SCRATCH_REG0: return TLS_SLOT_REG0;
case SCRATCH_REG1: return TLS_SLOT_REG1;
case SCRATCH_REG2: return TLS_SLOT_REG2;
case SCRATCH_REG3: return TLS_SLOT_REG3;
}
/* don't assert if another reg passed: used on random regs looking for spills */
return -1;
}
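/* Illustrative sketch (not code used in this file): a caller that finds one of
* the scratch registers spilled could recover its value from TLS roughly as
*   int offs = reg_spill_tls_offs(reg);
*   if (offs != -1)
*       value = *(reg_t *)(tls_base + offs);
* where tls_base stands in for however the caller locates the spill area.
*/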
#ifdef INTERNAL
/* routine can be used for dumping both thread-private and thread-shared routines */
static void
dump_emitted_routines(dcontext_t *dcontext, file_t file,
const char *code_description,
generated_code_t *code, byte *emitted_pc)
{
byte *last_pc;
#ifdef X64
if (GENCODE_IS_X86(code->gencode_mode)) {
/* parts of x86 gencode are 64-bit but it's hard to know which here
* so we dump all as x86
*/
set_x86_mode(dcontext, true/*x86*/);
}
#endif
print_file(file, "%s routines created:\n", code_description);
{
last_pc = code->gen_start_pc;
do {
const char *ibl_brtype;
const char *ibl_name =
get_ibl_routine_name(dcontext, (cache_pc)last_pc, &ibl_brtype);
# ifdef WINDOWS
/* must test these first, as get_ibl_routine_name would misidentify them as "bb_ibl_indjmp" */
if (last_pc == code->unlinked_shared_syscall)
print_file(file, "unlinked_shared_syscall:\n");
else if (last_pc == code->shared_syscall)
print_file(file, "shared_syscall:\n");
else
# endif
if (ibl_name)
print_file(file, "%s_%s:\n", ibl_name, ibl_brtype);
else if (last_pc == code->fcache_enter)
print_file(file, "fcache_enter:\n");
else if (last_pc == code->fcache_return)
print_file(file, "fcache_return:\n");
else if (last_pc == code->do_syscall)
print_file(file, "do_syscall:\n");
# ifdef WINDOWS
else if (last_pc == code->fcache_enter_indirect)
print_file(file, "fcache_enter_indirect:\n");
else if (last_pc == code->do_callback_return)
print_file(file, "do_callback_return:\n");
# else
else if (last_pc == code->do_int_syscall)
print_file(file, "do_int_syscall:\n");
else if (last_pc == code->do_int81_syscall)
print_file(file, "do_int81_syscall:\n");
else if (last_pc == code->do_int82_syscall)
print_file(file, "do_int82_syscall:\n");
else if (last_pc == code->do_clone_syscall)
print_file(file, "do_clone_syscall:\n");
# ifdef VMX86_SERVER
else if (last_pc == code->do_vmkuw_syscall)
print_file(file, "do_vmkuw_syscall:\n");
# endif
# endif
# ifdef UNIX
else if (last_pc == code->new_thread_dynamo_start)
print_file(file, "new_thread_dynamo_start:\n");
# endif
# ifdef TRACE_HEAD_CACHE_INCR
else if (last_pc == code->trace_head_incr)
print_file(file, "trace_head_incr:\n");
# endif
else if (last_pc == code->reset_exit_stub)
print_file(file, "reset_exit_stub:\n");
else if (last_pc == code->fcache_return_coarse)
print_file(file, "fcache_return_coarse:\n");
else if (last_pc == code->trace_head_return_coarse)
print_file(file, "trace_head_return_coarse:\n");
# ifdef CLIENT_INTERFACE
else if (last_pc == code->special_ibl_xfer[CLIENT_IBL_IDX])
print_file(file, "client_ibl_xfer:\n");
# endif
# ifdef UNIX
else if (last_pc == code->special_ibl_xfer[NATIVE_PLT_IBL_IDX])
print_file(file, "native_plt_ibl_xfer:\n");
else if (last_pc == code->special_ibl_xfer[NATIVE_RET_IBL_IDX])
print_file(file, "native_ret_ibl_xfer:\n");
# endif
else if (last_pc == code->clean_call_save)
print_file(file, "clean_call_save:\n");
else if (last_pc == code->clean_call_restore)
print_file(file, "clean_call_restore:\n");
last_pc = disassemble_with_bytes(dcontext, last_pc, file);
} while (last_pc < emitted_pc);
print_file(file, "%s routines size: "SSZFMT" / "SSZFMT"\n\n",
code_description, emitted_pc - code->gen_start_pc,
code->commit_end_pc - code->gen_start_pc);
}
#ifdef X64
if (GENCODE_IS_X86(code->gencode_mode))
set_x86_mode(dcontext, false/*x64*/);
#endif
}
void
dump_emitted_routines_to_file(dcontext_t *dcontext, const char *filename,
const char *label, generated_code_t *code,
byte *stop_pc)
{
file_t file = open_log_file(filename, NULL, 0);
if (file != INVALID_FILE) {
/* FIXME: we currently miss later patches for table & mask, but
* that only changes a few immeds
*/
dump_emitted_routines(dcontext, file, label, code, stop_pc);
close_log_file(file);
} else
ASSERT_NOT_REACHED();
}
#endif /* INTERNAL */
/*** functions exported to src directory ***/
byte *
code_align_forward(byte *pc, size_t alignment)
{
byte *new_pc = (byte *) ALIGN_FORWARD(pc, alignment);
DOCHECK(1, {
SET_TO_NOPS(pc, new_pc - pc);
});
return new_pc;
}
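/* Note that in debug builds (DOCHECK level 1) the bytes skipped by the
* alignment above are filled with NOPs via SET_TO_NOPS, presumably so the
* padding disassembles, and would execute, benignly if ever reached.
*/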
static byte *
move_to_start_of_cache_line(byte *pc)
{
return code_align_forward(pc, proc_get_cache_line_size());
}
/* The real size of generated code we need varies by cache line size and
* options like inlining of ibl code. We also generate different routines
* for thread-private and thread-shared. So, we dynamically extend the size
* as we generate. Currently our max is under 5 pages.
*/
#define GENCODE_RESERVE_SIZE (5*PAGE_SIZE)
#define GENCODE_COMMIT_SIZE \
((size_t)(ALIGN_FORWARD(sizeof(generated_code_t), PAGE_SIZE) + PAGE_SIZE))
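/* Illustrative arithmetic, assuming 4KB pages: the reserve is 5*4KB = 20KB of
* address space, while the initial commit is sizeof(generated_code_t) rounded
* up to a page boundary plus one extra page; check_size_and_cache_line()
* below commits additional pages on demand as emission proceeds.
*/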
static byte *
check_size_and_cache_line(generated_code_t *code, byte *pc)
{
/* Assumption: no single emit uses more than a page.
* We keep an extra page at all times and release it at the end.
*/
byte *next_pc = move_to_start_of_cache_line(pc);
if ((byte *)ALIGN_FORWARD(pc, PAGE_SIZE) + PAGE_SIZE > code->commit_end_pc) {
ASSERT(code->commit_end_pc + PAGE_SIZE <= ((byte *)code) + GENCODE_RESERVE_SIZE);
heap_mmap_extend_commitment(code->commit_end_pc, PAGE_SIZE);
code->commit_end_pc += PAGE_SIZE;
}
return next_pc;
}
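/* Worked example for the check above, assuming 4KB pages: once pc has moved
* past the start of the last committed page, ALIGN_FORWARD(pc, PAGE_SIZE) +
* PAGE_SIZE exceeds commit_end_pc and one more page is committed. The
* invariant maintained is that at least one whole committed page remains
* beyond the page pc is emitting into, matching the one-page-per-emit
* assumption stated above.
*/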
static void
release_final_page(generated_code_t *code)
{
/* FIXME: have heap_mmap not allocate a guard page, and use our
* extra for that page, to use one fewer total page of address space.
*/
size_t leftover = (ptr_uint_t)code->commit_end_pc -
ALIGN_FORWARD(code->gen_end_pc, PAGE_SIZE);
ASSERT(code->commit_end_pc >= (byte *) ALIGN_FORWARD(code->gen_end_pc, PAGE_SIZE));
ASSERT(ALIGNED(code->commit_end_pc, PAGE_SIZE));
ASSERT(ALIGNED(leftover, PAGE_SIZE));
if (leftover > 0) {
heap_mmap_retract_commitment(code->commit_end_pc - leftover, leftover);
code->commit_end_pc -= leftover;
}
LOG(THREAD_GET, LOG_EMIT, 1,
"Generated code "PFX": %d header, "SZFMT" gen, "SZFMT" commit/%d reserve\n",
code, sizeof(*code), code->gen_end_pc - code->gen_start_pc,
(ptr_uint_t)code->commit_end_pc - (ptr_uint_t)code, GENCODE_RESERVE_SIZE);
}
static void
shared_gencode_init(IF_X64_ELSE(gencode_mode_t gencode_mode, void))
{
generated_code_t *gencode;
ibl_branch_type_t branch_type;
byte *pc;
#ifdef X64
fragment_t *fragment;
bool x86_mode = false;
bool x86_to_x64_mode = false;
#endif
gencode = heap_mmap_reserve(GENCODE_RESERVE_SIZE, GENCODE_COMMIT_SIZE);
/* We would return gencode and let the caller assign it, but the emit routines
* that this routine calls query the shared vars, so we set them here.
*/
#ifdef X64
switch (gencode_mode) {
case GENCODE_X64:
shared_code = gencode;
break;
case GENCODE_X86:
/* we do not call set_x86_mode() b/c much of the gencode may be
* 64-bit: it's up to the gencode to mark each instr that's 32-bit.
*/
shared_code_x86 = gencode;
x86_mode = true;
break;
case GENCODE_X86_TO_X64:
shared_code_x86_to_x64 = gencode;
x86_to_x64_mode = true;
break;
default:
ASSERT_NOT_REACHED();
}
#else
shared_code = gencode;
#endif
memset(gencode, 0, sizeof(*gencode));
gencode->thread_shared = true;
IF_X64(gencode->gencode_mode = gencode_mode);
/* Generated code immediately follows struct */
gencode->gen_start_pc = ((byte *)gencode) + sizeof(*gencode);
gencode->commit_end_pc = ((byte *)gencode) + GENCODE_COMMIT_SIZE;
for (branch_type = IBL_BRANCH_TYPE_START;
branch_type < IBL_BRANCH_TYPE_END; branch_type++) {
gencode->trace_ibl[branch_type].initialized = false;
gencode->bb_ibl[branch_type].initialized = false;
gencode->coarse_ibl[branch_type].initialized = false;
/* cache the mode so we can pass just the ibl_code_t around */
IF_X64(gencode->trace_ibl[branch_type].x86_mode = x86_mode);
IF_X64(gencode->trace_ibl[branch_type].x86_to_x64_mode = x86_to_x64_mode);
IF_X64(gencode->bb_ibl[branch_type].x86_mode = x86_mode);
IF_X64(gencode->bb_ibl[branch_type].x86_to_x64_mode = x86_to_x64_mode);
IF_X64(gencode->coarse_ibl[branch_type].x86_mode = x86_mode);
IF_X64(gencode->coarse_ibl[branch_type].x86_to_x64_mode = x86_to_x64_mode);
}
pc = gencode->gen_start_pc;
pc = check_size_and_cache_line(gencode, pc);
gencode->fcache_enter = pc;
pc = emit_fcache_enter_shared(GLOBAL_DCONTEXT, gencode, pc);
pc = check_size_and_cache_line(gencode, pc);
gencode->fcache_return = pc;
pc = emit_fcache_return_shared(GLOBAL_DCONTEXT, gencode, pc);
if (DYNAMO_OPTION(coarse_units)) {
pc = check_size_and_cache_line(gencode, pc);
gencode->fcache_return_coarse = pc;
pc = emit_fcache_return_coarse(GLOBAL_DCONTEXT, gencode, pc);
pc = check_size_and_cache_line(gencode, pc);
gencode->trace_head_return_coarse = pc;
pc = emit_trace_head_return_coarse(GLOBAL_DCONTEXT, gencode, pc);
}
#ifdef WINDOWS_PC_SAMPLE
gencode->fcache_enter_return_end = pc;
#endif
/* PR 244737: thread-private uses shared gencode on x64.
* Should we set the option instead? */
if (USE_SHARED_TRACE_IBL()) {
/* expected to be false for private trace IBL routine */
pc = emit_ibl_routines(GLOBAL_DCONTEXT, gencode,
pc, gencode->fcache_return,
DYNAMO_OPTION(shared_traces) ?
IBL_TRACE_SHARED : IBL_TRACE_PRIVATE, /* source_fragment_type */
true, /* thread_shared */
true, /* target_trace_table */
gencode->trace_ibl);
}
if (USE_SHARED_BB_IBL()) {
pc = emit_ibl_routines(GLOBAL_DCONTEXT, gencode,
pc, gencode->fcache_return,
IBL_BB_SHARED, /* source_fragment_type */
/* thread_shared */
IF_X64_ELSE(true, SHARED_FRAGMENTS_ENABLED()),
!DYNAMO_OPTION(bb_ibl_targets), /* target_trace_table */
gencode->bb_ibl);
}
if (DYNAMO_OPTION(coarse_units)) {
pc = emit_ibl_routines(GLOBAL_DCONTEXT, gencode, pc,
/* ibl routines use regular fcache_return */
gencode->fcache_return,
IBL_COARSE_SHARED, /* source_fragment_type */
/* thread_shared */
IF_X64_ELSE(true, SHARED_FRAGMENTS_ENABLED()),
!DYNAMO_OPTION(bb_ibl_targets), /*target_trace_table*/
gencode->coarse_ibl);
}
#ifdef WINDOWS_PC_SAMPLE
gencode->ibl_routines_end = pc;
#endif
#if defined(WINDOWS) && !defined(X64)
/* no dispatch needed on x64 since syscall routines are thread-shared */
if (DYNAMO_OPTION(shared_fragment_shared_syscalls)) {
pc = check_size_and_cache_line(gencode, pc);
gencode->shared_syscall = pc;
pc = emit_shared_syscall_dispatch(GLOBAL_DCONTEXT, pc);
pc = check_size_and_cache_line(gencode, pc);
gencode->unlinked_shared_syscall = pc;
pc = emit_unlinked_shared_syscall_dispatch(GLOBAL_DCONTEXT, pc);
LOG(GLOBAL, LOG_EMIT, 3,
"shared_syscall_dispatch: linked "PFX", unlinked "PFX"\n",
gencode->shared_syscall, gencode->unlinked_shared_syscall);
}
#endif
#ifdef UNIX
/* must create before emit_do_clone_syscall() in emit_syscall_routines() */
pc = check_size_and_cache_line(gencode, pc);
gencode->new_thread_dynamo_start = pc;
pc = emit_new_thread_dynamo_start(GLOBAL_DCONTEXT, pc);
#endif
#ifdef X64
# ifdef WINDOWS
/* plain fcache_enter indirects through edi, and next_tag is in tls,
* so we don't need a separate routine for callback return
*/
gencode->fcache_enter_indirect = gencode->fcache_enter;
gencode->shared_syscall_code.x86_mode = x86_mode;
gencode->shared_syscall_code.x86_to_x64_mode = x86_to_x64_mode;
# endif
/* i#821/PR 284029: for now we assume there are no syscalls in x86 code */
if (IF_X64_ELSE(!x86_mode, true)) {
/* PR 244737: syscall routines are all shared */
pc = emit_syscall_routines(GLOBAL_DCONTEXT, gencode, pc, true/*thread-shared*/);
}
/* since we always have a shared fcache_return we can make reset stub shared */
gencode->reset_exit_stub = pc;
fragment = linkstub_fragment(GLOBAL_DCONTEXT, (linkstub_t *) get_reset_linkstub());
if (GENCODE_IS_X86(gencode->gencode_mode))
fragment = empty_fragment_mark_x86(fragment);
/* reset exit stub should look just like a direct exit stub */
pc += insert_exit_stub_other_flags
(GLOBAL_DCONTEXT, fragment,
(linkstub_t *) get_reset_linkstub(), pc, LINK_DIRECT);
#elif defined(UNIX) && defined(HAVE_TLS)
/* PR 212570: we need a thread-shared do_syscall for our vsyscall hook */
/* PR 361894: we don't support sysenter if no TLS */
ASSERT(gencode->do_syscall == NULL);
pc = check_size_and_cache_line(gencode, pc);
gencode->do_syscall = pc;
pc = emit_do_syscall(GLOBAL_DCONTEXT, gencode, pc, gencode->fcache_return,
true/*shared*/, 0, &gencode->do_syscall_offs);
#endif
#ifdef TRACE_HEAD_CACHE_INCR
pc = check_size_and_cache_line(gencode, pc);
gencode->trace_head_incr = pc;
pc = emit_trace_head_incr_shared(GLOBAL_DCONTEXT, pc, gencode->fcache_return);
#endif
if (!special_ibl_xfer_is_thread_private()) {
#ifdef CLIENT_INTERFACE
gencode->special_ibl_xfer[CLIENT_IBL_IDX] = pc;
pc = emit_client_ibl_xfer(GLOBAL_DCONTEXT, pc, gencode);
#endif
#ifdef UNIX
/* i#1238: native exec optimization */
if (DYNAMO_OPTION(native_exec_opt)) {
pc = check_size_and_cache_line(gencode, pc);
gencode->special_ibl_xfer[NATIVE_PLT_IBL_IDX] = pc;
pc = emit_native_plt_ibl_xfer(GLOBAL_DCONTEXT, pc, gencode);
/* native ret */
pc = check_size_and_cache_line(gencode, pc);
gencode->special_ibl_xfer[NATIVE_RET_IBL_IDX] = pc;
pc = emit_native_ret_ibl_xfer(GLOBAL_DCONTEXT, pc, gencode);
}
#endif
}
if (!client_clean_call_is_thread_private()) {
pc = check_size_and_cache_line(gencode, pc);
gencode->clean_call_save = pc;
pc = emit_clean_call_save(GLOBAL_DCONTEXT, pc, gencode);
pc = check_size_and_cache_line(gencode, pc);
gencode->clean_call_restore = pc;
pc = emit_clean_call_restore(GLOBAL_DCONTEXT, pc, gencode);
}
ASSERT(pc < gencode->commit_end_pc);
gencode->gen_end_pc = pc;
release_final_page(gencode);
DOLOG(3, LOG_EMIT, {
dump_emitted_routines(GLOBAL_DCONTEXT, GLOBAL,
IF_X64_ELSE(x86_mode ? "thread-shared x86" :
"thread-shared", "thread-shared"),
gencode, pc);
});
#ifdef INTERNAL
if (INTERNAL_OPTION(gendump)) {
dump_emitted_routines_to_file(GLOBAL_DCONTEXT, "gencode-shared",
IF_X64_ELSE(x86_mode ? "thread-shared x86" :
"thread-shared", "thread-shared"),
gencode, pc);
}
#endif
#ifdef WINDOWS_PC_SAMPLE
if (dynamo_options.profile_pcs &&
dynamo_options.prof_pcs_gencode >= 2 &&
dynamo_options.prof_pcs_gencode <= 32) {
gencode->profile =
create_profile(gencode->gen_start_pc, pc,
dynamo_options.prof_pcs_gencode, NULL);
start_profile(gencode->profile);
} else
gencode->profile = NULL;
#endif
gencode->writable = true;
protect_generated_code(gencode, READONLY);
}
#ifdef X64
/* Sets other-mode ibl targets, for mixed-mode and x86_to_x64 mode */
static void
far_ibl_set_targets(ibl_code_t src_ibl[], ibl_code_t tgt_ibl[])
{
ibl_branch_type_t branch_type;
for (branch_type = IBL_BRANCH_TYPE_START;
branch_type < IBL_BRANCH_TYPE_END; branch_type++) {
if (src_ibl[branch_type].initialized) {
/* selector was set in emit_far_ibl (but at that point we didn't have
* the other mode's ibl ready for the target)
*/
ASSERT(CHECK_TRUNCATE_TYPE_uint
((ptr_uint_t)tgt_ibl[branch_type].indirect_branch_lookup_routine));
ASSERT(CHECK_TRUNCATE_TYPE_uint
((ptr_uint_t)tgt_ibl[branch_type].unlinked_ibl_entry));
src_ibl[branch_type].far_jmp_opnd.pc = (uint)(ptr_uint_t)
tgt_ibl[branch_type].indirect_branch_lookup_routine;
src_ibl[branch_type].far_jmp_unlinked_opnd.pc = (uint)(ptr_uint_t)
tgt_ibl[branch_type].unlinked_ibl_entry;
}
}
}
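/* The (uint) casts and truncation checks above reflect that far_jmp_opnd
* appears to hold a ptr16:32-style operand: the selector half was filled in
* by emit_far_ibl, and only the 32-bit offset (.pc) is patched here, so the
* opposite-mode ibl targets must fit in 32 bits.
*/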
#endif
/* arch-specific initializations */
void
arch_init(void)
{
ASSERT(sizeof(opnd_t) == EXPECTED_SIZEOF_OPND);
IF_X86(ASSERT(CHECK_TRUNCATE_TYPE_byte(OPSZ_LAST)));
/* ensure our flag sharing is done properly */
ASSERT((uint)LINK_FINAL_INSTR_SHARED_FLAG <
(uint)INSTR_FIRST_NON_LINK_SHARED_FLAG);
ASSERT_TRUNCATE(byte, byte, OPSZ_LAST_ENUM);
DODEBUG({ reg_check_reg_fixer(); });
/* Verify that the structures used for a register spill area and to hold IBT
* table addresses & masks for IBL code are laid out as expected. We expect
* the spill area to be at offset 0 within the container struct and for the
* table address/mask pair array to follow immediately after the spill area.
*/
/* FIXME These can be converted into compile-time checks as follows:
*
* lookup_table_access_t table[
* (offsetof(local_state_extended_t, spill_space) == 0 &&
* offsetof(local_state_extended_t, table_space) ==
* sizeof(spill_state_t)) ? IBL_BRANCH_TYPE_END : -1 ];
*
* This isn't self-descriptive, though, so it's not being used right now
* (xref case 7097).
*/
ASSERT(offsetof(local_state_extended_t, spill_space) == 0);
ASSERT(offsetof(local_state_extended_t, table_space) == sizeof(spill_state_t));
#ifdef WINDOWS
/* syscall_init() should have already set the syscall_method, so go ahead
* and create the global_do_syscall now */
ASSERT(syscall_method != SYSCALL_METHOD_UNINITIALIZED);
#endif
/* Ensure we have no unexpected padding inside structs that include
* priv_mcontext_t (app_state_at_intercept_t and dcontext_t) */
IF_X86(ASSERT(offsetof(priv_mcontext_t, pc) + sizeof(byte*) + PRE_XMM_PADDING ==
offsetof(priv_mcontext_t, ymm)));
ASSERT(offsetof(app_state_at_intercept_t, mc) ==
offsetof(app_state_at_intercept_t, start_pc) + sizeof(void*));
/* Try to catch errors in x86.asm offsets for dcontext_t */
ASSERT(sizeof(unprotected_context_t) == sizeof(priv_mcontext_t) +
IF_WINDOWS_ELSE(IF_X64_ELSE(8, 4), 8) +
IF_CLIENT_INTERFACE_ELSE(5 * sizeof(reg_t), 0));
interp_init();
#ifdef CHECK_RETURNS_SSE2
if (proc_has_feature(FEATURE_SSE2)) {
FATAL_USAGE_ERROR(CHECK_RETURNS_SSE2_REQUIRES_SSE2, 2,
get_application_name(), get_application_pid());
}
#endif
if (USE_SHARED_GENCODE()) {
/* thread-shared generated code */
/* Assumption: no single emit uses more than a page.
* We keep an extra page at all times and release it at the end.
* FIXME: have heap_mmap not allocate a guard page, and use our
* extra for that page, to use one fewer total page of address space.
*/
ASSERT(GENCODE_COMMIT_SIZE < GENCODE_RESERVE_SIZE);
shared_gencode_init(IF_X64(GENCODE_X64));
#ifdef X64
/* FIXME i#49: usually LOL64 has only 32-bit code (kernel has 32-bit syscall
* interface) but for mixed modes how would we know? We'd have to make
* this be initialized lazily on first occurrence.
*/
if (mixed_mode_enabled()) {
generated_code_t *shared_code_opposite_mode;
shared_gencode_init(IF_X64(GENCODE_X86));
if (DYNAMO_OPTION(x86_to_x64)) {
shared_gencode_init(IF_X64(GENCODE_X86_TO_X64));
shared_code_opposite_mode = shared_code_x86_to_x64;
} else
shared_code_opposite_mode = shared_code_x86;
/* Now link the far_ibl for each type to the corresponding regular
* ibl of the opposite mode.
*/
far_ibl_set_targets(shared_code->trace_ibl,
shared_code_opposite_mode->trace_ibl);
far_ibl_set_targets(shared_code->bb_ibl,
shared_code_opposite_mode->bb_ibl);
far_ibl_set_targets(shared_code->coarse_ibl,
shared_code_opposite_mode->coarse_ibl);
far_ibl_set_targets(shared_code_opposite_mode->trace_ibl,
shared_code->trace_ibl);
far_ibl_set_targets(shared_code_opposite_mode->bb_ibl,
shared_code->bb_ibl);
far_ibl_set_targets(shared_code_opposite_mode->coarse_ibl,
shared_code->coarse_ibl);
}
#endif
}
mangle_init();
}
#ifdef WINDOWS_PC_SAMPLE
static void
arch_extract_profile(dcontext_t *dcontext _IF_X64(gencode_mode_t mode))
{
generated_code_t *tpc = get_emitted_routines_code(dcontext _IF_X64(mode));
thread_id_t tid = dcontext == GLOBAL_DCONTEXT ? 0 : dcontext->owning_thread;
/* we may not have x86 gencode */
ASSERT(tpc != NULL IF_X64(|| mode == GENCODE_X86));
if (tpc != NULL && tpc->profile != NULL) {
ibl_branch_type_t branch_type;
int sum;
protect_generated_code(tpc, WRITABLE);
stop_profile(tpc->profile);
mutex_lock(&profile_dump_lock);
/* Print the thread id so even if it has no hits we can
* count the # total threads. */
print_file(profile_file, "Profile for thread "TIDFMT"\n", tid);
sum = sum_profile_range(tpc->profile, tpc->fcache_enter,
tpc->fcache_enter_return_end);
if (sum > 0) {
print_file(profile_file, "\nDumping cache enter/exit code profile "
"(thread "TIDFMT")\n%d hits\n", tid, sum);
dump_profile_range(profile_file, tpc->profile, tpc->fcache_enter,
tpc->fcache_enter_return_end);
}
/* Break out the IBL code by trace/BB and opcode types.
* Not worth showing far_ibl hits since should be quite rare.
*/
for (branch_type = IBL_BRANCH_TYPE_START;
branch_type < IBL_BRANCH_TYPE_END; branch_type++) {
byte *start;
byte *end;
if (tpc->trace_ibl[branch_type].initialized) {
start = tpc->trace_ibl[branch_type].indirect_branch_lookup_routine;
end = start + tpc->trace_ibl[branch_type].ibl_routine_length;
sum = sum_profile_range(tpc->profile, start, end);
if (sum > 0) {
print_file(profile_file, "\nDumping trace IBL code %s profile "
"(thread "TIDFMT")\n%d hits\n",
get_branch_type_name(branch_type), tid, sum);
dump_profile_range(profile_file, tpc->profile, start, end);
}
}
if (tpc->bb_ibl[branch_type].initialized) {
start = tpc->bb_ibl[branch_type].indirect_branch_lookup_routine;
end = start + tpc->bb_ibl[branch_type].ibl_routine_length;
sum = sum_profile_range(tpc->profile, start, end);
if (sum > 0) {
print_file(profile_file, "\nDumping BB IBL code %s profile "
"(thread "TIDFMT")\n%d hits\n",
get_branch_type_name(branch_type), tid, sum);
dump_profile_range(profile_file, tpc->profile, start, end);
}
}
if (tpc->coarse_ibl[branch_type].initialized) {
start = tpc->coarse_ibl[branch_type].indirect_branch_lookup_routine;
end = start + tpc->coarse_ibl[branch_type].ibl_routine_length;
sum = sum_profile_range(tpc->profile, start, end);
if (sum > 0) {
print_file(profile_file, "\nDumping coarse IBL code %s profile "
"(thread "TIDFMT")\n%d hits\n",
get_branch_type_name(branch_type), tid, sum);
dump_profile_range(profile_file, tpc->profile, start, end);
}
}
}
sum = sum_profile_range(tpc->profile, tpc->ibl_routines_end,
tpc->profile->end);
if (sum > 0) {
print_file(profile_file, "\nDumping generated code profile "
"(thread "TIDFMT")\n%d hits\n", tid, sum);
dump_profile_range(profile_file, tpc->profile,
tpc->ibl_routines_end, tpc->profile->end);
}
mutex_unlock(&profile_dump_lock);
free_profile(tpc->profile);
tpc->profile = NULL;
}
}
void
arch_profile_exit()
{
if (USE_SHARED_GENCODE()) {
arch_extract_profile(GLOBAL_DCONTEXT _IF_X64(GENCODE_X64));
IF_X64(arch_extract_profile(GLOBAL_DCONTEXT _IF_X64(GENCODE_X86)));
}
}
#endif /* WINDOWS_PC_SAMPLE */
/* arch-specific atexit cleanup */
void
arch_exit(IF_WINDOWS_ELSE_NP(bool detach_stacked_callbacks, void))
{
/* we only need to unprotect shared_code for profile extraction
* so we do it there to also cover the fast exit path
*/
#ifdef WINDOWS_PC_SAMPLE
arch_profile_exit();
#endif
/* on x64 we have syscall routines in the shared code so can't free if detaching */
if (IF_WINDOWS(IF_X64(!detach_stacked_callbacks &&)) shared_code != NULL) {
heap_munmap(shared_code, GENCODE_RESERVE_SIZE);
}
#ifdef X64
if (shared_code_x86 != NULL)
heap_munmap(shared_code_x86, GENCODE_RESERVE_SIZE);
if (shared_code_x86_to_x64 != NULL)
heap_munmap(shared_code_x86_to_x64, GENCODE_RESERVE_SIZE);
#endif
interp_exit();
mangle_exit();
}
static byte *
emit_ibl_routine_and_template(dcontext_t *dcontext, generated_code_t *code,
byte *pc,
byte *fcache_return_pc,
bool target_trace_table,
bool inline_ibl_head,
bool thread_shared,
ibl_branch_type_t branch_type,
ibl_source_fragment_type_t source_type,
ibl_code_t *ibl_code)
{
pc = check_size_and_cache_line(code, pc);
ibl_code->initialized = true;
ibl_code->indirect_branch_lookup_routine = pc;
ibl_code->ibl_head_is_inlined = inline_ibl_head;
ibl_code->thread_shared_routine = thread_shared;
ibl_code->branch_type = branch_type;
ibl_code->source_fragment_type = source_type;
pc = emit_indirect_branch_lookup(dcontext, code, pc, fcache_return_pc,
target_trace_table, inline_ibl_head,
ibl_code);
if (inline_ibl_head) {
/* create the inlined ibl template */
pc = check_size_and_cache_line(code, pc);
pc = emit_inline_ibl_stub(dcontext, pc, ibl_code, target_trace_table);
}
ibl_code->far_ibl = pc;
pc = emit_far_ibl(dcontext, pc, ibl_code,
ibl_code->indirect_branch_lookup_routine
_IF_X64(&ibl_code->far_jmp_opnd));
ibl_code->far_ibl_unlinked = pc;
pc = emit_far_ibl(dcontext, pc, ibl_code,
ibl_code->unlinked_ibl_entry
_IF_X64(&ibl_code->far_jmp_unlinked_opnd));
return pc;
}
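/* A brief sketch of what the routine above lays down, in order: the main
* indirect-branch lookup routine (cache-line aligned), an inlined-ibl stub
* template when inline_ibl_head is set, and then two far-ibl stubs, one
* targeting the linked lookup entry and one targeting the unlinked entry.
*/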
static byte *
emit_ibl_routines(dcontext_t *dcontext, generated_code_t *code, byte *pc,
byte *fcache_return_pc,
ibl_source_fragment_type_t source_fragment_type,
bool thread_shared,
bool target_trace_table,
ibl_code_t ibl_code_routines[])
{
ibl_branch_type_t branch_type;
/* Emit separate routines for each branch type.
The goal is to have routines that target different fragment tables
so that we can control, for example, return targets for RAC,
or control inlining if some branch types have better hit ratios.
Currently it only gives us better stats.
*/
/*
N.B.: shared fragments require -atomic_inlined_linking in order to
inline ibl lookups, but private fragments do not, since they are unlinked by
another thread flushing but linked only by themselves.
*/
bool inline_ibl_head = (IS_IBL_TRACE(source_fragment_type)) ?
DYNAMO_OPTION(inline_trace_ibl) : DYNAMO_OPTION(inline_bb_ibl);
for (branch_type = IBL_BRANCH_TYPE_START;
branch_type < IBL_BRANCH_TYPE_END; branch_type++) {
#ifdef HASHTABLE_STATISTICS
/* ugly asserts but we'll stick with uints to save space */
IF_X64(ASSERT(CHECK_TRUNCATE_TYPE_uint
(GET_IBL_TARGET_TABLE(branch_type, target_trace_table)
+ offsetof(ibl_table_t, unprot_stats))));
ibl_code_routines[branch_type].unprot_stats_offset = (uint)
GET_IBL_TARGET_TABLE(branch_type, target_trace_table)
+ offsetof(ibl_table_t, unprot_stats);
IF_X64(ASSERT(CHECK_TRUNCATE_TYPE_uint
(GET_IBL_TARGET_TABLE(branch_type, target_trace_table)
+ offsetof(ibl_table_t, entry_stats_to_lookup_table))));
ibl_code_routines[branch_type].entry_stats_to_lookup_table_offset = (uint)
GET_IBL_TARGET_TABLE(branch_type, target_trace_table)
+ offsetof(ibl_table_t, entry_stats_to_lookup_table);
IF_X64(ASSERT(CHECK_TRUNCATE_TYPE_uint
(offsetof(unprot_ht_statistics_t, trace_ibl_stats[branch_type]))));
IF_X64(ASSERT(CHECK_TRUNCATE_TYPE_uint
(offsetof(unprot_ht_statistics_t, bb_ibl_stats[branch_type]))));
ibl_code_routines[branch_type].hashtable_stats_offset = (uint)
((IS_IBL_TRACE(source_fragment_type)) ?
offsetof(unprot_ht_statistics_t, trace_ibl_stats[branch_type])
: offsetof(unprot_ht_statistics_t, bb_ibl_stats[branch_type]));
#endif
pc = emit_ibl_routine_and_template(dcontext, code, pc,
fcache_return_pc,
target_trace_table,
inline_ibl_head, thread_shared,
branch_type, source_fragment_type,
&ibl_code_routines[branch_type]);
}
return pc;
}
static byte *
emit_syscall_routines(dcontext_t *dcontext, generated_code_t *code, byte *pc,
bool thread_shared)
{
#ifdef HASHTABLE_STATISTICS
/* Stats for the syscall IBLs (note it is also using the trace hashtable, and it never hits!) */
# ifdef WINDOWS
/* ugly asserts but we'll stick with uints to save space */
IF_X64(ASSERT(CHECK_TRUNCATE_TYPE_uint
(GET_IBL_TARGET_TABLE(IBL_SHARED_SYSCALL, true)
+ offsetof(ibl_table_t, unprot_stats))));
code->shared_syscall_code.unprot_stats_offset = (uint)
GET_IBL_TARGET_TABLE(IBL_SHARED_SYSCALL, true)
+ offsetof(ibl_table_t, unprot_stats);
IF_X64(ASSERT(CHECK_TRUNCATE_TYPE_uint
(GET_IBL_TARGET_TABLE(IBL_SHARED_SYSCALL, true)
+ offsetof(ibl_table_t, entry_stats_to_lookup_table))));
code->shared_syscall_code.entry_stats_to_lookup_table_offset = (uint)
GET_IBL_TARGET_TABLE(IBL_SHARED_SYSCALL, true)
+ offsetof(ibl_table_t, entry_stats_to_lookup_table);
IF_X64(ASSERT(CHECK_TRUNCATE_TYPE_uint
(offsetof(unprot_ht_statistics_t, shared_syscall_hit_stats))));
code->shared_syscall_code.hashtable_stats_offset = (uint)
offsetof(unprot_ht_statistics_t, shared_syscall_hit_stats);
# endif /* WINDOWS */
#endif /* HASHTABLE_STATISTICS */
#ifdef WINDOWS
pc = check_size_and_cache_line(code, pc);
code->do_callback_return = pc;
pc = emit_do_callback_return(dcontext, pc, code->fcache_return, thread_shared);
if (DYNAMO_OPTION(shared_syscalls)) {
ibl_code_t *ibl_code;
if (DYNAMO_OPTION(disable_traces)) {
ibl_code = DYNAMO_OPTION(shared_bbs) ?
&SHARED_GENCODE(code->gencode_mode)->bb_ibl[IBL_SHARED_SYSCALL] :
&code->bb_ibl[IBL_SHARED_SYSCALL];
}
else if (DYNAMO_OPTION(shared_traces)) {
ibl_code = &SHARED_GENCODE(code->gencode_mode)->trace_ibl[IBL_SHARED_SYSCALL];
}
else {
ibl_code = &code->trace_ibl[IBL_SHARED_SYSCALL];
}
pc = check_size_and_cache_line(code, pc);
code->unlinked_shared_syscall = pc;
pc = emit_shared_syscall(dcontext, code, pc,
&code->shared_syscall_code,
&code->shared_syscall_code.ibl_patch,
ibl_code->indirect_branch_lookup_routine,
ibl_code->unlinked_ibl_entry,
!DYNAMO_OPTION(disable_traces), /* target_trace_table */
/* Only a single copy of shared syscall is
* emitted and afterwards it performs an IBL.
* Since both traces and BBs execute shared
* syscall (when trace building isn't disabled),
* we can't target the trace IBT table; otherwise,
* we'd miss marking secondary trace heads after
* a post-trace IBL miss. More comments are
* co-located with emit_shared_syscall().
*/
DYNAMO_OPTION(disable_traces) ?
DYNAMO_OPTION(inline_bb_ibl) :
DYNAMO_OPTION(inline_trace_ibl), /* inline_ibl_head */
ibl_code->thread_shared_routine, /* thread_shared */
&code->shared_syscall);
code->end_shared_syscall = pc;
/* Lookup at end of shared_syscall should be able to go to bb or trace,
* unrestricted (will never be an exit from a trace so no secondary trace
* restrictions) -- currently only traces supported so using the trace_ibl
* is OK.
*/
}
pc = check_size_and_cache_line(code, pc);
code->do_syscall = pc;
pc = emit_do_syscall(dcontext, code, pc, code->fcache_return, thread_shared,
0, &code->do_syscall_offs);
#else /* UNIX */
pc = check_size_and_cache_line(code, pc);
code->do_syscall = pc;
pc = emit_do_syscall(dcontext, code, pc, code->fcache_return, thread_shared,
0, &code->do_syscall_offs);
pc = check_size_and_cache_line(code, pc);
code->do_int_syscall = pc;
pc = emit_do_syscall(dcontext, code, pc, code->fcache_return, thread_shared,
0x80/*force int*/, &code->do_int_syscall_offs);
pc = check_size_and_cache_line(code, pc);
code->do_int81_syscall = pc;
pc = emit_do_syscall(dcontext, code, pc, code->fcache_return, thread_shared,
0x81/*force int*/, &code->do_int81_syscall_offs);
pc = check_size_and_cache_line(code, pc);
code->do_int82_syscall = pc;
pc = emit_do_syscall(dcontext, code, pc, code->fcache_return, thread_shared,
0x82/*force int*/, &code->do_int82_syscall_offs);
pc = check_size_and_cache_line(code, pc);
code->do_clone_syscall = pc;
pc = emit_do_clone_syscall(dcontext, code, pc, code->fcache_return, thread_shared,
&code->do_clone_syscall_offs);
# ifdef VMX86_SERVER
pc = check_size_and_cache_line(code, pc);
code->do_vmkuw_syscall = pc;
pc = emit_do_vmkuw_syscall(dcontext, code, pc, code->fcache_return, thread_shared,
&code->do_vmkuw_syscall_offs);
# endif
#endif /* UNIX */
return pc;
}
void
arch_thread_init(dcontext_t *dcontext)
{
byte *pc;
generated_code_t *code;
ibl_branch_type_t branch_type;
#ifdef X86
/* Simplest to have a real dcontext for emitting the selfmod code
* and finding the patch offsets so we do it on 1st thread init */
static bool selfmod_init = false;
if (!selfmod_init) {
ASSERT(!dynamo_initialized); /* .data +w */
selfmod_init = true;
set_selfmod_sandbox_offsets(dcontext);
}
#endif
ASSERT_CURIOSITY(proc_is_cache_aligned(get_local_state())
IF_WINDOWS(|| DYNAMO_OPTION(tls_align != 0)));
#ifdef X64
/* PR 244737: thread-private uses only shared gencode on x64 */
ASSERT(dcontext->private_code == NULL);
return;
#endif
/* For detach on windows we need to use a separate mmap so we can leave this
* memory around in case of outstanding callbacks when we detach. Without
* detach, or on linux, we could just use one of our heaps (which would save
* a little space, but would then need to coordinate with arch_thread_exit).
*/
ASSERT(GENCODE_COMMIT_SIZE < GENCODE_RESERVE_SIZE);
/* case 9474; share allocation unit w/ thread-private stack */
code = heap_mmap_reserve_post_stack(dcontext,
GENCODE_RESERVE_SIZE, GENCODE_COMMIT_SIZE);
ASSERT(code != NULL);
/* FIXME case 6493: if we split private from shared, remove this
* memset since we will no longer have a bunch of fields we don't use
*/
memset(code, 0, sizeof(*code));
code->thread_shared = false;
/* Generated code immediately follows struct */
code->gen_start_pc = ((byte *)code) + sizeof(*code);
code->commit_end_pc = ((byte *)code) + GENCODE_COMMIT_SIZE;
for (branch_type = IBL_BRANCH_TYPE_START;
branch_type < IBL_BRANCH_TYPE_END; branch_type++) {
code->trace_ibl[branch_type].initialized = false;
code->bb_ibl[branch_type].initialized = false;
code->coarse_ibl[branch_type].initialized = false;
}
dcontext->private_code = (void *) code;
pc = code->gen_start_pc;
pc = check_size_and_cache_line(code, pc);
code->fcache_enter = pc;
pc = emit_fcache_enter(dcontext, code, pc);
pc = check_size_and_cache_line(code, pc);
code->fcache_return = pc;
pc = emit_fcache_return(dcontext, code, pc);
#ifdef WINDOWS_PC_SAMPLE
code->fcache_enter_return_end = pc;
#endif
/* Currently all ibl routines target the trace hashtable
and we don't yet support basic blocks as targets of an IBL.
However, having separate routines at least enables finer control
over the indirect exit stubs.
This way we have inlined IBL stubs for traces but not for basic blocks.
TODO: now that the IBL routines are separated, we can retarget them to separate
hashtables (or alternatively chain several IBL routines together).
From trace ib exits we can only go to {traces}, so no change here.
(When we exit to a basic block we need to mark it as a trace head.)
From basic block ib exits we should be able to go to {traces + bbs - traceheads}
(for the tracehead bbs we actually have to increment counters).
From shared_syscall we should be able to go to {traces + bbs}.
TODO: we also want separate routines per indirect branch type to enable
the restricted control transfer policies to be efficiently enforced.
*/
if (!DYNAMO_OPTION(disable_traces) && DYNAMO_OPTION(shared_trace_ibl_routine)) {
if (!DYNAMO_OPTION(shared_traces)) {
/* copy all bookkeeping information from shared_code into the thread-private
copy; it is needed by get_ibl_routine*() */
ibl_branch_type_t branch_type;
for (branch_type = IBL_BRANCH_TYPE_START;
branch_type < IBL_BRANCH_TYPE_END; branch_type++) {
code->trace_ibl[branch_type] =
SHARED_GENCODE(code->gencode_mode)->trace_ibl[branch_type];
}
} /* FIXME: no private traces supported right now w/ -shared_traces */
} else if (PRIVATE_TRACES_ENABLED()) {
/* shared_trace_ibl_routine should be false for private (performance test only) */
pc = emit_ibl_routines(dcontext, code, pc, code->fcache_return,
IBL_TRACE_PRIVATE, /* source_fragment_type */
DYNAMO_OPTION(shared_trace_ibl_routine), /* thread_shared */
true, /* target_trace_table */
code->trace_ibl);
}
pc = emit_ibl_routines(dcontext, code, pc, code->fcache_return,
IBL_BB_PRIVATE, /* source_fragment_type */
/* need thread-private for selfmod regardless of sharing */
false, /* thread_shared */
!DYNAMO_OPTION(bb_ibl_targets), /* target_trace_table */
code->bb_ibl);
#ifdef WINDOWS_PC_SAMPLE
code->ibl_routines_end = pc;
#endif
#if defined(UNIX) && !defined(HAVE_TLS)
/* for HAVE_TLS we use the shared version; w/o TLS we don't
* make any shared routines (PR 361894)
*/
/* must create before emit_do_clone_syscall() in emit_syscall_routines() */
pc = check_size_and_cache_line(code, pc);
code->new_thread_dynamo_start = pc;
pc = emit_new_thread_dynamo_start(dcontext, pc);
#endif
#ifdef WINDOWS
pc = check_size_and_cache_line(code, pc);
code->fcache_enter_indirect = pc;
pc = emit_fcache_enter_indirect(dcontext, code, pc, code->fcache_return);
#endif
pc = emit_syscall_routines(dcontext, code, pc, false/*thread-private*/);
#ifdef TRACE_HEAD_CACHE_INCR
pc = check_size_and_cache_line(code, pc);
code->trace_head_incr = pc;
pc = emit_trace_head_incr(dcontext, pc, code->fcache_return);
#endif
#ifdef CHECK_RETURNS_SSE2_EMIT
/* PR 248210: unsupported feature on x64: need to move to thread-shared gencode
* if want to support it.
*/
IF_X64(ASSERT_NOT_IMPLEMENTED(false));
pc = check_size_and_cache_line(code, pc);
code->pextrw = pc;
pc = emit_pextrw(dcontext, pc);
pc = check_size_and_cache_line(code, pc);
code->pinsrw = pc;
pc = emit_pinsrw(dcontext, pc);
#endif
code->reset_exit_stub = pc;
/* reset exit stub should look just like a direct exit stub */
pc += insert_exit_stub_other_flags(dcontext,
linkstub_fragment(dcontext, (linkstub_t *)
get_reset_linkstub()),
(linkstub_t *) get_reset_linkstub(),
pc, LINK_DIRECT);
if (special_ibl_xfer_is_thread_private()) {
#ifdef CLIENT_INTERFACE
code->special_ibl_xfer[CLIENT_IBL_IDX] = pc;
pc = emit_client_ibl_xfer(dcontext, pc, code);
#endif
#ifdef UNIX
/* i#1238: native exec optimization */
if (DYNAMO_OPTION(native_exec_opt)) {
pc = check_size_and_cache_line(code, pc);
code->special_ibl_xfer[NATIVE_PLT_IBL_IDX] = pc;
pc = emit_native_plt_ibl_xfer(dcontext, pc, code);
/* native ret */
pc = check_size_and_cache_line(code, pc);
code->special_ibl_xfer[NATIVE_RET_IBL_IDX] = pc;
pc = emit_native_ret_ibl_xfer(dcontext, pc, code);
}
#endif
}
/* XXX: i#1149: we should always use thread shared gencode */
if (client_clean_call_is_thread_private()) {
pc = check_size_and_cache_line(code, pc);
code->clean_call_save = pc;
pc = emit_clean_call_save(dcontext, pc, code);
pc = check_size_and_cache_line(code, pc);
code->clean_call_restore = pc;
pc = emit_clean_call_restore(dcontext, pc, code);
}
ASSERT(pc < code->commit_end_pc);
code->gen_end_pc = pc;
release_final_page(code);
DOLOG(3, LOG_EMIT, {
dump_emitted_routines(dcontext, THREAD, "thread-private", code, pc);
});
#ifdef INTERNAL
if (INTERNAL_OPTION(gendump)) {
dump_emitted_routines_to_file(dcontext, "gencode-private", "thread-private",
code, pc);
}
#endif
#ifdef WINDOWS_PC_SAMPLE
if (dynamo_options.profile_pcs && dynamo_options.prof_pcs_gencode >= 2 &&
dynamo_options.prof_pcs_gencode <= 32) {
code->profile = create_profile(code->gen_start_pc, pc,
dynamo_options.prof_pcs_gencode, NULL);
start_profile(code->profile);
} else
code->profile = NULL;
#endif
code->writable = true;
/* For SELFPROT_GENCODE we don't make unwritable until after we patch,
* though for hotp_only we don't patch.
*/
#ifdef HOT_PATCHING_INTERFACE
if (DYNAMO_OPTION(hotp_only))
#endif
protect_generated_code(code, READONLY);
}
#ifdef WINDOWS_PC_SAMPLE
void
arch_thread_profile_exit(dcontext_t *dcontext)
{
arch_extract_profile(dcontext _IF_X64(GENCODE_FROM_DCONTEXT));
}
#endif
void
arch_thread_exit(dcontext_t *dcontext _IF_WINDOWS(bool detach_stacked_callbacks))
{
#ifdef X64
/* PR 244737: thread-private uses only shared gencode on x64 */
ASSERT(dcontext->private_code == NULL);
return;
#endif
/* We only need to unprotect private_code for profile extraction
* so we do it there to also cover the fast exit path.
* Also note that for detach w/ stacked callbacks arch_patch_syscall()
* will have already unprotected.
*/
#ifdef WINDOWS
if (!detach_stacked_callbacks && !DYNAMO_OPTION(thin_client)) {
#endif
/* ensure we didn't miss the init patch and leave it writable! */
ASSERT(!TEST(SELFPROT_GENCODE, DYNAMO_OPTION(protect_mask)) ||
!((generated_code_t *)dcontext->private_code)->writable);
#ifdef WINDOWS
}
#endif
#ifdef WINDOWS_PC_SAMPLE
arch_thread_profile_exit(dcontext);
#endif
#ifdef WINDOWS
if (!detach_stacked_callbacks)
#endif
heap_munmap_post_stack(dcontext, dcontext->private_code, GENCODE_RESERVE_SIZE);
}
#ifdef WINDOWS
/* Patch syscall routines for detach */
static void
arch_patch_syscall_common(dcontext_t *dcontext, byte *target _IF_X64(gencode_mode_t mode))
{
generated_code_t *code = get_emitted_routines_code(dcontext _IF_X64(mode));
if (code != NULL && (!is_shared_gencode(code) || dcontext == GLOBAL_DCONTEXT)) {
/* ensure we didn't miss the init patch and leave it writable! */
ASSERT(!TEST(SELFPROT_GENCODE, DYNAMO_OPTION(protect_mask)) || !code->writable);
/* this is only done for detach, so no need to re-protect */
protect_generated_code(code, WRITABLE);
emit_patch_syscall(dcontext, target _IF_X64(mode));
}
}
void
arch_patch_syscall(dcontext_t *dcontext, byte *target)
{
if (dcontext == GLOBAL_DCONTEXT) {
arch_patch_syscall_common(GLOBAL_DCONTEXT, target _IF_X64(GENCODE_X64));
IF_X64(arch_patch_syscall_common(GLOBAL_DCONTEXT, target _IF_X64(GENCODE_X86)));
} else
arch_patch_syscall_common(GLOBAL_DCONTEXT, target _IF_X64(GENCODE_FROM_DCONTEXT));
}
#endif
void
update_generated_hashtable_access(dcontext_t *dcontext)
{
update_indirect_branch_lookup(dcontext);
}
void
protect_generated_code(generated_code_t *code_in, bool writable)
{
/* i#936: prevent cl v16 (VS2010) from combining the two code->writable
* stores into one prior to the change_protection() call and from
* changing the conditionally-executed stores into always-executed
* stores of conditionally-determined values.
*/
volatile generated_code_t *code = code_in;
if (TEST(SELFPROT_GENCODE, DYNAMO_OPTION(protect_mask)) &&
code->writable != writable) {
byte *genstart = (byte *)PAGE_START(code->gen_start_pc);
if (!writable) {
ASSERT(code->writable);
code->writable = writable;
}
STATS_INC(gencode_prot_changes);
change_protection(genstart, code->commit_end_pc - genstart,
writable);
if (writable) {
ASSERT(!code->writable);
code->writable = writable;
}
}
}
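/* Note the ordering above: the writable flag is cleared before write
* permission is removed and set only after it is restored, so code->writable
* should never claim writability that the underlying pages do not have.
*/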
ibl_source_fragment_type_t
get_source_fragment_type(dcontext_t *dcontext, uint fragment_flags)
{
if (TEST(FRAG_IS_TRACE, fragment_flags)) {
return (TEST(FRAG_SHARED, fragment_flags)) ? IBL_TRACE_SHARED : IBL_TRACE_PRIVATE;
} else if (TEST(FRAG_COARSE_GRAIN, fragment_flags)) {
ASSERT(TEST(FRAG_SHARED, fragment_flags));
return IBL_COARSE_SHARED;
} else {
return (TEST(FRAG_SHARED, fragment_flags)) ? IBL_BB_SHARED : IBL_BB_PRIVATE;
}
}
#ifdef WINDOWS
bool
is_shared_syscall_routine(dcontext_t *dcontext, cache_pc pc)
{
if (DYNAMO_OPTION(shared_fragment_shared_syscalls)) {
return (pc == (cache_pc) shared_code->shared_syscall
|| pc == (cache_pc) shared_code->unlinked_shared_syscall)
IF_X64(|| (shared_code_x86 != NULL &&
(pc == (cache_pc) shared_code_x86->shared_syscall
|| pc == (cache_pc) shared_code_x86->unlinked_shared_syscall))
|| (shared_code_x86_to_x64 != NULL &&
(pc == (cache_pc) shared_code_x86_to_x64->shared_syscall
|| pc == (cache_pc) shared_code_x86_to_x64
->unlinked_shared_syscall)));
}
else {
generated_code_t *code = THREAD_GENCODE(dcontext);
return (code != NULL && (pc == (cache_pc) code->shared_syscall
|| pc == (cache_pc) code->unlinked_shared_syscall));
}
}
#endif
bool
is_indirect_branch_lookup_routine(dcontext_t *dcontext, cache_pc pc)
{
#ifdef WINDOWS
if (is_shared_syscall_routine(dcontext, pc))
return true;
#endif
/* we only care if it is found */
return get_ibl_routine_type_ex(dcontext, pc, NULL _IF_X64(NULL));
}
/* Promotes the current ibl routine from IBL_BB* to IBL_TRACE* preserving other properties */
/* There seems to be no need for the opposite transformation */
cache_pc
get_trace_ibl_routine(dcontext_t *dcontext, cache_pc current_entry)
{
ibl_type_t ibl_type = {0};
DEBUG_DECLARE(bool is_ibl = )
get_ibl_routine_type(dcontext, current_entry, &ibl_type);
ASSERT(is_ibl);
ASSERT(IS_IBL_BB(ibl_type.source_fragment_type));
return
#ifdef WINDOWS
DYNAMO_OPTION(shared_syscalls) &&
is_shared_syscall_routine(dcontext, current_entry) ? current_entry :
#endif
get_ibl_routine(dcontext, ibl_type.link_state,
(ibl_type.source_fragment_type == IBL_BB_PRIVATE) ?
IBL_TRACE_PRIVATE : IBL_TRACE_SHARED,
ibl_type.branch_type);
}
/* Shifts the current ibl routine from IBL_BB_SHARED to IBL_BB_PRIVATE,
* preserving other properties.
* There seems to be no need for the opposite transformation
*/
cache_pc
get_private_ibl_routine(dcontext_t *dcontext, cache_pc current_entry)
{
ibl_type_t ibl_type = {0};
DEBUG_DECLARE(bool is_ibl = )
get_ibl_routine_type(dcontext, current_entry, &ibl_type);
ASSERT(is_ibl);
ASSERT(IS_IBL_BB(ibl_type.source_fragment_type));
return get_ibl_routine(dcontext, ibl_type.link_state,
IBL_BB_PRIVATE, ibl_type.branch_type);
}
/* Shifts the current ibl routine from IBL_BB_PRIVATE to IBL_BB_SHARED,
* preserving other properties.
* There seems to be no need for the opposite transformation
*/
cache_pc
get_shared_ibl_routine(dcontext_t *dcontext, cache_pc current_entry)
{
ibl_type_t ibl_type = {0};
DEBUG_DECLARE(bool is_ibl = )
get_ibl_routine_type(dcontext, current_entry, &ibl_type);
ASSERT(is_ibl);
ASSERT(IS_IBL_BB(ibl_type.source_fragment_type));
return get_ibl_routine(dcontext, ibl_type.link_state,
IBL_BB_SHARED, ibl_type.branch_type);
}
/* gets the corresponding routine to current_entry but matching whether
* FRAG_IS_TRACE and FRAG_SHARED are set in flags
*/
cache_pc
get_alternate_ibl_routine(dcontext_t *dcontext, cache_pc current_entry,
uint flags)
{
ibl_type_t ibl_type = {0};
IF_X64(gencode_mode_t mode = GENCODE_FROM_DCONTEXT;)
DEBUG_DECLARE(bool is_ibl = )
get_ibl_routine_type_ex(dcontext, current_entry, &ibl_type _IF_X64(&mode));
ASSERT(is_ibl);
#ifdef WINDOWS
/* shared_syscalls does not change currently
* FIXME: once we support targeting both private and shared syscall
* we will need to change sharing here
*/
if (DYNAMO_OPTION(shared_syscalls) &&
is_shared_syscall_routine(dcontext, current_entry))
return current_entry;
#endif
return get_ibl_routine_ex(dcontext, ibl_type.link_state,
get_source_fragment_type(dcontext, flags),
ibl_type.branch_type _IF_X64(mode));
}
static ibl_entry_point_type_t
get_unlinked_type(ibl_entry_point_type_t link_state)
{
#ifdef X64
if (link_state == IBL_TRACE_CMP)
return IBL_TRACE_CMP_UNLINKED;
#endif
if (link_state == IBL_FAR)
return IBL_FAR_UNLINKED;
else
return IBL_UNLINKED;
}
static ibl_entry_point_type_t
get_linked_type(ibl_entry_point_type_t unlink_state)
{
#ifdef X64
if (unlink_state == IBL_TRACE_CMP_UNLINKED)
return IBL_TRACE_CMP;
#endif
if (unlink_state == IBL_FAR_UNLINKED)
return IBL_FAR;
else
return IBL_LINKED;
}
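/* The two helpers above are inverses over the entry-point states:
* IBL_LINKED <-> IBL_UNLINKED, IBL_FAR <-> IBL_FAR_UNLINKED, and (x64 only)
* IBL_TRACE_CMP <-> IBL_TRACE_CMP_UNLINKED.
*/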
cache_pc
get_linked_entry(dcontext_t *dcontext, cache_pc unlinked_entry)
{
ibl_type_t ibl_type = {0};
IF_X64(gencode_mode_t mode = GENCODE_FROM_DCONTEXT;)
DEBUG_DECLARE(bool is_ibl = )
get_ibl_routine_type_ex(dcontext, unlinked_entry, &ibl_type _IF_X64(&mode));
ASSERT(is_ibl && IS_IBL_UNLINKED(ibl_type.link_state));
#ifdef WINDOWS
if (unlinked_entry == unlinked_shared_syscall_routine_ex(dcontext _IF_X64(mode))) {
return shared_syscall_routine_ex(dcontext _IF_X64(mode));
}
#endif
return get_ibl_routine_ex(dcontext,
/* for -unsafe_ignore_eflags_{ibl,trace} the trace cmp
* entry and unlink are both identical, so we may mix
* them up but will have no problems */
get_linked_type(ibl_type.link_state),
ibl_type.source_fragment_type, ibl_type.branch_type
_IF_X64(mode));
}
#ifdef X64
cache_pc
get_trace_cmp_entry(dcontext_t *dcontext, cache_pc linked_entry)
{
ibl_type_t ibl_type = {0};
DEBUG_DECLARE(bool is_ibl = )
get_ibl_routine_type(dcontext, linked_entry, &ibl_type);
IF_WINDOWS(ASSERT(linked_entry != shared_syscall_routine(dcontext)));
ASSERT(is_ibl && ibl_type.link_state == IBL_LINKED);
return get_ibl_routine(dcontext, IBL_TRACE_CMP,
ibl_type.source_fragment_type, ibl_type.branch_type);
}
#endif
cache_pc
get_unlinked_entry(dcontext_t *dcontext, cache_pc linked_entry)
{
ibl_type_t ibl_type = {0};
IF_X64(gencode_mode_t mode = GENCODE_FROM_DCONTEXT;)
DEBUG_DECLARE(bool is_ibl = )
get_ibl_routine_type_ex(dcontext, linked_entry, &ibl_type _IF_X64(&mode));
ASSERT(is_ibl && IS_IBL_LINKED(ibl_type.link_state));
#ifdef WINDOWS
if (linked_entry == shared_syscall_routine_ex(dcontext _IF_X64(mode)))
return unlinked_shared_syscall_routine_ex(dcontext _IF_X64(mode));
#endif
return get_ibl_routine_ex(dcontext, get_unlinked_type(ibl_type.link_state),
ibl_type.source_fragment_type, ibl_type.branch_type
_IF_X64(mode));
}
static bool
in_generated_shared_routine(dcontext_t *dcontext, cache_pc pc)
{
if (USE_SHARED_GENCODE()) {
return (pc >= (cache_pc)(shared_code->gen_start_pc) &&
pc < (cache_pc)(shared_code->commit_end_pc))
IF_X64(|| (shared_code_x86 != NULL &&
pc >= (cache_pc)(shared_code_x86->gen_start_pc) &&
pc < (cache_pc)(shared_code_x86->commit_end_pc))
|| (shared_code_x86_to_x64 != NULL &&
pc >= (cache_pc)(shared_code_x86_to_x64->gen_start_pc) &&
pc < (cache_pc)(shared_code_x86_to_x64->commit_end_pc)))
;
}
return false;
}
bool
in_generated_routine(dcontext_t *dcontext, cache_pc pc)
{
generated_code_t *code = THREAD_GENCODE(dcontext);
return ((pc >= (cache_pc)(code->gen_start_pc) &&
pc < (cache_pc)(code->commit_end_pc))
|| in_generated_shared_routine(dcontext, pc));
/* FIXME: what about inlined IBL stubs */
}
bool
in_context_switch_code(dcontext_t *dcontext, cache_pc pc)
{
return (pc >= (cache_pc)fcache_enter_routine(dcontext) &&
/* get last emitted routine */
pc <= get_ibl_routine(dcontext, IBL_LINKED, IBL_SOURCE_TYPE_END-1,
IBL_BRANCH_TYPE_START));
/* FIXME: too hacky, should have an extra field for PC profiling */
}
bool
in_indirect_branch_lookup_code(dcontext_t *dcontext, cache_pc pc)
{
ibl_source_fragment_type_t source_fragment_type;
ibl_branch_type_t branch_type;
for (source_fragment_type = IBL_SOURCE_TYPE_START;
source_fragment_type < IBL_SOURCE_TYPE_END;
source_fragment_type++) {
for (branch_type = IBL_BRANCH_TYPE_START;
branch_type < IBL_BRANCH_TYPE_END;
branch_type++) {
if (pc >= get_ibl_routine(dcontext, IBL_LINKED, source_fragment_type, branch_type) &&
pc < get_ibl_routine(dcontext, IBL_UNLINKED, source_fragment_type, branch_type))
return true;
}
}
return false; /* not an IBL */
/* FIXME: what about inlined IBL stubs */
}
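/* The range check above implicitly assumes that each routine's unlinked entry
* point lies at a higher address than its linked entry, so the half-open range
* [linked, unlinked) is treated as the lookup code for that source type and
* branch type.
*/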
fcache_enter_func_t
fcache_enter_routine(dcontext_t *dcontext)
{
generated_code_t *code = THREAD_GENCODE(dcontext);
return (fcache_enter_func_t) convert_data_to_function(code->fcache_enter);
}
/* exported to dispatch.c */
fcache_enter_func_t
get_fcache_enter_private_routine(dcontext_t *dcontext)
{
return fcache_enter_routine(dcontext);
}
cache_pc
get_reset_exit_stub(dcontext_t *dcontext)
{
generated_code_t *code = THREAD_GENCODE(dcontext);
return (cache_pc) code->reset_exit_stub;
}
cache_pc
get_do_syscall_entry(dcontext_t *dcontext)
{
generated_code_t *code = THREAD_GENCODE(dcontext);
return (cache_pc) code->do_syscall;
}
#ifdef WINDOWS
fcache_enter_func_t
get_fcache_enter_indirect_routine(dcontext_t *dcontext)
{
generated_code_t *code = THREAD_GENCODE(dcontext);
return (fcache_enter_func_t) convert_data_to_function(code->fcache_enter_indirect);
}
cache_pc
get_do_callback_return_entry(dcontext_t *dcontext)
{
generated_code_t *code = THREAD_GENCODE(dcontext);
return (cache_pc) code->do_callback_return;
}
#else
/* PR 286922: we need an int syscall even when vsyscall is sys{call,enter} */
cache_pc
get_do_int_syscall_entry(dcontext_t *dcontext)
{
generated_code_t *code = THREAD_GENCODE(dcontext);
return (cache_pc) code->do_int_syscall;
}
cache_pc
get_do_int81_syscall_entry(dcontext_t *dcontext)
{
generated_code_t *code = THREAD_GENCODE(dcontext);
return (cache_pc) code->do_int81_syscall;
}
cache_pc
get_do_int82_syscall_entry(dcontext_t *dcontext)
{
generated_code_t *code = THREAD_GENCODE(dcontext);
return (cache_pc) code->do_int82_syscall;
}
cache_pc
get_do_clone_syscall_entry(dcontext_t *dcontext)
{
generated_code_t *code = THREAD_GENCODE(dcontext);
return (cache_pc) code->do_clone_syscall;
}
# ifdef VMX86_SERVER
cache_pc
get_do_vmkuw_syscall_entry(dcontext_t *dcontext)
{
generated_code_t *code = THREAD_GENCODE(dcontext);
return (cache_pc) code->do_vmkuw_syscall;
}
# endif
#endif
cache_pc
fcache_return_routine(dcontext_t *dcontext)
{
generated_code_t *code = THREAD_GENCODE(dcontext);
return (cache_pc) code->fcache_return;
}
cache_pc
fcache_return_routine_ex(dcontext_t *dcontext _IF_X64(gencode_mode_t mode))
{
generated_code_t *code = get_emitted_routines_code(dcontext _IF_X64(mode));
return (cache_pc) code->fcache_return;
}
cache_pc
fcache_return_coarse_routine(IF_X64_ELSE(gencode_mode_t mode, void))
{
generated_code_t *code = get_shared_gencode(GLOBAL_DCONTEXT _IF_X64(mode));
ASSERT(DYNAMO_OPTION(coarse_units));
if (code == NULL)
return NULL;
else
return (cache_pc) code->fcache_return_coarse;
}
cache_pc
trace_head_return_coarse_routine(IF_X64_ELSE(gencode_mode_t mode, void))
{
generated_code_t *code = get_shared_gencode(GLOBAL_DCONTEXT _IF_X64(mode));
ASSERT(DYNAMO_OPTION(coarse_units));
if (code == NULL)
return NULL;
else
return (cache_pc) code->trace_head_return_coarse;
}
cache_pc
get_clean_call_save(dcontext_t *dcontext _IF_X64(gencode_mode_t mode))
{
generated_code_t *code;
if (client_clean_call_is_thread_private())
code = get_emitted_routines_code(dcontext _IF_X64(mode));
else
code = get_emitted_routines_code(GLOBAL_DCONTEXT _IF_X64(mode));
ASSERT(code != NULL);
return (cache_pc) code->clean_call_save;
}
cache_pc
get_clean_call_restore(dcontext_t *dcontext _IF_X64(gencode_mode_t mode))
{
generated_code_t *code;
if (client_clean_call_is_thread_private())
code = get_emitted_routines_code(dcontext _IF_X64(mode));
else
code = get_emitted_routines_code(GLOBAL_DCONTEXT _IF_X64(mode));
ASSERT(code != NULL);
return (cache_pc) code->clean_call_restore;
}
static inline cache_pc
get_special_ibl_xfer_entry(dcontext_t *dcontext, int index)
{
generated_code_t *code;
if (special_ibl_xfer_is_thread_private()) {
ASSERT(dcontext != GLOBAL_DCONTEXT);
code = THREAD_GENCODE(dcontext);
} else
code = SHARED_GENCODE_MATCH_THREAD(dcontext);
ASSERT(index >= 0 && index < NUM_SPECIAL_IBL_XFERS);
return code->special_ibl_xfer[index];
}
#ifdef CLIENT_INTERFACE
cache_pc
get_client_ibl_xfer_entry(dcontext_t *dcontext)
{
return get_special_ibl_xfer_entry(dcontext, CLIENT_IBL_IDX);
}
#endif
#ifdef UNIX
cache_pc
get_native_plt_ibl_xfer_entry(dcontext_t *dcontext)
{
return get_special_ibl_xfer_entry(dcontext, NATIVE_PLT_IBL_IDX);
}
cache_pc
get_native_ret_ibl_xfer_entry(dcontext_t *dcontext)
{
return get_special_ibl_xfer_entry(dcontext, NATIVE_RET_IBL_IDX);
}
#endif
/* returns false if target is not an IBL routine.
* if type is not NULL it is set to the type of the found routine.
* if mode_out is NULL, dcontext cannot be GLOBAL_DCONTEXT.
* if mode_out is not NULL, it is set to which mode the found routine is in.
*/
bool
get_ibl_routine_type_ex(dcontext_t *dcontext, cache_pc target, ibl_type_t *type
_IF_X64(gencode_mode_t *mode_out))
{
ibl_entry_point_type_t link_state;
ibl_source_fragment_type_t source_fragment_type;
ibl_branch_type_t branch_type;
#ifdef X64
gencode_mode_t mode;
#endif
/* An up-front range check. Many calls into this routine are with addresses
* outside of the IBL code or the generated_code_t in which IBL resides.
* For all of those cases, this quick up-front check saves the expense of
* examining all of the different IBL entry points.
*/
if ((shared_code == NULL ||
target < shared_code->gen_start_pc ||
target >= shared_code->gen_end_pc)
IF_X64(&& (shared_code_x86 == NULL ||
target < shared_code_x86->gen_start_pc ||
target >= shared_code_x86->gen_end_pc)
&& (shared_code_x86_to_x64 == NULL ||
target < shared_code_x86_to_x64->gen_start_pc ||
target >= shared_code_x86_to_x64->gen_end_pc))) {
if (dcontext == GLOBAL_DCONTEXT ||
/* PR 244737: thread-private uses shared gencode on x64 */
IF_X64(true ||)
target < ((generated_code_t *)dcontext->private_code)->gen_start_pc ||
target >= ((generated_code_t *)dcontext->private_code)->gen_end_pc)
return false;
}
/* a decent compiler should inline these nested loops */
/* iterate in order <linked, unlinked> */
for (link_state = IBL_LINKED;
/* keep in mind we need a signed comparison when going downwards */
(int)link_state >= (int)IBL_UNLINKED; link_state-- ) {
/* it is OK to compare to IBL_BB_PRIVATE even when !SHARED_FRAGMENTS_ENABLED() */
for (source_fragment_type = IBL_SOURCE_TYPE_START;
source_fragment_type < IBL_SOURCE_TYPE_END;
source_fragment_type++) {
for (branch_type = IBL_BRANCH_TYPE_START;
branch_type < IBL_BRANCH_TYPE_END;
branch_type++) {
#ifdef X64
for (mode = GENCODE_X64; mode <= GENCODE_X86_TO_X64; mode++) {
#endif
if (target == get_ibl_routine_ex(dcontext, link_state,
source_fragment_type,
branch_type _IF_X64(mode))) {
if (type) {
type->link_state = link_state;
type->source_fragment_type = source_fragment_type;
type->branch_type = branch_type;
}
#ifdef X64
if (mode_out != NULL)
*mode_out = mode;
#endif
return true;
}
#ifdef X64
}
#endif
}
}
}
#ifdef WINDOWS
if (is_shared_syscall_routine(dcontext, target)) {
if (type != NULL) {
type->branch_type = IBL_SHARED_SYSCALL;
type->source_fragment_type = DEFAULT_IBL_BB();
#ifdef X64
for (mode = GENCODE_X64; mode <= GENCODE_X86_TO_X64; mode++) {
#endif
if (target == unlinked_shared_syscall_routine_ex(dcontext _IF_X64(mode)))
type->link_state = IBL_UNLINKED;
else IF_X64(if (target ==
shared_syscall_routine_ex(dcontext _IF_X64(mode))))
type->link_state = IBL_LINKED;
#ifdef X64
else
continue;
if (mode_out != NULL)
*mode_out = mode;
break;
}
#endif
}
return true;
}
#endif
return false; /* not an IBL */
}
bool
get_ibl_routine_type(dcontext_t *dcontext, cache_pc target, ibl_type_t *type)
{
IF_X64(ASSERT(dcontext != GLOBAL_DCONTEXT)); /* should call get_ibl_routine_type_ex */
return get_ibl_routine_type_ex(dcontext, target, type _IF_X64(NULL));
}
/* returns false if target is not an IBL template.
* if type is not NULL it is set to the type of the found routine.
*/
static bool
get_ibl_routine_template_type(dcontext_t *dcontext, cache_pc target, ibl_type_t *type
_IF_X64(gencode_mode_t *mode_out))
{
ibl_source_fragment_type_t source_fragment_type;
ibl_branch_type_t branch_type;
#ifdef X64
gencode_mode_t mode;
#endif
for (source_fragment_type = IBL_SOURCE_TYPE_START;
source_fragment_type < IBL_SOURCE_TYPE_END;
source_fragment_type++) {
for (branch_type = IBL_BRANCH_TYPE_START;
branch_type < IBL_BRANCH_TYPE_END;
branch_type++) {
#ifdef X64
for (mode = GENCODE_X64; mode <= GENCODE_X86_TO_X64; mode++) {
#endif
if (target == get_ibl_routine_template(dcontext, source_fragment_type,
branch_type _IF_X64(mode))) {
if (type) {
type->link_state = IBL_TEMPLATE;
type->source_fragment_type = source_fragment_type;
type->branch_type = branch_type;
#ifdef X64
if (mode_out != NULL)
*mode_out = mode;
#endif
}
return true;
#ifdef X64
}
#endif
}
}
}
return false; /* not an IBL template */
}
const char *
get_branch_type_name(ibl_branch_type_t branch_type)
{
static const char *const ibl_brtype_names[IBL_BRANCH_TYPE_END] =
{"ret", "indcall", "indjmp"};
return ibl_brtype_names[branch_type];
}
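/* Classifies an indirect-branch instruction as a return, indirect call, or
* indirect jump for IBL routine selection.
*/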
ibl_branch_type_t
get_ibl_branch_type(instr_t *instr)
{
ASSERT(instr_is_mbr(instr)
IF_X86(|| instr_get_opcode(instr) == OP_jmp_far
|| instr_get_opcode(instr) == OP_call_far));
if (instr_is_return(instr))
return IBL_RETURN;
else if (instr_is_call_indirect(instr))
return IBL_INDCALL;
else
return IBL_INDJMP;
}
/* returns a symbolic name if target is an IBL routine or an IBL template,
* otherwise returns NULL
*/
const char *
get_ibl_routine_name(dcontext_t *dcontext, cache_pc target, const char **ibl_brtype_name)
{
static const char *const
ibl_routine_names IF_X64([3]) [IBL_SOURCE_TYPE_END][IBL_LINK_STATE_END] = {
IF_X64({)
{"shared_unlinked_bb_ibl", "shared_delete_bb_ibl",
"shared_bb_far", "shared_bb_far_unlinked",
IF_X64_("shared_bb_cmp") IF_X64_("shared_bb_cmp_unlinked")
"shared_bb_ibl", "shared_bb_ibl_template"},
{"shared_unlinked_trace_ibl", "shared_delete_trace_ibl",
"shared_trace_far", "shared_trace_far_unlinked",
IF_X64_("shared_trace_cmp") IF_X64_("shared_trace_cmp_unlinked")
"shared_trace_ibl", "shared_trace_ibl_template"},
{"private_unlinked_bb_ibl", "private_delete_bb_ibl",
"private_bb_far", "private_bb_far_unlinked",
IF_X64_("private_bb_cmp") IF_X64_("private_bb_cmp_unlinked")
"private_bb_ibl", "private_bb_ibl_template"},
{"private_unlinked_trace_ibl", "private_delete_trace_ibl",
"private_trace_far", "private_trace_far_unlinked",
IF_X64_("private_trace_cmp") IF_X64_("private_trace_cmp_unlinked")
"private_trace_ibl", "private_trace_ibl_template"},
{"shared_unlinked_coarse_ibl", "shared_delete_coarse_ibl",
"shared_coarse_trace_far", "shared_coarse_trace_far_unlinked",
IF_X64_("shared_coarse_trace_cmp") IF_X64_("shared_coarse_trace_cmp_unlinked")
"shared_coarse_ibl", "shared_coarse_ibl_template"},
#ifdef X64
/* PR 282576: for WOW64 processes we have separate x86 routines */
}, {
{"x86_shared_unlinked_bb_ibl", "x86_shared_delete_bb_ibl",
"x86_shared_bb_far", "x86_shared_bb_far_unlinked",
IF_X64_("x86_shared_bb_cmp") IF_X64_("x86_shared_bb_cmp_unlinked")
"x86_shared_bb_ibl", "x86_shared_bb_ibl_template"},
{"x86_shared_unlinked_trace_ibl", "x86_shared_delete_trace_ibl",
"x86_shared_trace_far", "x86_shared_trace_far_unlinked",
IF_X64_("x86_shared_trace_cmp") IF_X64_("x86_shared_trace_cmp_unlinked")
"x86_shared_trace_ibl", "x86_shared_trace_ibl_template"},
{"x86_private_unlinked_bb_ibl", "x86_private_delete_bb_ibl",
"x86_private_bb_far", "x86_private_bb_far_unlinked",
IF_X64_("x86_private_bb_cmp") IF_X64_("x86_private_bb_cmp_unlinked")
"x86_private_bb_ibl", "x86_private_bb_ibl_template"},
{"x86_private_unlinked_trace_ibl", "x86_private_delete_trace_ibl",
"x86_private_trace_far", "x86_private_trace_far_unlinked",
IF_X64_("x86_private_trace_cmp") IF_X64_("x86_private_trace_cmp_unlinked")
"x86_private_trace_ibl", "x86_private_trace_ibl_template"},
{"x86_shared_unlinked_coarse_ibl", "x86_shared_delete_coarse_ibl",
"x86_shared_coarse_trace_far",
"x86_shared_coarse_trace_far_unlinked",
IF_X64_("x86_shared_coarse_trace_cmp")
IF_X64_("x86_shared_coarse_trace_cmp_unlinked")
"x86_shared_coarse_ibl", "x86_shared_coarse_ibl_template"},
}, {
{"x86_to_x64_shared_unlinked_bb_ibl", "x86_to_x64_shared_delete_bb_ibl",
"x86_to_x64_shared_bb_far", "x86_to_x64_shared_bb_far_unlinked",
"x86_to_x64_shared_bb_cmp", "x86_to_x64_shared_bb_cmp_unlinked",
"x86_to_x64_shared_bb_ibl", "x86_to_x64_shared_bb_ibl_template"},
{"x86_to_x64_shared_unlinked_trace_ibl", "x86_to_x64_shared_delete_trace_ibl",
"x86_to_x64_shared_trace_far", "x86_to_x64_shared_trace_far_unlinked",
"x86_to_x64_shared_trace_cmp", "x86_to_x64_shared_trace_cmp_unlinked",
"x86_to_x64_shared_trace_ibl", "x86_to_x64_shared_trace_ibl_template"},
{"x86_to_x64_private_unlinked_bb_ibl", "x86_to_x64_private_delete_bb_ibl",
"x86_to_x64_private_bb_far", "x86_to_x64_private_bb_far_unlinked",
"x86_to_x64_private_bb_cmp", "x86_to_x64_private_bb_cmp_unlinked",
"x86_to_x64_private_bb_ibl", "x86_to_x64_private_bb_ibl_template"},
{"x86_to_x64_private_unlinked_trace_ibl", "x86_to_x64_private_delete_trace_ibl",
"x86_to_x64_private_trace_far", "x86_to_x64_private_trace_far_unlinked",
"x86_to_x64_private_trace_cmp", "x86_to_x64_private_trace_cmp_unlinked",
"x86_to_x64_private_trace_ibl", "x86_to_x64_private_trace_ibl_template"},
{"x86_to_x64_shared_unlinked_coarse_ibl", "x86_to_x64_shared_delete_coarse_ibl",
"x86_to_x64_shared_coarse_trace_far",
"x86_to_x64_shared_coarse_trace_far_unlinked",
"x86_to_x64_shared_coarse_trace_cmp",
"x86_to_x64_shared_coarse_trace_cmp_unlinked",
"x86_to_x64_shared_coarse_ibl", "x86_to_x64_shared_coarse_ibl_template"},
}
#endif
};
ibl_type_t ibl_type;
#ifdef X64
gencode_mode_t mode;
#endif
if (!get_ibl_routine_type_ex(dcontext, target, &ibl_type _IF_X64(&mode))) {
/* not an IBL routine */
if (!get_ibl_routine_template_type(dcontext, target, &ibl_type _IF_X64(&mode))) {
return NULL; /* not an IBL template either */
}
}
/* ibl_type is valid and will give routine or template name, and qualifier */
*ibl_brtype_name = get_branch_type_name(ibl_type.branch_type);
return ibl_routine_names IF_X64([mode])
[ibl_type.source_fragment_type][ibl_type.link_state];
}
static inline
ibl_code_t*
get_ibl_routine_code_internal(dcontext_t *dcontext,
ibl_source_fragment_type_t source_fragment_type,
ibl_branch_type_t branch_type
_IF_X64(gencode_mode_t mode))
{
#ifdef X64
if (((mode == GENCODE_X86 ||
(mode == GENCODE_FROM_DCONTEXT && dcontext != GLOBAL_DCONTEXT &&
dcontext->isa_mode == DR_ISA_IA32 && !X64_CACHE_MODE_DC(dcontext))) &&
shared_code_x86 == NULL) ||
((mode == GENCODE_X86_TO_X64 ||
(mode == GENCODE_FROM_DCONTEXT && dcontext != GLOBAL_DCONTEXT &&
dcontext->isa_mode == DR_ISA_IA32 && X64_CACHE_MODE_DC(dcontext))) &&
shared_code_x86_to_x64 == NULL))
return NULL;
#endif
switch (source_fragment_type) {
case IBL_BB_SHARED:
if (!USE_SHARED_BB_IBL())
return NULL;
return &(get_shared_gencode(dcontext _IF_X64(mode))->bb_ibl[branch_type]);
case IBL_BB_PRIVATE:
return &(get_emitted_routines_code(dcontext _IF_X64(mode))->bb_ibl[branch_type]);
case IBL_TRACE_SHARED:
if (!USE_SHARED_TRACE_IBL())
return NULL;
return &(get_shared_gencode(dcontext _IF_X64(mode))->trace_ibl[branch_type]);
case IBL_TRACE_PRIVATE:
return &(get_emitted_routines_code(dcontext _IF_X64(mode))
->trace_ibl[branch_type]);
case IBL_COARSE_SHARED:
if (!DYNAMO_OPTION(coarse_units))
return NULL;
return &(get_shared_gencode(dcontext _IF_X64(mode))->coarse_ibl[branch_type]);
default:
ASSERT_NOT_REACHED();
}
ASSERT_NOT_REACHED();
return NULL;
}
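/* Returns the requested entry point (linked, unlinked, target-delete, far, etc.)
* of the IBL routine for the given source fragment type and branch type,
* or NULL if that routine has not been emitted.
*/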
cache_pc
get_ibl_routine_ex(dcontext_t *dcontext, ibl_entry_point_type_t entry_type,
ibl_source_fragment_type_t source_fragment_type,
ibl_branch_type_t branch_type _IF_X64(gencode_mode_t mode))
{
ibl_code_t *ibl_code =
get_ibl_routine_code_internal(dcontext,
source_fragment_type, branch_type _IF_X64(mode));
if (ibl_code == NULL || !ibl_code->initialized)
return NULL;
switch (entry_type) {
case IBL_LINKED:
return (cache_pc) ibl_code->indirect_branch_lookup_routine;
case IBL_UNLINKED:
return (cache_pc) ibl_code->unlinked_ibl_entry;
case IBL_DELETE:
return (cache_pc) ibl_code->target_delete_entry;
case IBL_FAR:
return (cache_pc) ibl_code->far_ibl;
case IBL_FAR_UNLINKED:
return (cache_pc) ibl_code->far_ibl_unlinked;
#ifdef X64
case IBL_TRACE_CMP:
return (cache_pc) ibl_code->trace_cmp_entry;
case IBL_TRACE_CMP_UNLINKED:
return (cache_pc) ibl_code->trace_cmp_unlinked;
#endif
default:
ASSERT_NOT_REACHED();
}
return NULL;
}
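/* Convenience wrapper for get_ibl_routine_ex() that uses the dcontext's own
* gencode mode (GENCODE_FROM_DCONTEXT on x64).
*/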
cache_pc
get_ibl_routine(dcontext_t *dcontext, ibl_entry_point_type_t entry_type,
ibl_source_fragment_type_t source_fragment_type,
ibl_branch_type_t branch_type)
{
return get_ibl_routine_ex(dcontext, entry_type, source_fragment_type,
branch_type _IF_X64(GENCODE_FROM_DCONTEXT));
}
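/* Returns the inline IBL stub template for the given source fragment type and
* branch type, or NULL if it has not been emitted.
*/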
cache_pc
get_ibl_routine_template(dcontext_t *dcontext,
ibl_source_fragment_type_t source_fragment_type,
ibl_branch_type_t branch_type
_IF_X64(gencode_mode_t mode))
{
ibl_code_t *ibl_code = get_ibl_routine_code_internal
(dcontext, source_fragment_type, branch_type _IF_X64(mode));
if (ibl_code == NULL || !ibl_code->initialized)
return NULL;
return ibl_code->inline_ibl_stub_template;
}
/* Convert FRAG_TABLE_* flags to FRAG_* flags */
/* FIXME This seems more appropriate in fragment.c but since there's no
* need for the functionality there, we place it here and inline it. We
* can move it if other pieces need the functionality later.
*/
static inline uint
table_flags_to_frag_flags(dcontext_t *dcontext, ibl_table_t *table)
{
uint flags = 0;
if (TEST(FRAG_TABLE_TARGET_SHARED, table->table_flags))
flags |= FRAG_SHARED;
if (TEST(FRAG_TABLE_TRACE, table->table_flags))
flags |= FRAG_IS_TRACE;
/* We want to make sure that any updates to FRAG_TABLE_* flags
* are reflected in this routine. */
ASSERT_NOT_IMPLEMENTED(!TESTANY(~(FRAG_TABLE_INCLUSIVE_HIERARCHY |
FRAG_TABLE_IBL_TARGETED |
FRAG_TABLE_TARGET_SHARED |
FRAG_TABLE_SHARED |
FRAG_TABLE_TRACE |
FRAG_TABLE_PERSISTENT |
HASHTABLE_USE_ENTRY_STATS |
HASHTABLE_ALIGN_TABLE),
table->table_flags));
return flags;
}
/* Derive the PC of an entry point that aids in atomic hashtable deletion.
* FIXME: Once we can correlate from what table the fragment is being
* deleted and therefore type of the corresponding IBL routine, we can
* widen the interface and be more precise about which entry point
* is returned, i.e., specify something other than IBL_GENERIC.
*/
cache_pc
get_target_delete_entry_pc(dcontext_t *dcontext, ibl_table_t *table)
{
/*
* A shared IBL routine makes sure any registers restored on the
* miss path are all saved in the current dcontext, and copies
* ECX into both the TLS scratch slot and the dcontext, so it is OK
* to simply return the thread-private routine. We have
* proven that the two are functionally equivalent (all data in the
* shared lookup is fs-indirected to the private dcontext).
*
* FIXME: we can in fact use a global delete_pc entry point that
* is the unlinked path of a shared_ibl_not_found, just like we
* could share all routines. Since it doesn't matter much for now
* we can also return the slightly more efficient private
* ibl_not_found path.
*/
uint frag_flags = table_flags_to_frag_flags(dcontext, table);
ASSERT(dcontext != GLOBAL_DCONTEXT);
return (cache_pc) get_ibl_routine(dcontext, IBL_DELETE,
get_source_fragment_type(dcontext,
frag_flags),
table->branch_type);
}
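/* Returns the ibl_code_t descriptor for the IBL routine matching the given
* branch type and fragment flags.
*/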
ibl_code_t *
get_ibl_routine_code_ex(dcontext_t *dcontext, ibl_branch_type_t branch_type,
uint fragment_flags _IF_X64(gencode_mode_t mode))
{
ibl_source_fragment_type_t source_fragment_type =
get_source_fragment_type(dcontext, fragment_flags);
ibl_code_t *ibl_code =
get_ibl_routine_code_internal(dcontext, source_fragment_type, branch_type
_IF_X64(mode));
ASSERT(ibl_code != NULL);
return ibl_code;
}
ibl_code_t *
get_ibl_routine_code(dcontext_t *dcontext, ibl_branch_type_t branch_type,
uint fragment_flags)
{
return get_ibl_routine_code_ex(dcontext, branch_type, fragment_flags
_IF_X64(dcontext == GLOBAL_DCONTEXT ?
FRAGMENT_GENCODE_MODE(fragment_flags) :
GENCODE_FROM_DCONTEXT));
}
#ifdef WINDOWS
/* FIXME We support private and shared fragments simultaneously targeting
* shared syscall -- -shared_fragment_shared_syscalls must be on and both
* fragment types target the entry point in shared_code. We could optimize
* the private fragment->shared syscall path (case 8025).
*/
/* PR 282576: These separate routines are ugly, but less ugly than adding param to
* the main routines, which are called in many places and usually passed a
* non-global dcontext; also less ugly than adding GLOBAL_DCONTEXT_X86.
*/
cache_pc
shared_syscall_routine_ex(dcontext_t *dcontext _IF_X64(gencode_mode_t mode))
{
generated_code_t *code = DYNAMO_OPTION(shared_fragment_shared_syscalls) ?
get_shared_gencode(dcontext _IF_X64(mode)) :
get_emitted_routines_code(dcontext _IF_X64(mode));
if (code == NULL)
return NULL;
else
return (cache_pc) code->shared_syscall;
}
cache_pc
shared_syscall_routine(dcontext_t *dcontext)
{
return shared_syscall_routine_ex(dcontext _IF_X64(GENCODE_FROM_DCONTEXT));
}
cache_pc
unlinked_shared_syscall_routine_ex(dcontext_t *dcontext _IF_X64(gencode_mode_t mode))
{
generated_code_t *code = DYNAMO_OPTION(shared_fragment_shared_syscalls) ?
get_shared_gencode(dcontext _IF_X64(mode)) :
get_emitted_routines_code(dcontext _IF_X64(mode));
if (code == NULL)
return NULL;
else
return (cache_pc) code->unlinked_shared_syscall;
}
cache_pc
unlinked_shared_syscall_routine(dcontext_t *dcontext)
{
return unlinked_shared_syscall_routine_ex(dcontext _IF_X64(GENCODE_FROM_DCONTEXT));
}
cache_pc
after_shared_syscall_code(dcontext_t *dcontext)
{
return after_shared_syscall_code_ex(dcontext _IF_X64(GENCODE_FROM_DCONTEXT));
}
cache_pc
after_shared_syscall_code_ex(dcontext_t *dcontext _IF_X64(gencode_mode_t mode))
{
generated_code_t *code = get_emitted_routines_code(dcontext _IF_X64(mode));
ASSERT(code != NULL);
return (cache_pc) (code->unlinked_shared_syscall + code->sys_syscall_offs);
}
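/* Returns the address immediately after the shared syscall's system call
* instruction: past the int 2e for the Sygate-int case, else the post-syscall
* point inside our shared_syscall gencode.
*/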
cache_pc
after_shared_syscall_addr(dcontext_t *dcontext)
{
ASSERT(get_syscall_method() != SYSCALL_METHOD_UNINITIALIZED);
if (DYNAMO_OPTION(sygate_int) &&
get_syscall_method() == SYSCALL_METHOD_INT)
return (int_syscall_address + INT_LENGTH /* sizeof int 2e */);
else
return after_shared_syscall_code(dcontext);
}
/* These are Windows-only since Linux needs to disambiguate its two
* versions of do_syscall
*/
cache_pc
after_do_syscall_code(dcontext_t *dcontext)
{
return after_do_syscall_code_ex(dcontext _IF_X64(GENCODE_FROM_DCONTEXT));
}
cache_pc
after_do_syscall_code_ex(dcontext_t *dcontext _IF_X64(gencode_mode_t mode))
{
generated_code_t *code = get_emitted_routines_code(dcontext _IF_X64(mode));
ASSERT(code != NULL);
return (cache_pc) (code->do_syscall + code->do_syscall_offs);
}
cache_pc
after_do_syscall_addr(dcontext_t *dcontext)
{
ASSERT(get_syscall_method() != SYSCALL_METHOD_UNINITIALIZED);
if (DYNAMO_OPTION(sygate_int) &&
get_syscall_method() == SYSCALL_METHOD_INT)
return (int_syscall_address + INT_LENGTH /* sizeof int 2e */);
else
return after_do_syscall_code(dcontext);
}
#else
cache_pc
after_do_shared_syscall_addr(dcontext_t *dcontext)
{
/* PR 212570: return the thread-shared do_syscall used for vsyscall hook */
generated_code_t *code = get_emitted_routines_code(GLOBAL_DCONTEXT
_IF_X64(GENCODE_X64));
IF_X64(ASSERT_NOT_REACHED()); /* else have to worry about GENCODE_X86 */
ASSERT(code != NULL);
ASSERT(code->do_syscall != NULL);
return (cache_pc) (code->do_syscall + code->do_syscall_offs);
}
cache_pc
after_do_syscall_addr(dcontext_t *dcontext)
{
/* PR 212570: return the thread-shared do_syscall used for vsyscall hook */
generated_code_t *code = get_emitted_routines_code(dcontext
_IF_X64(GENCODE_FROM_DCONTEXT));
ASSERT(code != NULL);
ASSERT(code->do_syscall != NULL);
return (cache_pc) (code->do_syscall + code->do_syscall_offs);
}
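/* Returns whether pc is the point just after the syscall instruction in the
* main do_syscall gencode.
*/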
bool
is_after_main_do_syscall_addr(dcontext_t *dcontext, cache_pc pc)
{
generated_code_t *code = get_emitted_routines_code(dcontext
_IF_X64(GENCODE_FROM_DCONTEXT));
ASSERT(code != NULL);
return (pc == (cache_pc) (code->do_syscall + code->do_syscall_offs));
}
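/* Like is_after_main_do_syscall_addr() but also matches do_int_syscall
* (and do_vmkuw_syscall under VMX86_SERVER).
*/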
bool
is_after_do_syscall_addr(dcontext_t *dcontext, cache_pc pc)
{
generated_code_t *code = get_emitted_routines_code(dcontext
_IF_X64(GENCODE_FROM_DCONTEXT));
ASSERT(code != NULL);
return (pc == (cache_pc) (code->do_syscall + code->do_syscall_offs) ||
pc == (cache_pc) (code->do_int_syscall + code->do_int_syscall_offs)
IF_VMX86(|| pc == (cache_pc) (code->do_vmkuw_syscall +
code->do_vmkuw_syscall_offs)));
}
#endif
bool
is_after_syscall_address(dcontext_t *dcontext, cache_pc pc)
{
#ifdef WINDOWS
if (pc == after_shared_syscall_addr(dcontext))
return true;
if (pc == after_do_syscall_addr(dcontext))
return true;
return false;
#else
return is_after_do_syscall_addr(dcontext, pc);
#endif
/* NOTE - we ignore global_do_syscall since that's only used in special
* circumstances and is not something the callers (recreate_app_state)
* really know how to handle. */
}
/* Needed because Linux can have sysenter as the main syscall method but also
* has generated int syscall routines.
*/
bool
is_after_syscall_that_rets(dcontext_t *dcontext, cache_pc pc)
{
#ifdef WINDOWS
return (is_after_syscall_address(dcontext, pc) &&
does_syscall_ret_to_callsite());
#else
generated_code_t *code = get_emitted_routines_code(dcontext
_IF_X64(GENCODE_FROM_DCONTEXT));
ASSERT(code != NULL);
return ((pc == (cache_pc) (code->do_syscall + code->do_syscall_offs) &&
does_syscall_ret_to_callsite()) ||
pc == (cache_pc) (code->do_int_syscall + code->do_int_syscall_offs)
IF_VMX86(|| pc == (cache_pc) (code->do_vmkuw_syscall +
code->do_vmkuw_syscall_offs)));
#endif
}
#ifdef UNIX
/* PR 212290: can't be static code in x86.asm since it can't be PIC */
cache_pc
get_new_thread_start(dcontext_t *dcontext _IF_X64(gencode_mode_t mode))
{
#ifdef HAVE_TLS
/* for HAVE_TLS we use the shared version; w/o TLS we don't
* make any shared routines (PR 361894)
*/
dcontext = GLOBAL_DCONTEXT;
#endif
generated_code_t *gen = get_emitted_routines_code(dcontext _IF_X64(mode));
return gen->new_thread_dynamo_start;
}
#endif
#ifdef TRACE_HEAD_CACHE_INCR
cache_pc
trace_head_incr_routine(dcontext_t *dcontext)
{
generated_code_t *code = THREAD_GENCODE(dcontext);
return (cache_pc) code->trace_head_incr;
}
#endif
#ifdef CHECK_RETURNS_SSE2_EMIT
cache_pc
get_pextrw_entry(dcontext_t *dcontext)
{
generated_code_t *code = THREAD_GENCODE(dcontext);
return (cache_pc) code->pextrw;
}
cache_pc
get_pinsrw_entry(dcontext_t *dcontext)
{
generated_code_t *code = THREAD_GENCODE(dcontext);
return (cache_pc) code->pinsrw;
}
#endif
/* exported beyond arch/ */
fcache_enter_func_t
get_fcache_enter_shared_routine(dcontext_t *dcontext)
{
return fcache_enter_shared_routine(dcontext);
}
fcache_enter_func_t
fcache_enter_shared_routine(dcontext_t *dcontext)
{
ASSERT(USE_SHARED_GENCODE());
return (fcache_enter_func_t)
convert_data_to_function(SHARED_GENCODE_MATCH_THREAD(dcontext)->fcache_enter);
}
cache_pc
fcache_return_shared_routine(IF_X64_ELSE(gencode_mode_t mode, void))
{
generated_code_t *code = get_shared_gencode(GLOBAL_DCONTEXT _IF_X64(mode));
ASSERT(USE_SHARED_GENCODE());
if (code == NULL)
return NULL;
else
return code->fcache_return;
}
#ifdef TRACE_HEAD_CACHE_INCR
cache_pc
trace_head_incr_shared_routine(IF_X64_ELSE(gencode_mode_t mode, void))
{
generated_code_t *code = get_shared_gencode(GLOBAL_DCONTEXT _IF_X64(mode));
ASSERT(USE_SHARED_GENCODE());
if (code == NULL)
return NULL;
else
return code->trace_head_incr;
}
#endif
/* get the fcache target for the next code cache entry */
cache_pc
get_fcache_target(dcontext_t *dcontext)
{
/* we used to use mcontext.pc, but that's in the writable
* portion of the dcontext, and so for self-protection we use the
* next_tag slot, which is protected
*/
return dcontext->next_tag;
}
/* set the fcache target for the next code cache entry */
void
set_fcache_target(dcontext_t *dcontext, cache_pc value)
{
/* we used to use mcontext.pc, but that's in the writable
* portion of the dcontext, and so for self-protection we use the
* next_tag slot, which is protected
*/
dcontext->next_tag = value;
/* set eip as well to complete mcontext state */
get_mcontext(dcontext)->pc = value;
}
/* For 32-bit linux apps on 64-bit kernels we assume that all syscalls that
* we use this for are ok w/ int (i.e., we don't need a sys{call,enter} version).
*/
byte *
get_global_do_syscall_entry()
{
int method = get_syscall_method();
if (method == SYSCALL_METHOD_INT) {
#ifdef WINDOWS
if (DYNAMO_OPTION(sygate_int))
return (byte *)global_do_syscall_sygate_int;
else
#endif
return (byte *)global_do_syscall_int;
} else if (method == SYSCALL_METHOD_SYSENTER) {
#ifdef WINDOWS
if (DYNAMO_OPTION(sygate_sysenter))
return (byte *)global_do_syscall_sygate_sysenter;
else
return (byte *)global_do_syscall_sysenter;
#else
return (byte *)global_do_syscall_int;
#endif
}
#ifdef WINDOWS
else if (method == SYSCALL_METHOD_WOW64)
return (byte *)global_do_syscall_wow64;
#endif
else if (method == SYSCALL_METHOD_SYSCALL) {
#ifdef X64
return (byte *)global_do_syscall_syscall;
#else
# ifdef WINDOWS
ASSERT_NOT_IMPLEMENTED(false && "PR 205898: 32-bit syscall on Windows NYI");
# else
return (byte *)global_do_syscall_int;
# endif
#endif
} else {
#ifdef UNIX
/* PR 205310: we sometimes have to execute syscalls before we
* see an app syscall: for a signal default action, e.g.
*/
return (byte *)IF_X64_ELSE(global_do_syscall_syscall,global_do_syscall_int);
#else
ASSERT_NOT_REACHED();
#endif
}
return NULL;
}
/* used only by cleanup_and_terminate to avoid the sysenter
* sygate hack version */
byte *
get_cleanup_and_terminate_global_do_syscall_entry()
{
/* see note above: for 32-bit linux apps we use int.
* xref PR 332427 as well where sysenter causes a crash
* if called from cleanup_and_terminate() where ebp is
* left pointing to the old freed stack.
*/
#if defined(WINDOWS) || defined(X64)
if (get_syscall_method() == SYSCALL_METHOD_SYSENTER)
return (byte *)global_do_syscall_sysenter;
else
#endif
#ifdef WINDOWS
if (get_syscall_method() == SYSCALL_METHOD_WOW64 &&
syscall_uses_wow64_index())
return (byte *)global_do_syscall_wow64_index0;
else
#endif
return get_global_do_syscall_entry();
}
#ifdef MACOS
/* There is no single resumption point from sysenter: each sysenter stores
* the caller's retaddr in edx. Thus, there is nothing to hook.
*/
bool
unhook_vsyscall(void)
{
return false;
}
#elif defined(LINUX)
/* PR 212570: for sysenter support we need to regain control after the
* kernel sets eip to a hardcoded user-mode address on the vsyscall page.
* The vsyscall code layout is as follows:
* 0xffffe400 <__kernel_vsyscall+0>: push %ecx
* 0xffffe401 <__kernel_vsyscall+1>: push %edx
* 0xffffe402 <__kernel_vsyscall+2>: push %ebp
* 0xffffe403 <__kernel_vsyscall+3>: mov %esp,%ebp
* 0xffffe405 <__kernel_vsyscall+5>: sysenter
* nops for alignment of return point:
* 0xffffe407 <__kernel_vsyscall+7>: nop
* 0xffffe408 <__kernel_vsyscall+8>: nop
* 0xffffe409 <__kernel_vsyscall+9>: nop
* 0xffffe40a <__kernel_vsyscall+10>: nop
* 0xffffe40b <__kernel_vsyscall+11>: nop
* 0xffffe40c <__kernel_vsyscall+12>: nop
* 0xffffe40d <__kernel_vsyscall+13>: nop
* system call restart point:
* 0xffffe40e <__kernel_vsyscall+14>: jmp 0xffffe403 <__kernel_vsyscall+3>
* system call normal return point:
* 0xffffe410 <__kernel_vsyscall+16>: pop %ebp
* 0xffffe411 <__kernel_vsyscall+17>: pop %edx
* 0xffffe412 <__kernel_vsyscall+18>: pop %ecx
* 0xffffe413 <__kernel_vsyscall+19>: ret
*
* For randomized vsyscall page locations we can mark the page +w and
* write to it. For now, for simplicity, we focus only on that case;
* for vsyscall page at un-reachable 0xffffe000 we bail out and use
* ints for now (perf hit but works). PR 288330 covers leaving
* as sysenters.
*
* There are either nops or garbage after the ret, so we clobber one
* byte past the ret to put in a rel32 jmp (an alternative is to do
* rel8 jmp into the nop area and have a rel32 jmp there). We
* cleverly copy the 4 bytes of displaced code into the nop area, so
* that 1) we don't have to allocate any memory and 2) we don't have
* to do any extra work in dispatch, which will naturally go to the
* post-system-call-instr pc.
*
* Using a hook is much simpler than clobbering the retaddr, which is what
* Windows does and then has to spend a lot of effort juggling transparency
* and control on asynch in/out events.
*/
#define VSYS_DISPLACED_LEN 4
static bool
hook_vsyscall(dcontext_t *dcontext)
{
#ifdef X86
bool res = true;
instr_t instr;
byte *pc;
uint num_nops = 0;
uint prot;
ASSERT(DATASEC_WRITABLE(DATASEC_RARELY_PROT));
IF_X64(ASSERT_NOT_REACHED()); /* no sysenter support on x64 */
ASSERT(vsyscall_page_start != NULL && vsyscall_syscall_end_pc != NULL);
instr_init(dcontext, &instr);
pc = vsyscall_syscall_end_pc;
do {
instr_reset(dcontext, &instr);
pc = decode(dcontext, pc, &instr);
if (instr_is_nop(&instr))
num_nops++;
} while (instr_is_nop(&instr));
vsyscall_sysenter_return_pc = pc;
ASSERT(instr_get_opcode(&instr) == OP_jmp_short ||
instr_get_opcode(&instr) == OP_int /*ubuntu 11.10: i#647*/);
/* We fail if the pattern looks different */
# define CHECK(x) do { \
if (!(x)) { \
ASSERT(false && "vsyscall pattern mismatch"); \
res = false; \
goto hook_vsyscall_return; \
} \
} while (0);
CHECK(num_nops >= VSYS_DISPLACED_LEN);
/* Only now that we've set vsyscall_sysenter_return_pc do we check writability */
if (!DYNAMO_OPTION(hook_vsyscall)) {
res = false;
goto hook_vsyscall_return;
}
get_memory_info(vsyscall_page_start, NULL, NULL, &prot);
if (!TEST(MEMPROT_WRITE, prot)) {
res = set_protection(vsyscall_page_start, PAGE_SIZE, prot|MEMPROT_WRITE);
if (!res)
goto hook_vsyscall_return;
}
LOG(GLOBAL, LOG_SYSCALLS|LOG_VMAREAS, 1,
"Hooking vsyscall page @ "PFX"\n", vsyscall_sysenter_return_pc);
/* The 5 bytes we'll clobber: */
instr_reset(dcontext, &instr);
pc = decode(dcontext, pc, &instr);
CHECK(instr_get_opcode(&instr) == OP_pop);
instr_reset(dcontext, &instr);
pc = decode(dcontext, pc, &instr);
CHECK(instr_get_opcode(&instr) == OP_pop);
instr_reset(dcontext, &instr);
pc = decode(dcontext, pc, &instr);
CHECK(instr_get_opcode(&instr) == OP_pop);
instr_reset(dcontext, &instr);
pc = decode(dcontext, pc, &instr);
CHECK(instr_get_opcode(&instr) == OP_ret);
/* Sometimes the next byte is a nop, sometimes it's non-code */
ASSERT(*pc == RAW_OPCODE_nop || *pc == 0);
/* FIXME: at some point we should pull out all the hook code from
* callback.c into an os-neutral location. For now, this hook
* is very special-case and simple.
*/
/* For thread synch, the datasec prot lock will serialize us (FIXME: do this at
* init time instead, when see [vdso] page in maps file?)
*/
CHECK(pc - vsyscall_sysenter_return_pc == VSYS_DISPLACED_LEN);
ASSERT(pc + 1/*nop*/ - vsyscall_sysenter_return_pc == JMP_LONG_LENGTH);
CHECK(num_nops >= pc - vsyscall_sysenter_return_pc);
memcpy(vsyscall_syscall_end_pc, vsyscall_sysenter_return_pc,
/* we don't copy the 5th byte to preserve nop for nice disassembly */
pc - vsyscall_sysenter_return_pc);
insert_relative_jump(vsyscall_sysenter_return_pc,
/* we require a thread-shared fcache_return */
after_do_shared_syscall_addr(dcontext),
NOT_HOT_PATCHABLE);
if (!TEST(MEMPROT_WRITE, prot)) {
/* we don't override res here since there's not much point in not using the
* hook, once it's in, just because we failed to re-protect: we're going to
* have to trust the app code here anyway */
DEBUG_DECLARE(bool ok =)
set_protection(vsyscall_page_start, PAGE_SIZE, prot);
ASSERT(ok);
}
hook_vsyscall_return:
instr_free(dcontext, &instr);
return res;
# undef CHECK
#elif defined(ARM)
/* FIXME i#1551: NYI on ARM */
ASSERT_NOT_IMPLEMENTED(false);
return false;
#endif /* X86/ARM */
}
bool
unhook_vsyscall(void)
{
#ifdef X86
uint prot;
bool res;
uint len = VSYS_DISPLACED_LEN;
if (get_syscall_method() != SYSCALL_METHOD_SYSENTER)
return false;
ASSERT(!sysenter_hook_failed);
ASSERT(vsyscall_sysenter_return_pc != NULL);
ASSERT(vsyscall_syscall_end_pc != NULL);
get_memory_info(vsyscall_page_start, NULL, NULL, &prot);
if (!TEST(MEMPROT_WRITE, prot)) {
res = set_protection(vsyscall_page_start, PAGE_SIZE, prot|MEMPROT_WRITE);
if (!res)
return false;
}
memcpy(vsyscall_sysenter_return_pc, vsyscall_syscall_end_pc, len);
/* we do not restore the 5th (junk/nop) byte (we never copied it) */
memset(vsyscall_syscall_end_pc, RAW_OPCODE_nop, len);
if (!TEST(MEMPROT_WRITE, prot)) {
res = set_protection(vsyscall_page_start, PAGE_SIZE, prot);
ASSERT(res);
}
return true;
#elif defined(ARM)
/* FIXME i#1551: NYI on ARM */
ASSERT_NOT_IMPLEMENTED(false);
return false;
#endif /* X86/ARM */
}
#endif /* LINUX */
void
check_syscall_method(dcontext_t *dcontext, instr_t *instr)
{
int new_method = SYSCALL_METHOD_UNINITIALIZED;
#ifdef X86
if (instr_get_opcode(instr) == OP_int)
new_method = SYSCALL_METHOD_INT;
else if (instr_get_opcode(instr) == OP_sysenter)
new_method = SYSCALL_METHOD_SYSENTER;
else if (instr_get_opcode(instr) == OP_syscall)
new_method = SYSCALL_METHOD_SYSCALL;
# ifdef WINDOWS
else if (instr_get_opcode(instr) == OP_call_ind)
new_method = SYSCALL_METHOD_WOW64;
# endif
#elif defined(ARM)
if (instr_get_opcode(instr) == OP_svc)
new_method = SYSCALL_METHOD_SVC;
#endif /* X86/ARM */
else
ASSERT_NOT_REACHED();
if (new_method == SYSCALL_METHOD_SYSENTER ||
IF_X64_ELSE(false, new_method == SYSCALL_METHOD_SYSCALL)) {
DO_ONCE({
/* FIXME: DO_ONCE will unprot and reprot, and here we unprot again */
SELF_UNPROTECT_DATASEC(DATASEC_RARELY_PROT);
/* FIXME : using the raw-bits as the app pc for the instr is
* not really supported, but places in monitor assume it as well */
ASSERT(instr_raw_bits_valid(instr) &&
!instr_has_allocated_bits(instr));
/* Some places (such as clean_syscall_wrapper) assume that only int system
* calls are used in older versions of windows. */
IF_WINDOWS(ASSERT(get_os_version() > WINDOWS_VERSION_2000 &&
"Expected int syscall method on NT and 2000"));
/* Used by SYSCALL_PC in win32/os.c for non int system calls */
IF_WINDOWS(app_sysenter_instr_addr = instr_get_raw_bits(instr));
/* we expect, only on XP and later or on recent linux kernels,
* indirected syscalls through a certain page, which we record here
* FIXME: don't allow anyone to make this region writable?
*/
/* FIXME : we need to verify that windows lays out all of the
* syscall stuff as expected on AMD chips: xref PR 205898.
*/
/* FIXME: bootstrapping problem...would be nicer to read ahead and find
* syscall before needing to know about page it's on, but for now we just
* check if our initial assignments were correct
*/
vsyscall_syscall_end_pc = instr_get_raw_bits(instr) +
instr_length(dcontext, instr);
IF_WINDOWS({
/* for XP sp0,1 (but not sp2) and 03 fix up bootstrap values */
if (vsyscall_page_start == VSYSCALL_PAGE_START_BOOTSTRAP_VALUE) {
vsyscall_page_start = (app_pc) PAGE_START(instr_get_raw_bits(instr));
ASSERT(vsyscall_page_start == VSYSCALL_PAGE_START_BOOTSTRAP_VALUE);
}
if (vsyscall_after_syscall == VSYSCALL_AFTER_SYSCALL_BOOTSTRAP_VALUE) {
/* for XP sp0,1 and 03 the ret is immediately after the
* sysenter instruction */
vsyscall_after_syscall = instr_get_raw_bits(instr) +
instr_length(dcontext, instr);
ASSERT(vsyscall_after_syscall ==
VSYSCALL_AFTER_SYSCALL_BOOTSTRAP_VALUE);
}
});
/* For linux, we should have found "[vdso]" in the maps file */
IF_LINUX(ASSERT(vsyscall_page_start != NULL &&
vsyscall_page_start ==
(app_pc) PAGE_START(instr_get_raw_bits(instr))));
LOG(GLOBAL, LOG_SYSCALLS|LOG_VMAREAS, 2,
"Found vsyscall @ "PFX" => page "PFX", post "PFX"\n",
instr_get_raw_bits(instr), vsyscall_page_start,
IF_WINDOWS_ELSE(vsyscall_after_syscall, vsyscall_syscall_end_pc));
/* make sure system call numbers match */
IF_WINDOWS(DOCHECK(1, { check_syscall_numbers(dcontext); }));
SELF_PROTECT_DATASEC(DATASEC_RARELY_PROT);
});
} else {
#ifdef WINDOWS
DO_ONCE({
/* FIXME: DO_ONCE will unprot and reprot, and here we unprot again */
SELF_UNPROTECT_DATASEC(DATASEC_RARELY_PROT);
/* Close vsyscall page hole.
* FIXME: the vsyscall page can still be in use and contain int:
* though I have yet to see that case where the page is not marked rx.
* On linux the vsyscall page is reached via "call *%gs:0x10", but
* sometimes that call ends up at /lib/ld-2.3.4.so:_dl_sysinfo_int80
* instead (which is the case when the vsyscall page is marked with no
* permissions).
*/
LOG(GLOBAL, LOG_SYSCALLS|LOG_VMAREAS, 2,
"Closing vsyscall page hole (int @ "PFX") => page "PFX", post "PFX"\n",
instr_get_translation(instr), vsyscall_page_start,
IF_WINDOWS_ELSE(vsyscall_after_syscall, vsyscall_syscall_end_pc));
vsyscall_page_start = NULL;
vsyscall_after_syscall = NULL;
ASSERT_CURIOSITY(new_method != SYSCALL_METHOD_WOW64 ||
(get_os_version() > WINDOWS_VERSION_XP &&
is_wow64_process(NT_CURRENT_PROCESS) &&
"Unexpected WOW64 syscall method"));
/* make sure system call numbers match */
DOCHECK(1, { check_syscall_numbers(dcontext); });
SELF_PROTECT_DATASEC(DATASEC_RARELY_PROT);
});
#else
/* On Linux we can't clear vsyscall_page_start as the app will often use both
* inlined int and vsyscall sysenter system calls. We handle fixing up for that
* in the next ifdef. */
#endif
}
#ifdef UNIX
if (new_method != get_syscall_method() &&
/* PR 286922: for linux, vsyscall method trumps occasional use of int. We
* update do_syscall for the vsyscall method, and use do_int_syscall for any
* int uses. */
(new_method != SYSCALL_METHOD_INT ||
(get_syscall_method() != SYSCALL_METHOD_SYSENTER &&
get_syscall_method() != SYSCALL_METHOD_SYSCALL))) {
ASSERT(get_syscall_method() == SYSCALL_METHOD_UNINITIALIZED ||
get_syscall_method() == SYSCALL_METHOD_INT);
# ifdef LINUX
if (new_method == SYSCALL_METHOD_SYSENTER) {
# ifndef HAVE_TLS
if (DYNAMO_OPTION(hook_vsyscall)) {
/* PR 361894: we use TLS for our vsyscall hook (PR 212570) */
FATAL_USAGE_ERROR(SYSENTER_NOT_SUPPORTED, 2,
get_application_name(), get_application_pid());
}
# endif
/* Hook the sysenter continuation point so we don't lose control */
if (!sysenter_hook_failed && !hook_vsyscall(dcontext)) {
/* PR 212570: for now we bail out to using int;
* for performance we should clobber the retaddr and
* keep the sysenters.
*/
SELF_UNPROTECT_DATASEC(DATASEC_RARELY_PROT);
sysenter_hook_failed = true;
SELF_PROTECT_DATASEC(DATASEC_RARELY_PROT);
LOG(GLOBAL, LOG_SYSCALLS|LOG_VMAREAS, 1,
"Unable to hook vsyscall page; falling back on int\n");
}
if (sysenter_hook_failed)
new_method = SYSCALL_METHOD_INT;
}
# endif /* LINUX */
if (get_syscall_method() == SYSCALL_METHOD_UNINITIALIZED ||
new_method != get_syscall_method()) {
set_syscall_method(new_method);
/* update the places we have emitted syscalls: do_*syscall */
update_syscalls(dcontext);
}
}
#else
/* we assume only single method; else need multiple do_syscalls */
ASSERT(new_method == get_syscall_method());
#endif
}
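/* Returns the app's syscall method (SYSCALL_METHOD_*) as determined so far. */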
int
get_syscall_method(void)
{
return syscall_method;
}
/* Does the syscall instruction always return to the invocation point? */
bool
does_syscall_ret_to_callsite(void)
{
return (syscall_method == SYSCALL_METHOD_INT ||
syscall_method == SYSCALL_METHOD_SYSCALL
IF_WINDOWS(|| syscall_method == SYSCALL_METHOD_WOW64)
/* The app is reported to be at whatever's in edx, so
* for our purposes it does return to the call site
* if we always mangle edx to point there. Since we execute the
* Mac sysenter inside fragments (even if we don't continue,
* except maybe in a trace), we do want to return true here for
* skipping syscalls and handling interrupted syscalls.
*/
IF_MACOS(|| syscall_method == SYSCALL_METHOD_SYSENTER));
}
void
set_syscall_method(int method)
{
ASSERT(syscall_method == SYSCALL_METHOD_UNINITIALIZED
IF_UNIX(|| syscall_method == SYSCALL_METHOD_INT/*PR 286922*/));
syscall_method = method;
}
#ifdef LINUX
/* PR 313715: If we fail to hook the vsyscall page (xref PR 212570, PR 288330)
* we fall back on int, but we have to tweak syscall param #5 (ebp)
*/
bool
should_syscall_method_be_sysenter(void)
{
return sysenter_hook_failed;
}
#endif
/* returns the address of the first app syscall instruction we saw (see the hack
* in win32/os.c that uses this for PRE_SYSCALL_PC); not for general use */
byte *
get_app_sysenter_addr()
{
/* FIXME : would like to assert that this has been initialized, but interp
* bb_process_convertible_indcall() will use it before we initialize it. */
return app_sysenter_instr_addr;
}
void
copy_mcontext(priv_mcontext_t *src, priv_mcontext_t *dst)
{
/* FIXME: do we need this? */
*dst = *src;
}
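/* Copies the fields selected by src->flags from the client-exposed dr_mcontext_t
* into our internal priv_mcontext_t. Returns false if src->size does not match.
*/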
bool
dr_mcontext_to_priv_mcontext(priv_mcontext_t *dst, dr_mcontext_t *src)
{
/* we assume fields from xdi onward are identical.
* if we append to dr_mcontext_t in the future we'll need
* to check src->size here.
*/
if (src->size != sizeof(dr_mcontext_t))
return false;
if (TESTALL(DR_MC_ALL, src->flags))
*dst = *(priv_mcontext_t*)(&MCXT_FIRST_REG_FIELD(src));
else {
if (TEST(DR_MC_INTEGER, src->flags)) {
/* xsp is in the middle of the mcxt, so we save dst->xsp here and
* restore it later so we can use one memcpy for DR_MC_INTEGER.
*/
reg_t save_xsp = dst->xsp;
memcpy(&MCXT_FIRST_REG_FIELD(dst), &MCXT_FIRST_REG_FIELD(src),
/* end of the mcxt integer gpr */
offsetof(priv_mcontext_t, IF_X86_ELSE(xflags, pc)));
dst->xsp = save_xsp;
}
if (TEST(DR_MC_CONTROL, src->flags)) {
dst->xsp = src->xsp;
dst->xflags = src->xflags;
dst->pc = src->pc;
}
if (TEST(DR_MC_MULTIMEDIA, src->flags)) {
IF_X86_ELSE({
memcpy(&dst->ymm, &src->ymm, sizeof(dst->ymm));
}, {
/* FIXME i#1551: NYI on ARM */
ASSERT_NOT_IMPLEMENTED(false);
});
}
}
return true;
}
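/* Copies the fields selected by dst->flags from our internal priv_mcontext_t
* out to the client-exposed dr_mcontext_t. Returns false if dst->size does not match.
*/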
bool
priv_mcontext_to_dr_mcontext(dr_mcontext_t *dst, priv_mcontext_t *src)
{
/* we assume fields from xdi onward are identical.
* if we append to dr_mcontext_t in the future we'll need
* to check dst->size here.
*/
if (dst->size != sizeof(dr_mcontext_t))
return false;
if (TESTALL(DR_MC_ALL, dst->flags))
*(priv_mcontext_t*)(&MCXT_FIRST_REG_FIELD(dst)) = *src;
else {
if (TEST(DR_MC_INTEGER, dst->flags)) {
/* xsp is in the middle of the mcxt, so we save dst->xsp here and
* restore it later so we can use one memcpy for DR_MC_INTEGER.
*/
reg_t save_xsp = dst->xsp;
memcpy(&MCXT_FIRST_REG_FIELD(dst), &MCXT_FIRST_REG_FIELD(src),
/* end of the mcxt integer gpr */
offsetof(priv_mcontext_t, IF_X86_ELSE(xflags, pc)));
dst->xsp = save_xsp;
}
if (TEST(DR_MC_CONTROL, dst->flags)) {
dst->xsp = src->xsp;