| /* ********************************************************** |
| * Copyright (c) 2010-2014 Google, Inc. All rights reserved. |
| * Copyright (c) 2000-2010 VMware, Inc. All rights reserved. |
| * **********************************************************/ |
| |
| /* |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * * Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimer. |
| * |
| * * Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * * Neither the name of VMware, Inc. nor the names of its contributors may be |
| * used to endorse or promote products derived from this software without |
| * specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE |
| * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH |
| * DAMAGE. |
| */ |
| |
| /* Copyright (c) 2003-2007 Determina Corp. */ |
| /* Copyright (c) 2001-2003 Massachusetts Institute of Technology */ |
| /* Copyright (c) 2000-2001 Hewlett-Packard Company */ |
| |
| /* file "arch.h" -- internal architecture-specific (x86/ARM) definitions |
| * |
| * References: |
| * "Intel Architecture Software Developer's Manual", 1999. |
| */ |
| |
| #ifndef X86_ARCH_H |
| #define X86_ARCH_H |
| |
| #include <stddef.h> /* for offsetof */ |
| #include "instr.h" /* for reg_id_t */ |
| #include "decode.h" /* for X64_CACHE_MODE_DC */ |
| #include "arch_exports.h" /* for FRAG_IS_32 and FRAG_IS_X86_TO_X64 */ |
| #include "../fragment.h" /* IS_IBL_TARGET */ |
| |
| #ifdef X64 |
| static inline bool |
| mixed_mode_enabled(void) |
| { |
| /* XXX i#49: currently only supporting WOW64 and thus only |
| * creating x86 versions of gencode for WOW64. Eventually we'll |
| * have to either always create for every x64 process, or lazily |
| * create on first appearance of 32-bit code. |
| */ |
| # ifdef WINDOWS |
| return is_wow64_process(NT_CURRENT_PROCESS); |
| # else |
| return false; |
| # endif |
| } |
| #endif |
| |
| /* dcontext_t field offsets |
| * N.B.: DO NOT USE offsetof(dcontext_t) anywhere else if passing to the |
| * dcontext operand construction routines! |
| * Otherwise we will have issues w/ the upcontext offset game below |
| */ |
| |
| /* offs is not raw offset, but includes upcontext size, so we |
| * can tell unprotected from normal! |
| * unprotected are raw 0..sizeof(unprotected_context_t) |
| * protected are raw + sizeof(unprotected_context_t) |
| * (see the instr_shared.c routines for dcontext instr building) |
| * FIXME: we could get rid of this hack if unprotected_context_t == priv_mcontext_t |
| */ |
| #define PROT_OFFS (sizeof(unprotected_context_t)) |
| #define MC_OFFS (offsetof(unprotected_context_t, mcontext)) |
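| /* Illustrative sketch (comment only, not part of the build) of how this offset |
| * convention lets the dcontext operand construction routines tell the two regions |
| * apart: unprotected fields use their raw offset (always < PROT_OFFS), e.g. |
| *   MC_OFFS + offsetof(priv_mcontext_t, xax) |
| * while protected dcontext_t fields add PROT_OFFS on top, e.g. |
| *   NEXT_TAG_OFFSET == PROT_OFFS + offsetof(dcontext_t, next_tag) |
| * so a builder can roughly do: |
| *   if (offs < PROT_OFFS)  field lives in unprotected_context_t at raw offs |
| *   else                   field lives in dcontext_t at (offs - PROT_OFFS) |
| */ |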
| |
| #ifdef X86 |
| # define XAX_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, xax))) |
| # define XBX_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, xbx))) |
| # define XCX_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, xcx))) |
| # define XDX_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, xdx))) |
| # define XSI_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, xsi))) |
| # define XDI_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, xdi))) |
| # define XBP_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, xbp))) |
| # ifdef X64 |
| # define R8_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, r8))) |
| # define R9_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, r9))) |
| # define R10_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, r10))) |
| # define R11_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, r11))) |
| # define R12_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, r12))) |
| # define R13_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, r13))) |
| # define R14_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, r14))) |
| # define R15_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, r15))) |
| # endif /* X64 */ |
| # define XMM_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, ymm))) |
| # define SCRATCH_REG0 DR_REG_XAX |
| # define SCRATCH_REG1 DR_REG_XBX |
| # define SCRATCH_REG2 DR_REG_XCX |
| # define SCRATCH_REG3 DR_REG_XDX |
| # define SCRATCH_REG4 DR_REG_XSI |
| # define SCRATCH_REG5 DR_REG_XDI |
| # define SCRATCH_REG0_OFFS XAX_OFFSET |
| # define SCRATCH_REG1_OFFS XBX_OFFSET |
| # define SCRATCH_REG2_OFFS XCX_OFFSET |
| # define SCRATCH_REG3_OFFS XDX_OFFSET |
| # define SCRATCH_REG4_OFFS XSI_OFFSET |
| # define SCRATCH_REG5_OFFS XDI_OFFSET |
| #elif defined(ARM) |
| # define R0_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, r0))) |
| # define R1_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, r1))) |
| # define R2_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, r2))) |
| # define R3_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, r3))) |
| # define R4_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, r4))) |
| # define R5_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, r5))) |
| # define SCRATCH_REG0 DR_REG_R0 |
| # define SCRATCH_REG1 DR_REG_R1 |
| # define SCRATCH_REG2 DR_REG_R2 |
| # define SCRATCH_REG3 DR_REG_R3 |
| # define SCRATCH_REG4 DR_REG_R4 |
| # define SCRATCH_REG5 DR_REG_R5 |
| # define SCRATCH_REG0_OFFS R0_OFFSET |
| # define SCRATCH_REG1_OFFS R1_OFFSET |
| # define SCRATCH_REG2_OFFS R2_OFFSET |
| # define SCRATCH_REG3_OFFS R3_OFFSET |
| # define SCRATCH_REG4_OFFS R4_OFFSET |
| # define SCRATCH_REG5_OFFS R5_OFFSET |
| #endif /* X86/ARM */ |
| #define XSP_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, xsp))) |
| #define XFLAGS_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, xflags))) |
| #define PC_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, pc))) |
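| /* Illustrative sketch (comment only, not part of the build): the SCRATCH_REGn and |
| * SCRATCH_REGn_OFFS pairs above let arch-independent emit code spill and restore |
| * scratch registers without naming xax vs r0 directly, roughly: |
| *   instrlist_append(ilist, |
| *       instr_create_save_to_dcontext(dcontext, SCRATCH_REG0, SCRATCH_REG0_OFFS)); |
| *   ... use SCRATCH_REG0 ... |
| *   instrlist_append(ilist, |
| *       instr_create_restore_from_dcontext(dcontext, SCRATCH_REG0, SCRATCH_REG0_OFFS)); |
| */ |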
| |
| #define ERRNO_OFFSET (offsetof(unprotected_context_t, errno)) |
| #define AT_SYSCALL_OFFSET (offsetof(unprotected_context_t, at_syscall)) |
| #define EXIT_REASON_OFFSET (offsetof(unprotected_context_t, exit_reason)) |
| |
| #define NEXT_TAG_OFFSET ((PROT_OFFS)+offsetof(dcontext_t, next_tag)) |
| #define LAST_EXIT_OFFSET ((PROT_OFFS)+offsetof(dcontext_t, last_exit)) |
| #define LAST_FRAG_OFFSET ((PROT_OFFS)+offsetof(dcontext_t, last_fragment)) |
| #define DSTACK_OFFSET ((PROT_OFFS)+offsetof(dcontext_t, dstack)) |
| #define THREAD_RECORD_OFFSET ((PROT_OFFS)+offsetof(dcontext_t, thread_record)) |
| #define WHEREAMI_OFFSET ((PROT_OFFS)+offsetof(dcontext_t, whereami)) |
| |
| #define FRAGMENT_FIELD_OFFSET ((PROT_OFFS)+offsetof(dcontext_t, fragment_field)) |
| #define PRIVATE_CODE_OFFSET ((PROT_OFFS)+offsetof(dcontext_t, private_code)) |
| |
| #ifdef WINDOWS |
| # ifdef CLIENT_INTERFACE |
| # define APP_ERRNO_OFFSET ((PROT_OFFS)+offsetof(dcontext_t, app_errno)) |
| # define APP_FLS_OFFSET ((PROT_OFFS)+offsetof(dcontext_t, app_fls_data)) |
| # define PRIV_FLS_OFFSET ((PROT_OFFS)+offsetof(dcontext_t, priv_fls_data)) |
| # define APP_RPC_OFFSET ((PROT_OFFS)+offsetof(dcontext_t, app_nt_rpc)) |
| # define PRIV_RPC_OFFSET ((PROT_OFFS)+offsetof(dcontext_t, priv_nt_rpc)) |
| # define APP_NLS_CACHE_OFFSET ((PROT_OFFS)+offsetof(dcontext_t, app_nls_cache)) |
| # define PRIV_NLS_CACHE_OFFSET ((PROT_OFFS)+offsetof(dcontext_t, priv_nls_cache)) |
| # define APP_STACK_LIMIT_OFFSET ((PROT_OFFS)+offsetof(dcontext_t, app_stack_limit)) |
| # endif |
| # define NONSWAPPED_SCRATCH_OFFSET ((PROT_OFFS)+offsetof(dcontext_t, nonswapped_scratch)) |
| #endif |
| |
| #ifdef TRACE_HEAD_CACHE_INCR |
| # define TRACE_HEAD_PC_OFFSET ((PROT_OFFS)+offsetof(dcontext_t, trace_head_pc)) |
| #endif |
| |
| #ifdef WINDOWS |
| # define SYSENTER_STORAGE_OFFSET ((PROT_OFFS)+offsetof(dcontext_t, sysenter_storage)) |
| # define IGNORE_ENTEREXIT_OFFSET ((PROT_OFFS)+offsetof(dcontext_t, ignore_enterexit)) |
| #endif |
| |
| #ifdef CLIENT_INTERFACE |
| # define CLIENT_DATA_OFFSET ((PROT_OFFS)+offsetof(dcontext_t, client_data)) |
| #endif |
| |
| #define COARSE_IB_SRC_OFFSET ((PROT_OFFS)+offsetof(dcontext_t, coarse_exit.src_tag)) |
| #define COARSE_DIR_EXIT_OFFSET ((PROT_OFFS)+offsetof(dcontext_t, coarse_exit.dir_exit)) |
| |
| int |
| reg_spill_tls_offs(reg_id_t reg); |
| |
| #define OPSZ_SAVED_XMM (YMM_ENABLED() ? OPSZ_32 : OPSZ_16) |
| #define REG_SAVED_XMM0 (YMM_ENABLED() ? REG_YMM0 : REG_XMM0) |
| |
| /* Xref the partially overlapping CONTEXT_PRESERVE_XMM */ |
| /* This routine also determines whether ymm registers should be saved */ |
| static inline bool |
| preserve_xmm_caller_saved(void) |
| { |
| /* PR 264138: we must preserve xmm0-5 if on a 64-bit Windows kernel. |
| * PR 302107: we must preserve xmm0-15 for 64-bit Linux apps. |
| * i#139: we save xmm0-7 in 32-bit Linux and Windows b/c DR and client |
| * code on modern compilers ends up using xmm regs w/o any flags to easily |
| * disable w/o giving up perf. (Xref PR 306394 where we originally did |
| * not preserve xmm0-7 on a 32-bit kernel b/c DR didn't contain any xmm |
| * reg usage). |
| */ |
| return proc_has_feature(FEATURE_SSE) /* do xmm registers exist? */; |
| } |
| |
| typedef enum { |
| IBL_UNLINKED, |
| IBL_DELETE, |
| /* Pre-ibl routines for far ctis */ |
| IBL_FAR, |
| IBL_FAR_UNLINKED, |
| #ifdef X64 |
| /* PR 257963: trace inline cmp has separate entries b/c it saves flags */ |
| IBL_TRACE_CMP, |
| IBL_TRACE_CMP_UNLINKED, |
| #endif |
| IBL_LINKED, |
| |
| IBL_TEMPLATE, /* a template is presumed to be always linked */ |
| IBL_LINK_STATE_END |
| } ibl_entry_point_type_t; |
| |
| /* we should allow for all {{bb,trace} x {ret,ind call, ind jmp} x {shared, private}} */ |
| /* combinations of routines which are in turn x {unlinked, linked} */ |
| typedef enum { |
| /* FIXME: have a separate flag for private vs shared */ |
| IBL_BB_SHARED, |
| IBL_SOURCE_TYPE_START = IBL_BB_SHARED, |
| IBL_TRACE_SHARED, |
| IBL_BB_PRIVATE, |
| IBL_TRACE_PRIVATE, |
| IBL_COARSE_SHARED, /* no coarse private, for now */ |
| IBL_SOURCE_TYPE_END |
| } ibl_source_fragment_type_t; |
| |
| #define DEFAULT_IBL_BB() \ |
| (DYNAMO_OPTION(shared_bbs) ? IBL_BB_SHARED : IBL_BB_PRIVATE) |
| #define DEFAULT_IBL_TRACE() \ |
| (DYNAMO_OPTION(shared_traces) ? IBL_TRACE_SHARED : IBL_TRACE_PRIVATE) |
| #define IS_IBL_BB(ibltype) \ |
| ((ibltype) == IBL_BB_PRIVATE || (ibltype) == IBL_BB_SHARED) |
| #define IS_IBL_TRACE(ibltype) \ |
| ((ibltype) == IBL_TRACE_PRIVATE || (ibltype) == IBL_TRACE_SHARED) |
| #define IS_IBL_LINKED(ibltype) \ |
| ((ibltype) == IBL_LINKED || (ibltype) == IBL_FAR \ |
| IF_X64(|| (ibltype) == IBL_TRACE_CMP)) |
| #define IS_IBL_UNLINKED(ibltype) \ |
| ((ibltype) == IBL_UNLINKED || (ibltype) == IBL_FAR_UNLINKED \ |
| IF_X64(|| (ibltype) == IBL_TRACE_CMP_UNLINKED)) |
| |
| #define IBL_FRAG_FLAGS(ibl_code) \ |
| (IS_IBL_TRACE((ibl_code)->source_fragment_type) ? FRAG_IS_TRACE : 0) |
| |
| static inline ibl_entry_point_type_t |
| get_ibl_entry_type(uint link_or_instr_flags) |
| { |
| #ifdef X64 |
| if (TEST(LINK_TRACE_CMP, link_or_instr_flags)) |
| return IBL_TRACE_CMP; |
| #endif |
| if (TEST(LINK_FAR, link_or_instr_flags)) |
| return IBL_FAR; |
| else |
| return IBL_LINKED; |
| } |
| |
| typedef struct |
| { |
| /* these could be bit fields, if needed */ |
| ibl_entry_point_type_t link_state; |
| ibl_source_fragment_type_t source_fragment_type; |
| ibl_branch_type_t branch_type; |
| } ibl_type_t; |
| |
| #ifdef X64 |
| /* PR 282576: With shared_code_x86, GLOBAL_DCONTEXT no longer specifies |
| * a unique generated_code_t. Rather than add GLOBAL_DCONTEXT_X86 everywhere, |
| * we add mode parameters to a handful of routines that take in GLOBAL_DCONTEXT. |
| */ |
| /* FIXME i#1551: do we want separate Thumb vs ARM gencode, or we'll always |
| * transition? For fcache exit that's reasonable, but for ibl it would |
| * require two mode transitions. |
| */ |
| typedef enum { |
| GENCODE_X64 = 0, |
| GENCODE_X86, |
| GENCODE_X86_TO_X64, |
| GENCODE_FROM_DCONTEXT, |
| } gencode_mode_t; |
| # define FRAGMENT_GENCODE_MODE(fragment_flags) \ |
| (FRAG_IS_32(fragment_flags) ? GENCODE_X86 : \ |
| (FRAG_IS_X86_TO_X64(fragment_flags) ? GENCODE_X86_TO_X64 : GENCODE_X64)) |
| # define SHARED_GENCODE(gencode_mode) get_shared_gencode(GLOBAL_DCONTEXT, gencode_mode) |
| # define SHARED_GENCODE_MATCH_THREAD(dc) get_shared_gencode(dc, GENCODE_FROM_DCONTEXT) |
| # define THREAD_GENCODE(dc) get_emitted_routines_code(dc, GENCODE_FROM_DCONTEXT) |
| # define GENCODE_IS_X64(gencode_mode) ((gencode_mode) == GENCODE_X64) |
| # define GENCODE_IS_X86(gencode_mode) ((gencode_mode) == GENCODE_X86) |
| # define GENCODE_IS_X86_TO_X64(gencode_mode) ((gencode_mode) == GENCODE_X86_TO_X64) |
| #else |
| # define SHARED_GENCODE(b) get_shared_gencode(GLOBAL_DCONTEXT) |
| # define THREAD_GENCODE(dc) get_emitted_routines_code(dc) |
| # define SHARED_GENCODE_MATCH_THREAD(dc) get_shared_gencode(dc) |
| #endif |
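| /* Illustrative sketch (comment only, not part of the build): on x64, a caller that |
| * has only fragment flags typically maps them to a gencode mode and then picks the |
| * matching shared gencode, roughly (for a fragment_t *f): |
| *   gencode_mode_t mode = FRAGMENT_GENCODE_MODE(f->flags); |
| *   generated_code_t *code = SHARED_GENCODE(mode); |
| * On 32-bit builds SHARED_GENCODE(b) ignores its argument and there is only one |
| * shared gencode instance. |
| */ |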
| |
| #define NUM_XMM_REGS NUM_XMM_SAVED |
| #define NUM_GP_REGS DR_NUM_GPR_REGS |
| |
| /* Information about each individual clean call invocation site. |
| * The whole struct is set to 0 at init time. |
| */ |
| typedef struct _clean_call_info_t { |
| void *callee; |
| uint num_args; |
| bool save_fpstate; |
| bool opt_inline; |
| bool should_align; |
| bool save_all_regs; |
| bool skip_save_aflags; |
| bool skip_clear_eflags; |
| uint num_xmms_skip; |
| bool xmm_skip[NUM_XMM_REGS]; |
| uint num_regs_skip; |
| bool reg_skip[NUM_GP_REGS]; |
| bool preserve_mcontext; /* even if skip reg save, preserve mcontext shape */ |
| bool out_of_line_swap; /* whether we use clean_call_{save,restore} gencode */ |
| void *callee_info; /* callee information */ |
| instrlist_t *ilist; /* instruction list for inline optimization */ |
| } clean_call_info_t; |
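| /* Illustrative sketch (comment only, not part of the build) of the typical flow |
| * that fills in this struct, using the routines declared later in this file: |
| *   clean_call_info_t cci; |
| *   clean_call_info_init(&cci, callee, false, num_args); |
| *   prepare_for_clean_call(dcontext, &cci, ilist, where); |
| *   ... insert the call itself ... |
| *   cleanup_after_clean_call(dcontext, &cci, ilist, where); |
| */ |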
| |
| cache_pc get_ibl_routine_ex(dcontext_t *dcontext, ibl_entry_point_type_t entry_type, |
| ibl_source_fragment_type_t source_fragment_type, |
| ibl_branch_type_t branch_type _IF_X64(gencode_mode_t mode)); |
| cache_pc get_ibl_routine(dcontext_t *dcontext, ibl_entry_point_type_t entry_type, |
| ibl_source_fragment_type_t source_fragment_type, |
| ibl_branch_type_t branch_type); |
| cache_pc get_ibl_routine_template(dcontext_t *dcontext, |
| ibl_source_fragment_type_t source_fragment_type, |
| ibl_branch_type_t branch_type |
| _IF_X64(gencode_mode_t mode)); |
| bool get_ibl_routine_type(dcontext_t *dcontext, cache_pc target, ibl_type_t *type); |
| bool get_ibl_routine_type_ex(dcontext_t *dcontext, cache_pc target, ibl_type_t *type |
| _IF_X64(gencode_mode_t *mode_out)); |
| const char *get_ibl_routine_name(dcontext_t *dcontext, cache_pc target, |
| const char **ibl_brtype_name); |
| cache_pc get_trace_ibl_routine(dcontext_t *dcontext, cache_pc current_entry); |
| cache_pc get_private_ibl_routine(dcontext_t *dcontext, cache_pc current_entry); |
| cache_pc get_shared_ibl_routine(dcontext_t *dcontext, cache_pc current_entry); |
| cache_pc get_alternate_ibl_routine(dcontext_t *dcontext, cache_pc current_entry, |
| uint flags); |
| |
| ibl_source_fragment_type_t |
| get_source_fragment_type(dcontext_t *dcontext, uint fragment_flags); |
| |
| const char *get_target_delete_entry_name(dcontext_t *dcontext, |
| cache_pc target, |
| const char **ibl_brtype_name); |
| |
| #define GET_IBL_TARGET_TABLE(branch_type, target_trace_table) \ |
| ((target_trace_table) ? offsetof(per_thread_t, trace_ibt[(branch_type)]) : \ |
| offsetof(per_thread_t, bb_ibt[(branch_type)])) |
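| /* Illustrative sketch (comment only, not part of the build): an ibl emitter that |
| * knows the branch type and whether it targets the trace table can obtain the |
| * per_thread_t offset of the right table with something like |
| *   GET_IBL_TARGET_TABLE(IBL_INDCALL, true) |
| * and then load the table fields indirectly through the per_thread_t pointer. |
| */ |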
| |
| |
| #ifdef WINDOWS |
| /* PR 282576: These separate routines are ugly, but less ugly than adding param to |
| * after_shared_syscall_code(), which is called in many places and usually passed a |
| * non-global dcontext; also less ugly than adding GLOBAL_DCONTEXT_X86. |
| */ |
| cache_pc |
| shared_syscall_routine_ex(dcontext_t *dcontext _IF_X64(gencode_mode_t mode)); |
| cache_pc |
| unlinked_shared_syscall_routine_ex(dcontext_t *dcontext _IF_X64(gencode_mode_t mode)); |
| cache_pc shared_syscall_routine(dcontext_t *dcontext); |
| cache_pc unlinked_shared_syscall_routine(dcontext_t *dcontext); |
| #endif |
| #ifdef TRACE_HEAD_CACHE_INCR |
| cache_pc trace_head_incr_routine(dcontext_t *dcontext); |
| cache_pc trace_head_incr_shared_routine(IF_X64(gencode_mode_t mode)); |
| #endif |
| |
| /* in mangle_shared.c */ |
| /* What prepare_for_clean_call() adds to xsp beyond sizeof(priv_mcontext_t) */ |
| static inline int |
| clean_call_beyond_mcontext(void) |
| { |
| return 0; /* no longer adding anything */ |
| } |
| void |
| clean_call_info_init(clean_call_info_t *cci, void *callee, |
| bool save_fpstate, uint num_args); |
| void mangle(dcontext_t *dcontext, instrlist_t *ilist, uint *flags INOUT, |
| bool mangle_calls, bool record_translation); |
| bool |
| parameters_stack_padded(void); |
| /* Inserts a complete call to callee with the passed-in arguments */ |
| bool |
| insert_meta_call_vargs(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| bool clean_call, byte *encode_pc, void *callee, |
| uint num_args, opnd_t *args); |
| void |
| mangle_init(void); |
| void |
| mangle_exit(void); |
| void |
| insert_mov_immed_ptrsz(dcontext_t *dcontext, ptr_int_t val, opnd_t dst, |
| instrlist_t *ilist, instr_t *instr, |
| instr_t **first OUT, instr_t **second OUT); |
| void |
| insert_push_immed_ptrsz(dcontext_t *dcontext, ptr_int_t val, |
| instrlist_t *ilist, instr_t *instr, |
| instr_t **first OUT, instr_t **second OUT); |
| void |
| insert_mov_instr_addr(dcontext_t *dcontext, instr_t *src, byte *encode_estimate, |
| opnd_t dst, instrlist_t *ilist, instr_t *instr, |
| instr_t **first, instr_t **second); |
| void |
| insert_push_instr_addr(dcontext_t *dcontext, instr_t *src_inst, byte *encode_estimate, |
| instrlist_t *ilist, instr_t *instr, |
| instr_t **first, instr_t **second); |
| |
| /* in mangle.c arch-specific implementation */ |
| reg_id_t |
| shrink_reg_for_param(reg_id_t regular, opnd_t arg); |
| uint |
| insert_parameter_preparation(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| bool clean_call, uint num_args, opnd_t *args); |
| void |
| insert_mov_immed_arch(dcontext_t *dcontext, instr_t *src_inst, byte *encode_estimate, |
| ptr_int_t val, opnd_t dst, |
| instrlist_t *ilist, instr_t *instr, |
| instr_t **first, instr_t **second); |
| void |
| insert_push_immed_arch(dcontext_t *dcontext, instr_t *src_inst, byte *encode_estimate, |
| ptr_int_t val, instrlist_t *ilist, instr_t *instr, |
| instr_t **first, instr_t **second); |
| void |
| mangle_syscall(dcontext_t *dcontext, instrlist_t *ilist, uint flags, |
| instr_t *instr, instr_t *next_instr); |
| void |
| mangle_interrupt(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| instr_t *next_instr); |
| instr_t * |
| mangle_direct_call(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| instr_t *next_instr, bool mangle_calls, uint flags); |
| void |
| mangle_indirect_call(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| instr_t *next_instr, bool mangle_calls, uint flags); |
| void |
| mangle_return(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| instr_t *next_instr, uint flags); |
| void |
| mangle_indirect_jump(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| instr_t *next_instr, uint flags); |
| #if defined(X64) || defined(ARM) |
| bool |
| mangle_rel_addr(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| instr_t *next_instr); |
| #endif |
| void mangle_insert_clone_code(dcontext_t *dcontext, instrlist_t *ilist, |
| instr_t *instr, bool skip |
| _IF_X64(gencode_mode_t mode)); |
| /* the stack size of a full context switch for clean call */ |
| int |
| get_clean_call_switch_stack_size(void); |
| /* extra temporarily-used stack usage beyond |
| * get_clean_call_switch_stack_size() |
| */ |
| int |
| get_clean_call_temp_stack_size(void); |
| void |
| insert_clear_eflags(dcontext_t *dcontext, clean_call_info_t *cci, |
| instrlist_t *ilist, instr_t *instr); |
| uint |
| insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, |
| instrlist_t *ilist, instr_t *instr, |
| uint alignment, instr_t *push_pc); |
| void |
| insert_pop_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, |
| instrlist_t *ilist, instr_t *instr, |
| uint alignment); |
| bool |
| insert_reachable_cti(dcontext_t *dcontext, instrlist_t *ilist, instr_t *where, |
| byte *encode_pc, byte *target, bool jmp, bool precise, |
| reg_id_t scratch, instr_t **inlined_tgt_instr); |
| void |
| insert_get_mcontext_base(dcontext_t *dcontext, instrlist_t *ilist, |
| instr_t *where, reg_id_t reg); |
| uint |
| prepare_for_clean_call(dcontext_t *dcontext, clean_call_info_t *cci, |
| instrlist_t *ilist, instr_t *instr); |
| void |
| cleanup_after_clean_call(dcontext_t *dcontext, clean_call_info_t *cci, |
| instrlist_t *ilist, instr_t *instr); |
| void convert_to_near_rel(dcontext_t *dcontext, instr_t *instr); |
| instr_t *convert_to_near_rel_meta(dcontext_t *dcontext, instrlist_t *ilist, |
| instr_t *instr); |
| #ifdef WINDOWS |
| bool |
| instr_is_call_sysenter_pattern(instr_t *call, instr_t *mov, instr_t *sysenter); |
| #endif |
| int find_syscall_num(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr); |
| |
| /* in mangle.c but not exported to non-x86 files */ |
| #ifdef X86 |
| int |
| insert_out_of_line_context_switch(dcontext_t *dcontext, instrlist_t *ilist, |
| instr_t *instr, bool save); |
| /* mangle instructions that reference memory via a segment register */ |
| void |
| mangle_seg_ref(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| instr_t *next_instr); |
| /* mangle the instruction OP_mov_seg, i.e. the instruction that |
| * reads/updates the segment register. |
| */ |
| void |
| mangle_mov_seg(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| instr_t *next_instr); |
| void |
| mangle_float_pc(dcontext_t *dcontext, instrlist_t *ilist, |
| instr_t *instr, instr_t *next_instr, uint *flags INOUT); |
| void |
| mangle_exit_cti_prefixes(dcontext_t *dcontext, instr_t *instr); |
| void |
| mangle_far_direct_jump(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| instr_t *next_instr, uint flags); |
| void |
| set_selfmod_sandbox_offsets(dcontext_t *dcontext); |
| bool insert_selfmod_sandbox(dcontext_t *dcontext, instrlist_t *ilist, uint flags, |
| app_pc start_pc, app_pc end_pc, /* end is open */ |
| bool record_translation, bool for_cache); |
| #endif /* X86 */ |
| |
| /* offsets within local_state_t used for specific scratch purposes */ |
| enum { |
| /* ok for this slot to overlap w/ others since it is only used pre-cache */ |
| FCACHE_ENTER_TARGET_SLOT = TLS_SLOT_REG0, |
| /* FIXME: put register name in each enum name to avoid conflicts |
| * when mixed with raw slot names? |
| */ |
| /* ok for the next_tag and direct_stub to overlap as next_tag is |
| * used for sysenter shared syscall mangling, which uses an |
| * indirect stub. |
| */ |
| MANGLE_NEXT_TAG_SLOT = TLS_SLOT_REG0, |
| DIRECT_STUB_SPILL_SLOT = TLS_SLOT_REG0, |
| MANGLE_RIPREL_SPILL_SLOT = TLS_SLOT_REG0, |
| /* ok for far cti mangling/far ibl and stub/ibl xbx slot usage to overlap */ |
| INDIRECT_STUB_SPILL_SLOT = TLS_SLOT_REG1, |
| MANGLE_FAR_SPILL_SLOT = TLS_SLOT_REG1, |
| /* i#698: float_pc handling stores the mem addr of the float state here. We |
| * assume this slot is not touched on the fcache_return path. |
| */ |
| FLOAT_PC_STATE_SLOT = TLS_SLOT_REG1, |
| MANGLE_XCX_SPILL_SLOT = TLS_SLOT_REG2, |
| /* FIXME: edi is used as the base, yet I labeled this slot for edx |
| * since it's next in the progression -- change one or the other? |
| * (this is case 5239) |
| */ |
| DCONTEXT_BASE_SPILL_SLOT = TLS_SLOT_REG3, |
| PREFIX_XAX_SPILL_SLOT = TLS_SLOT_REG0, |
| #ifdef HASHTABLE_STATISTICS |
| HTABLE_STATS_SPILL_SLOT = TLS_HTABLE_STATS_SLOT, |
| #endif |
| }; |
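| /* Illustrative sketch (comment only, not part of the build): mangling code spills |
| * app registers into these slots via the TLS wrappers defined near the end of this |
| * file; e.g. indirect-branch mangling that needs xcx as scratch might do roughly: |
| *   instrlist_append(ilist, |
| *       SAVE_TO_TLS(dcontext, SCRATCH_REG2, MANGLE_XCX_SPILL_SLOT)); |
| *   ... clobber the register; it is restored from the slot later ... |
| */ |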
| |
| /* in interp.c but not exported to non-x86 files */ |
| bool must_not_be_inlined(app_pc pc); |
| |
| /* A simple linker to give us indirection for patching after relocating structures */ |
| typedef struct patch_entry_t { |
| union { |
| instr_t *instr; /* used before instructions are encoded */ |
| size_t offset; /* offset in instruction stream */ |
| } where; |
| ptr_uint_t value_location_offset; /* location containing value to be updated */ |
| /* offset from dcontext->fragment_field (usually pt->trace.field), |
| * or an absolute address */ |
| ushort patch_flags; /* whether to use the address of location or its value */ |
| short instr_offset; /* desired offset within instruction, |
| negative offsets are from end of instruction */ |
| } patch_entry_t; |
| |
| enum { |
| MAX_PATCH_ENTRIES = |
| #ifdef HASHTABLE_STATISTICS |
| 6 + /* will need more only for statistics */ |
| #endif |
| 7, /* we use 5 normally, 7 w/ -atomic_inlined_linking and inlining */ |
| /* Patch entry flags */ |
| /* Patch offset entries for dynamic updates from input variables */ |
| PATCH_TAKE_ADDRESS = 0x01, /* use computed address if set, value at address otherwise */ |
| PATCH_PER_THREAD = 0x02, /* address is relative to the per_thread_t thread local field */ |
| PATCH_UNPROT_STAT = 0x04, /* address is (unprot_ht_statistics_t offs << 16) | (stats offs) */ |
| /* Patch offset markers update an output variable in encode_with_patch_list */ |
| PATCH_MARKER = 0x08, /* if set use only as a static marker */ |
| PATCH_ASSEMBLE_ABSOLUTE = 0x10, /* if set retrieve an absolute pc into given target address, |
| otherwise relative to start pc */ |
| PATCH_OFFSET_VALID = 0x20, /* if set use patch_entry_t.where.offset; |
| * else patch_entry_t.where.instr */ |
| PATCH_UINT_SIZED = 0x40, /* if set value is uint-sized; else pointer-sized */ |
| }; |
| |
| typedef enum { |
| PATCH_TYPE_ABSOLUTE = 0x0, /* link with absolute address, updated dynamically */ |
| PATCH_TYPE_INDIRECT_XDI = 0x1, /* linked with indirection through EDI, no updates */ |
| PATCH_TYPE_INDIRECT_FS = 0x2, /* linked with indirection through FS, no updates */ |
| } patch_list_type_t; |
| |
| typedef struct patch_list_t { |
| ushort num_relocations; |
| ushort /* patch_list_type_t */ type; |
| patch_entry_t entry[MAX_PATCH_ENTRIES]; |
| } patch_list_t; |
| |
| void |
| init_patch_list(patch_list_t *patch, patch_list_type_t type); |
| |
| void |
| add_patch_marker(patch_list_t *patch, instr_t *instr, ushort patch_flags, |
| short instr_offset, ptr_uint_t *target_offset /* OUT */); |
| |
| int |
| encode_with_patch_list(dcontext_t *dcontext, patch_list_t *patch, |
| instrlist_t *ilist, cache_pc start_pc); |
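| /* Illustrative sketch (comment only, not part of the build) of the usual flow for |
| * an emitted routine that needs post-encoding fixups (marker_instr is a placeholder |
| * for an instr already in the ilist): |
| *   patch_list_t patch; |
| *   ptr_uint_t marker_offs; |
| *   init_patch_list(&patch, PATCH_TYPE_ABSOLUTE); |
| *   ... build ilist, then record a spot whose encoded offset we want back: |
| *   add_patch_marker(&patch, marker_instr, PATCH_MARKER, 0, &marker_offs); |
| *   encode_with_patch_list(dcontext, &patch, ilist, start_pc); |
| */ |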
| |
| #ifdef X64 |
| /* Shouldn't need to mark as packed. We order for 6-byte little-endian selector:pc. */ |
| typedef struct _far_ref_t { |
| /* We target WOW64 and cross-platform so no 8-byte Intel-only pc */ |
| uint pc; |
| ushort selector; |
| } far_ref_t; |
| #endif |
| |
| /* Defines book-keeping structures needed for an indirect branch lookup routine */ |
| typedef struct ibl_code_t { |
| bool initialized:1; /* currently only used for ibl routines */ |
| bool thread_shared_routine:1; |
| bool ibl_head_is_inlined:1; |
| byte *indirect_branch_lookup_routine; |
| /* for far ctis (i#823) */ |
| byte *far_ibl; |
| byte *far_ibl_unlinked; |
| #ifdef X64 |
| /* PR 257963: trace inline cmp has already saved eflags */ |
| byte *trace_cmp_entry; |
| byte *trace_cmp_unlinked; |
| bool x86_mode; /* Is this code for 32-bit (x86 mode)? */ |
| bool x86_to_x64_mode; /* Does this code use r8-r10 as scratch (for x86_to_x64)? */ |
| /* for far ctis (i#823) in mixed-mode (i#49) and x86_to_x64 mode (i#751) */ |
| far_ref_t far_jmp_opnd; |
| far_ref_t far_jmp_unlinked_opnd; |
| #endif |
| byte *unlinked_ibl_entry; |
| byte *target_delete_entry; |
| uint ibl_routine_length; |
| /* offsets into ibl routine */ |
| patch_list_t ibl_patch; |
| ibl_branch_type_t branch_type; |
| ibl_source_fragment_type_t source_fragment_type; |
| |
| /* bookkeeping for the inlined ibl stub template, if inlining */ |
| byte *inline_ibl_stub_template; |
| patch_list_t ibl_stub_patch; |
| uint inline_stub_length; |
| /* for atomic_inlined_linking we store the linkstub twice so need to update |
| * two offsets */ |
| uint inline_linkstub_first_offs; |
| uint inline_linkstub_second_offs; |
| uint inline_unlink_offs; |
| uint inline_linkedjmp_offs; |
| uint inline_unlinkedjmp_offs; |
| |
| #ifdef HASHTABLE_STATISTICS |
| /* need two offsets to get to stats, since in unprotected memory */ |
| uint unprot_stats_offset; |
| uint hashtable_stats_offset; |
| /* e.g. offsetof(per_thread_t, trace) + offsetof(ibl_table_t, bb_ibl_stats) */ |
| /* Note hashtable statistics are associated with the hashtable for easier use when sharing IBL routines */ |
| uint entry_stats_to_lookup_table_offset; /* offset to (entry_stats - lookup_table) */ |
| #endif |
| } ibl_code_t; |
| |
| /* special ibls */ |
| #define NUM_SPECIAL_IBL_XFERS 3 /* client_ibl and native_plt/ret_ibl */ |
| #define CLIENT_IBL_IDX 0 |
| #define NATIVE_PLT_IBL_IDX 1 |
| #define NATIVE_RET_IBL_IDX 2 |
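| /* Illustrative sketch (comment only, not part of the build): these indices select |
| * the per-type entry in generated_code_t.special_ibl_xfer[] below, e.g. |
| *   byte *xfer = code->special_ibl_xfer[CLIENT_IBL_IDX]; |
| */ |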
| |
| /* Each thread needs its own copy of these routines, but not all |
| * routines here are created in a thread-private instance: we could save space |
| * by splitting into two separate structs. |
| * |
| * On x64, we only have thread-shared generated routines, |
| * including do_syscall and shared_syscall and detach's post-syscall |
| * continuation (PR 244737). |
| */ |
| typedef struct _generated_code_t { |
| byte *fcache_enter; |
| byte *fcache_return; |
| #ifdef WINDOWS_PC_SAMPLE |
| byte *fcache_enter_return_end; |
| #endif |
| |
| ibl_code_t trace_ibl[IBL_BRANCH_TYPE_END]; |
| ibl_code_t bb_ibl[IBL_BRANCH_TYPE_END]; |
| ibl_code_t coarse_ibl[IBL_BRANCH_TYPE_END]; |
| #ifdef WINDOWS_PC_SAMPLE |
| byte *ibl_routines_end; |
| #endif |
| |
| #ifdef WINDOWS |
| /* for the shared_syscalls option */ |
| ibl_code_t shared_syscall_code; |
| byte *shared_syscall; |
| byte *unlinked_shared_syscall; |
| byte *end_shared_syscall; /* just marks end */ |
| /* N.B.: these offsets are from the start of unlinked_shared_syscall, |
| * not from shared_syscall (which is later)!!! |
| */ |
| /* offsets into shared_syscall routine */ |
| uint sys_syscall_offs; |
| /* where to patch to unlink end of syscall thread-wide */ |
| uint sys_unlink_offs; |
| #endif |
| byte *do_syscall; |
| uint do_syscall_offs; /* offs of pc after actual syscall instr */ |
| #ifdef WINDOWS |
| byte *fcache_enter_indirect; |
| byte *do_callback_return; |
| #else |
| /* PR 286922: we both need an int and a sys{call,enter} do-syscall for |
| * 32-bit apps on 64-bit kernels. do_syscall is whatever is in |
| * vsyscall, while do_int_syscall is hardcoded to use OP_int. |
| */ |
| byte *do_int_syscall; |
| uint do_int_syscall_offs; /* offs of pc after actual syscall instr */ |
| /* These are for Mac but we avoid ifdefs for simplicity */ |
| byte *do_int81_syscall; |
| uint do_int81_syscall_offs; /* offs of pc after actual syscall instr */ |
| byte *do_int82_syscall; |
| uint do_int82_syscall_offs; /* offs of pc after actual syscall instr */ |
| byte *do_clone_syscall; |
| uint do_clone_syscall_offs; /* offs of pc after actual syscall instr */ |
| # ifdef VMX86_SERVER |
| byte *do_vmkuw_syscall; |
| uint do_vmkuw_syscall_offs; /* offs of pc after actual syscall instr */ |
| # endif |
| #endif |
| #ifdef UNIX |
| /* PR 212290: can't be static code in x86.asm since it can't be PIC */ |
| byte *new_thread_dynamo_start; |
| #endif |
| #ifdef TRACE_HEAD_CACHE_INCR |
| byte *trace_head_incr; |
| #endif |
| #ifdef CHECK_RETURNS_SSE2 |
| byte *pextrw; |
| byte *pinsrw; |
| #endif |
| #ifdef WINDOWS_PC_SAMPLE |
| profile_t *profile; |
| #endif |
| /* For control redirection from a syscall. |
| * We could make this shared-only and save some space, if we |
| * generated a shared fcache_return in all-private-fragment configs. |
| */ |
| byte *reset_exit_stub; |
| |
| /* Coarse-grain fragments don't have linkstubs and need custom routines. |
| * Direct exits use entrance stubs that record the target app pc, |
| * while coarse indirect stubs record the source cache cti. |
| */ |
| /* FIXME: these two return routines are only needed in the global struct */ |
| byte *fcache_return_coarse; |
| byte *trace_head_return_coarse; |
| /* special ibl xfer */ |
| byte *special_ibl_xfer[NUM_SPECIAL_IBL_XFERS]; |
| uint special_ibl_unlink_offs[NUM_SPECIAL_IBL_XFERS]; |
| /* i#171: out-of-line clean call context switch */ |
| byte *clean_call_save; |
| byte *clean_call_restore; |
| |
| bool thread_shared; |
| bool writable; |
| #ifdef X64 |
| gencode_mode_t gencode_mode; /* mode of this code (x64, x86, x86_to_x64) */ |
| #endif |
| |
| /* We store the start of the generated code for simplicity even |
| * though it is always right after this struct; if we really need |
| * to shrink 4 bytes we can remove this field and replace w/ |
| * ((char *)TPC_ptr) + sizeof(generated_code_t) |
| */ |
| byte *gen_start_pc; /* start of generated code */ |
| byte *gen_end_pc; /* end of generated code */ |
| byte *commit_end_pc; /* end of committed region */ |
| /* generated code follows, ends at gen_end_pc < commit_end_pc */ |
| } generated_code_t; |
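| /* Illustrative sketch (comment only, not part of the build): callers obtain this |
| * struct via THREAD_GENCODE(dcontext) (or the SHARED_GENCODE* macros on x64) and |
| * then hand out individual entry points, e.g. |
| *   generated_code_t *code = THREAD_GENCODE(dcontext); |
| *   byte *enter = code->fcache_enter; |
| */ |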
| |
| /* thread-private generated code */ |
| fcache_enter_func_t fcache_enter_routine(dcontext_t *dcontext); |
| cache_pc fcache_return_routine(dcontext_t *dcontext); |
| cache_pc fcache_return_routine_ex(dcontext_t *dcontext _IF_X64(gencode_mode_t mode)); |
| |
| /* thread-shared generated code */ |
| byte * emit_fcache_enter_shared(dcontext_t *dcontext, generated_code_t *code, byte *pc); |
| byte * emit_fcache_return_shared(dcontext_t *dcontext, generated_code_t *code, byte *pc); |
| fcache_enter_func_t fcache_enter_shared_routine(dcontext_t *dcontext); |
| /* the fcache_return routines are queried by get_direct_exit_target and need more |
| * direct control than the dcontext |
| */ |
| cache_pc fcache_return_shared_routine(IF_X64(gencode_mode_t mode)); |
| |
| /* coarse-grain generated code */ |
| byte * emit_fcache_return_coarse(dcontext_t *dcontext, generated_code_t *code, byte *pc); |
| byte * emit_trace_head_return_coarse(dcontext_t *dcontext, generated_code_t *code, |
| byte *pc); |
| cache_pc fcache_return_coarse_routine(IF_X64(gencode_mode_t mode)); |
| cache_pc trace_head_return_coarse_routine(IF_X64(gencode_mode_t mode)); |
| |
| /* shared clean call context switch */ |
| bool client_clean_call_is_thread_private(); |
| cache_pc get_clean_call_save(dcontext_t *dcontext _IF_X64(gencode_mode_t mode)); |
| cache_pc get_clean_call_restore(dcontext_t *dcontext _IF_X64(gencode_mode_t mode)); |
| |
| void protect_generated_code(generated_code_t *code, bool writable); |
| |
| extern generated_code_t *shared_code; |
| #ifdef X64 |
| extern generated_code_t *shared_code_x86; |
| extern generated_code_t *shared_code_x86_to_x64; |
| #endif |
| |
| static inline bool |
| is_shared_gencode(generated_code_t *code) |
| { |
| if (code == NULL) /* since shared_code_x86 in particular can be NULL */ |
| return false; |
| #ifdef X64 |
| return code == shared_code_x86 || code == shared_code || |
| code == shared_code_x86_to_x64; |
| #else |
| return code == shared_code; |
| #endif |
| } |
| |
| static inline generated_code_t * |
| get_shared_gencode(dcontext_t *dcontext _IF_X64(gencode_mode_t mode)) |
| { |
| #ifdef X64 |
| ASSERT(mode != GENCODE_FROM_DCONTEXT || dcontext != GLOBAL_DCONTEXT |
| IF_INTERNAL(IF_CLIENT_INTERFACE(|| dynamo_exited))); |
| # if defined(INTERNAL) || defined(CLIENT_INTERFACE) |
| /* PR 302344: this is here only for tracedump_origins */ |
| if (dynamo_exited && mode == GENCODE_FROM_DCONTEXT && dcontext == GLOBAL_DCONTEXT) { |
| if (get_x86_mode(dcontext)) |
| return X64_CACHE_MODE_DC(dcontext) ? shared_code_x86_to_x64 : shared_code_x86; |
| else |
| return shared_code; |
| } |
| # endif |
| if (mode == GENCODE_X86) |
| return shared_code_x86; |
| else if (mode == GENCODE_X86_TO_X64) |
| return shared_code_x86_to_x64; |
| else if (mode == GENCODE_FROM_DCONTEXT && !X64_MODE_DC(dcontext)) |
| return X64_CACHE_MODE_DC(dcontext) ? shared_code_x86_to_x64 : shared_code_x86; |
| else |
| return shared_code; |
| #else |
| return shared_code; |
| #endif |
| } |
| |
| /* PR 244737: thread-private uses shared gencode on x64 */ |
| #define USE_SHARED_GENCODE_ALWAYS() IF_X64_ELSE(true, false) |
| /* PR 212570: on linux we need a thread-shared do_syscall for our vsyscall hook, |
| * if we have TLS and support sysenter (PR 361894) |
| */ |
| #define USE_SHARED_GENCODE() \ |
| (USE_SHARED_GENCODE_ALWAYS() || IF_UNIX(IF_HAVE_TLS_ELSE(true, false) ||) \ |
| SHARED_FRAGMENTS_ENABLED() || DYNAMO_OPTION(shared_trace_ibl_routine)) |
| |
| #define USE_SHARED_BB_IBL() \ |
| (USE_SHARED_GENCODE_ALWAYS() || DYNAMO_OPTION(shared_bbs)) |
| |
| #define USE_SHARED_TRACE_IBL() \ |
| (USE_SHARED_GENCODE_ALWAYS() || DYNAMO_OPTION(shared_traces) || \ |
| DYNAMO_OPTION(shared_trace_ibl_routine)) |
| |
| /* returns the thread private code or GLOBAL thread shared code */ |
| static inline generated_code_t* |
| get_emitted_routines_code(dcontext_t *dcontext _IF_X64(gencode_mode_t mode)) |
| { |
| generated_code_t *code; |
| /* This routine exists only because GLOBAL_DCONTEXT is not a real dcontext |
| * structure. Still, useful to wrap all references to private_code. */ |
| /* PR 244737: thread-private uses only shared gencode on x64 */ |
| /* PR 253431: to distinguish shared x86 gencode from x64 gencode, a dcontext |
| * must be passed in; use get_shared_gencode() for x64 builds */ |
| IF_X64(ASSERT(mode != GENCODE_FROM_DCONTEXT || dcontext != GLOBAL_DCONTEXT)); |
| if (USE_SHARED_GENCODE_ALWAYS() || |
| (USE_SHARED_GENCODE() && dcontext == GLOBAL_DCONTEXT)) { |
| code = get_shared_gencode(dcontext _IF_X64(mode)); |
| } else { |
| ASSERT(dcontext != GLOBAL_DCONTEXT); |
| /* NOTE thread private code entry points may also refer to shared |
| * routines */ |
| code = (generated_code_t *) dcontext->private_code; |
| } |
| return code; |
| } |
| |
| ibl_code_t *get_ibl_routine_code(dcontext_t *dcontext, ibl_branch_type_t branch_type, |
| uint fragment_flags); |
| ibl_code_t *get_ibl_routine_code_ex(dcontext_t *dcontext, ibl_branch_type_t branch_type, |
| uint fragment_flags _IF_X64(gencode_mode_t mode)); |
| |
| /* in emit_utils.c but not exported to non-x86 files */ |
| byte * emit_inline_ibl_stub(dcontext_t *dcontext, byte *pc, |
| ibl_code_t *ibl_code, bool target_trace_table); |
| byte * emit_fcache_enter(dcontext_t *dcontext, generated_code_t *code, byte *pc); |
| byte * emit_fcache_return(dcontext_t *dcontext, generated_code_t *code, byte *pc); |
| byte * emit_indirect_branch_lookup(dcontext_t *dcontext, generated_code_t *code, byte *pc, |
| byte *fcache_return_pc, |
| bool target_trace_table, |
| bool inline_ibl_head, |
| ibl_code_t *ibl_code); |
| void update_indirect_branch_lookup(dcontext_t *dcontext); |
| |
| byte *emit_far_ibl(dcontext_t *dcontext, byte *pc, ibl_code_t *ibl_code, cache_pc ibl_tgt |
| _IF_X64(far_ref_t *far_jmp_opnd)); |
| |
| #ifndef WINDOWS |
| void update_syscalls(dcontext_t *dcontext); |
| #endif |
| |
| #ifdef WINDOWS |
| /* FIXME If we widen the interface any further, do we want to use an options |
| * struct or OR-ed flags to replace the bool args? */ |
| byte * emit_shared_syscall(dcontext_t *dcontext, generated_code_t *code, byte *pc, |
| ibl_code_t *ibl_code, |
| patch_list_t *patch, |
| byte *ind_br_lookup_pc, |
| byte *unlinked_ib_lookup_pc, |
| bool target_trace_table, |
| bool inline_ibl_head, |
| bool thread_shared, |
| byte **shared_syscall_pc); |
| |
| byte * |
| emit_shared_syscall_dispatch(dcontext_t *dcontext, byte *pc); |
| |
| byte * |
| emit_unlinked_shared_syscall_dispatch(dcontext_t *dcontext, byte *pc); |
| |
| # ifdef CLIENT_INTERFACE |
| /* i#249: isolate app's PEB by keeping our own copy and swapping on cxt switch */ |
| void |
| preinsert_swap_peb(dcontext_t *dcontext, instrlist_t *ilist, instr_t *next, |
| bool absolute, reg_id_t reg_dr, reg_id_t reg_scratch, bool to_priv); |
| # endif |
| |
| void emit_patch_syscall(dcontext_t *dcontext, byte *target _IF_X64(gencode_mode_t mode)); |
| #endif /* WINDOWS */ |
| |
| byte * emit_do_syscall(dcontext_t *dcontext, generated_code_t *code, byte *pc, |
| byte *fcache_return_pc, bool thread_shared, int interrupt, |
| uint *syscall_offs /*OUT*/); |
| |
| #ifdef WINDOWS |
| /* PR 282576: These separate routines are ugly, but less ugly than adding param to |
| * the main routines, which are called in many places and usually passed a |
| * non-global dcontext; also less ugly than adding GLOBAL_DCONTEXT_X86. |
| */ |
| cache_pc |
| after_shared_syscall_code_ex(dcontext_t *dcontext _IF_X64(gencode_mode_t mode)); |
| cache_pc |
| after_do_syscall_code_ex(dcontext_t *dcontext _IF_X64(gencode_mode_t mode)); |
| |
| byte * emit_fcache_enter_indirect(dcontext_t *dcontext, generated_code_t *code, byte *pc, |
| byte *fcache_return_pc); |
| byte * emit_do_callback_return(dcontext_t *dcontext, byte *pc, |
| byte *fcache_return_pc, bool thread_shared); |
| #else |
| byte * emit_do_clone_syscall(dcontext_t *dcontext, generated_code_t *code, byte *pc, |
| byte *fcache_return_pc, bool thread_shared, |
| uint *syscall_offs /*OUT*/); |
| # ifdef VMX86_SERVER |
| byte * emit_do_vmkuw_syscall(dcontext_t *dcontext, generated_code_t *code, byte *pc, |
| byte *fcache_return_pc, bool thread_shared, |
| uint *syscall_offs /*OUT*/); |
| # endif |
| #endif |
| |
| #ifdef UNIX |
| byte * |
| emit_new_thread_dynamo_start(dcontext_t *dcontext, byte *pc); |
| |
| cache_pc get_new_thread_start(dcontext_t *dcontext _IF_X64(gencode_mode_t mode)); |
| #endif |
| |
| #ifdef TRACE_HEAD_CACHE_INCR |
| byte *emit_trace_head_incr(dcontext_t *dcontext, byte *pc, |
| byte *fcache_return_pc); |
| byte * |
| emit_trace_head_incr_shared(dcontext_t *dcontext, byte *pc, byte *fcache_return_pc); |
| #endif |
| |
| #ifdef CLIENT_INTERFACE |
| byte * |
| emit_client_ibl_xfer(dcontext_t *dcontext, byte *pc, generated_code_t *code); |
| #endif |
| |
| #ifdef UNIX |
| byte * |
| emit_native_plt_ibl_xfer(dcontext_t *dcontext, byte *pc, generated_code_t *code); |
| |
| byte * |
| emit_native_ret_ibl_xfer(dcontext_t *dcontext, byte *pc, generated_code_t *code); |
| #endif |
| |
| /* clean calls are used by core DR: native_exec, so not in CLIENT_INTERFACE */ |
| byte * |
| emit_clean_call_save(dcontext_t *dcontext, byte *pc, generated_code_t *code); |
| |
| byte * |
| emit_clean_call_restore(dcontext_t *dcontext, byte *pc, generated_code_t *code); |
| |
| void |
| insert_save_eflags(dcontext_t *dcontext, instrlist_t *ilist, instr_t *where, |
| uint flags, bool tls, bool absolute _IF_X64(bool x86_to_x64_ibl_opt)); |
| void |
| insert_restore_eflags(dcontext_t *dcontext, instrlist_t *ilist, instr_t *where, |
| uint flags, bool tls, bool absolute |
| _IF_X64(bool x86_to_x64_ibl_opt)); |
| |
| instr_t * create_syscall_instr(dcontext_t *dcontext); |
| |
| void |
| insert_shared_get_dcontext(dcontext_t *dcontext, instrlist_t *ilist, instr_t *where, |
| bool save_xdi); |
| |
| void |
| insert_shared_restore_dcontext_reg(dcontext_t *dcontext, instrlist_t *ilist, |
| instr_t *where); |
| |
| /* in optimize.c */ |
| instr_t *find_next_self_loop(dcontext_t *dcontext, app_pc tag, instr_t *instr); |
| void replace_inst(dcontext_t *dcontext, instrlist_t *ilist, instr_t *old, instr_t *new); |
| void remove_redundant_loads(dcontext_t *dcontext, app_pc tag, |
| instrlist_t *trace); |
| void remove_dead_code(dcontext_t *dcontext, app_pc tag, |
| instrlist_t *trace); |
| |
| #ifdef CHECK_RETURNS_SSE2 |
| /* in retcheck.c */ |
| void check_return_handle_call(dcontext_t *dcontext, instrlist_t *ilist, instr_t *next); |
| void check_return_handle_return(dcontext_t *dcontext, instrlist_t *ilist, instr_t *next); |
| void check_return_too_deep(dcontext_t *dcontext, |
| volatile int errno, volatile reg_t eflags, |
| volatile reg_t reg_edi, volatile reg_t reg_esi, |
| volatile reg_t reg_ebp, volatile reg_t reg_esp, |
| volatile reg_t reg_ebx, volatile reg_t reg_edx, |
| volatile reg_t reg_ecx, volatile reg_t reg_eax); |
| void check_return_too_shallow(dcontext_t *dcontext, |
| volatile int errno, volatile reg_t eflags, |
| volatile reg_t reg_edi, volatile reg_t reg_esi, |
| volatile reg_t reg_ebp, volatile reg_t reg_esp, |
| volatile reg_t reg_ebx, volatile reg_t reg_edx, |
| volatile reg_t reg_ecx, volatile reg_t reg_eax); |
| void check_return_ra_mangled(dcontext_t *dcontext, |
| volatile int errno, volatile reg_t eflags, |
| volatile reg_t reg_edi, volatile reg_t reg_esi, |
| volatile reg_t reg_ebp, volatile reg_t reg_esp, |
| volatile reg_t reg_ebx, volatile reg_t reg_edx, |
| volatile reg_t reg_ecx, volatile reg_t reg_eax); |
| #endif |
| |
| #ifdef UNIX |
| void new_thread_setup(priv_mcontext_t *mc); |
| # ifdef MACOS |
| void new_bsdthread_setup(priv_mcontext_t *mc); |
| # endif |
| #endif |
| |
| void |
| get_xmm_vals(priv_mcontext_t *mc); |
| |
| /* i#350: Fast safe_read without dcontext. On success or failure, returns the |
| * current source pointer. Requires fault handling to be set up. |
| */ |
| void *safe_read_asm(void *dst, const void *src, size_t size); |
| /* These are labels, not function pointers. We declare them as functions to |
| * prevent loads and stores to these globals from compiling. |
| */ |
| void safe_read_asm_pre(void); |
| void safe_read_asm_mid(void); |
| void safe_read_asm_post(void); |
| void safe_read_asm_recover(void); |
| |
| /* from x86.asm */ |
| /* Note these have specialized calling conventions and shouldn't be called from |
| * C code (see comments in x86.asm). */ |
| void global_do_syscall_sysenter(void); |
| void global_do_syscall_int(void); |
| void global_do_syscall_sygate_int(void); |
| void global_do_syscall_sygate_sysenter(void); |
| #ifdef WINDOWS |
| void global_do_syscall_wow64(void); |
| void global_do_syscall_wow64_index0(void); |
| #endif |
| #ifdef X64 |
| void global_do_syscall_syscall(void); |
| #endif |
| void get_xmm_caller_saved(dr_ymm_t *xmm_caller_saved_buf); |
| void get_ymm_caller_saved(dr_ymm_t *ymm_caller_saved_buf); |
| /* returns the value of mmx register #index in val */ |
| void get_mmx_val(OUT uint64 *val, uint index); |
| |
| /* in encode.c */ |
| byte *instr_encode_ignore_reachability(dcontext_t *dcontext, instr_t *instr, byte *pc); |
| byte *instr_encode_check_reachability(dcontext_t *dcontext, instr_t *instr, byte *pc, |
| bool *has_instr_opnds/*OUT OPTIONAL*/); |
| byte *copy_and_re_relativize_raw_instr(dcontext_t *dcontext, instr_t *instr, |
| byte *dst_pc, byte *final_pc); |
| |
| /* in instr_shared.c */ |
| uint |
| move_mm_reg_opcode(bool aligned16, bool aligned32); |
| |
| /* clean call optimization */ |
| /* Describes usage of a scratch slot. */ |
| enum { |
| SLOT_NONE = 0, |
| SLOT_REG, |
| SLOT_LOCAL, |
| SLOT_FLAGS, |
| }; |
| typedef byte slot_kind_t; |
| |
| /* If kind is: |
| * SLOT_REG: value is a reg_id_t |
| * SLOT_LOCAL: value is meaningless, may change to support multiple locals |
| * SLOT_FLAGS: value is meaningless |
| */ |
| typedef struct _slot_t { |
| slot_kind_t kind; |
| reg_id_t value; |
| } slot_t; |
| |
| /* data structure of clean call callee information. */ |
| typedef struct _callee_info_t { |
| bool bailout; /* if we bail out on function analysis */ |
| uint num_args; /* number of args that will be passed in */ |
| int num_instrs; /* total number of instructions of a function */ |
| app_pc start; /* entry point of a function */ |
| app_pc bwd_tgt; /* earliest backward branch target */ |
| app_pc fwd_tgt; /* last forward branch target */ |
| int num_xmms_used; /* number of xmms used by callee */ |
| bool xmm_used[NUM_XMM_REGS]; /* xmm/ymm registers usage */ |
| bool reg_used[NUM_GP_REGS]; /* general purpose registers usage */ |
| int num_callee_save_regs; /* number of regs callee saved */ |
| bool callee_save_regs[NUM_GP_REGS]; /* callee-save registers */ |
| bool has_locals; /* whether it references locals via the stack */ |
| bool xbp_is_fp; /* if xbp is used as frame pointer */ |
| bool opt_inline; /* can be inlined or not */ |
| bool write_aflags; /* if the function changes aflags */ |
| bool read_aflags; /* if the function reads aflags from caller */ |
| bool tls_used; /* application accesses TLS (errno, etc.) */ |
| reg_id_t spill_reg; /* base register for spill slots */ |
| uint slots_used; /* scratch slots needed after analysis */ |
| slot_t scratch_slots[CLEANCALL_NUM_INLINE_SLOTS]; /* scratch slot allocation */ |
| instrlist_t *ilist; /* instruction list of function for inline. */ |
| } callee_info_t; |
| extern callee_info_t default_callee_info; |
| extern clean_call_info_t default_clean_call_info; |
| |
| /* in clean_call_opt.c */ |
| #ifdef CLIENT_INTERFACE |
| void |
| clean_call_opt_init(void); |
| void |
| clean_call_opt_exit(void); |
| #endif /* CLIENT_INTERFACE */ |
| bool |
| analyze_clean_call(dcontext_t *dcontext, clean_call_info_t *cci, instr_t *where, |
| void *callee, bool save_fpstate, uint num_args, opnd_t *args); |
| void |
| insert_inline_clean_call(dcontext_t *dcontext, clean_call_info_t *cci, |
| instrlist_t *ilist, instr_t *where, opnd_t *args); |
| |
| |
| /* in mangle.c */ |
| void |
| insert_push_retaddr(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| ptr_int_t retaddr, opnd_size_t opsize); |
| |
| ptr_uint_t |
| get_call_return_address(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr); |
| |
| #ifdef X64 |
| /* in x86_to_x64.c */ |
| void |
| translate_x86_to_x64(dcontext_t *dcontext, instrlist_t *ilist, INOUT instr_t **instr); |
| #endif |
| |
| /**************************************************************************** |
| * Platform-independent emit_utils_shared.c |
| */ |
| /* add an instruction to patch list and address of location for future updates */ |
| /* Use the type checked wrappers add_patch_entry or add_patch_marker */ |
| void |
| add_patch_entry_internal(patch_list_t *patch, instr_t *instr, ushort patch_flags, |
| short instruction_offset, |
| ptr_uint_t value_location_offset); |
| cache_pc |
| get_direct_exit_target(dcontext_t *dcontext, uint flags); |
| void |
| link_indirect_exit_arch(dcontext_t *dcontext, fragment_t *f, |
| linkstub_t *l, bool hot_patch, |
| app_pc target_tag); |
| cache_pc |
| exit_cti_disp_pc(cache_pc branch_pc); |
| void |
| append_ibl_found(dcontext_t *dcontext, instrlist_t *ilist, |
| ibl_code_t *ibl_code, patch_list_t *patch, |
| uint start_pc_offset, |
| bool collision, |
| bool only_spill_state_in_tls, /* if true, no table info in TLS; |
| * indirection off of XDI is used */ |
| bool restore_eflags, |
| instr_t **fragment_found); |
| #ifdef HASHTABLE_STATISTICS |
| # define HASHLOOKUP_STAT_OFFS(event) (offsetof(hashtable_statistics_t, event##_stat)) |
| void |
| append_increment_counter(dcontext_t *dcontext, instrlist_t *ilist, |
| ibl_code_t *ibl_code, patch_list_t *patch, |
| reg_id_t entry_register, /* register indirect (XCX) or NULL */ |
| /* adjusted to unprot_ht_statistics_t if no entry_register */ |
| uint counter_offset, |
| reg_id_t scratch_register); |
| #endif |
| /* we are sharing bbs w/o ibs -- we assume that a bb |
| * w/ a direct branch cannot have an ib and thus is shared |
| */ |
| #ifdef TRACE_HEAD_CACHE_INCR |
| /* incr routine can't tell whether coming from shared bb |
| * or non-shared fragment (such as a trace) so must always |
| * use shared stubs |
| */ |
| # define FRAG_DB_SHARED(flags) true |
| #else |
| # define FRAG_DB_SHARED(flags) (TEST(FRAG_SHARED, (flags))) |
| #endif |
| |
| /* fragment_t fields */ |
| #define FRAGMENT_TAG_OFFS (offsetof(fragment_t, tag)) |
| |
| enum {PREFIX_SIZE_RESTORE_OF = 2, /* add $0x7f, %al */ |
| PREFIX_SIZE_FIVE_EFLAGS = 1, /* SAHF */ |
| }; |
| |
| /* PR 244737: x64 always uses tls even if all-private */ |
| #define IBL_EFLAGS_IN_TLS() (IF_X64_ELSE(true, SHARED_IB_TARGETS())) |
| |
| /* use indirect branch target prefix? */ |
| static inline bool |
| use_ibt_prefix(uint flags) |
| { |
| /* when no traces, all bbs use IBT prefix */ |
| /* FIXME: currently to allow bb2bb we simply have a prefix on all BB's |
| * should experiment with a shorter prefix for targeting BBs |
| * by restoring the flags in the IBL routine, |
| * or even jump through memory to avoid having the register restore prefix |
| * Alternatively, we can reemit a fragment only once it is known to be an IBL target, |
| * assuming the majority will be reached with an IB when they are first built. |
| * (Simplest counterexample is of a return from a function with no arguments |
| * called within a conditional, but the cache compaction of not having |
| * prefixes on all bb's may offset this double emit). |
| * All of these are covered by case 147. |
| */ |
| return (IS_IBL_TARGET(flags) && |
| /* coarse bbs (and fine in presence of coarse) do not support prefixes */ |
| !(DYNAMO_OPTION(coarse_units) && |
| !TEST(FRAG_IS_TRACE, flags) && |
| DYNAMO_OPTION(bb_ibl_targets))); |
| } |
| |
| static inline bool |
| ibl_use_target_prefix(ibl_code_t *ibl_code) |
| { |
| return !(DYNAMO_OPTION(coarse_units) && |
| /* If coarse units are enabled we need to have no prefix |
| * for both fine and coarse bbs |
| */ |
| ((ibl_code->source_fragment_type == IBL_COARSE_SHARED && |
| DYNAMO_OPTION(bb_ibl_targets)) || |
| (IS_IBL_BB(ibl_code->source_fragment_type) && |
| /* FIXME case 147/9636: if -coarse_units -bb_ibl_targets |
| * but traces are enabled, we won't put prefixes on regular |
| * bbs but will assume we have them here! We don't support |
| * that combination yet. When we do this routine should return |
| * another bit of info: whether to do two separate lookups. |
| */ |
| DYNAMO_OPTION(disable_traces) && DYNAMO_OPTION(bb_ibl_targets)))); |
| } |
| |
| /* add an instruction to patch list and address of location for future updates */ |
| static inline void |
| add_patch_entry(patch_list_t *patch, instr_t *instr, ushort patch_flags, |
| ptr_uint_t value_location_offset) |
| { |
| add_patch_entry_internal(patch, instr, patch_flags, -4 /* offset of imm32 argument */, |
| value_location_offset); |
| } |
| |
| /**************************************************************************** |
| * Platform-specific {x86/arm}/emit_utils.c |
| */ |
| /* macros shared by fcache_enter and fcache_return |
| * in order to generate both thread-private code that uses absolute |
| * addressing and thread-shared or dcontext-shared code that uses |
| * scratch_reg5(xdi/r5) (and scratch_reg4(xsi/r4)) for addressing. |
| * The via_reg macros now auto-magically pick the opnd size from the |
| * target register and so work with more than just pointer-sized values. |
| */ |
| /* PR 244737: even thread-private fragments use TLS on x64. We accomplish |
| * that at the caller site, so we should never see an "absolute" request. |
| */ |
| #define RESTORE_FROM_DC(dc, reg, offs) \ |
| RESTORE_FROM_DC_VIA_REG(absolute, dc, REG_NULL, reg, offs) |
| /* Note the magic absolute boolean that callers are expected to have declared */ |
| #define SAVE_TO_DC(dc, reg, offs) \ |
| SAVE_TO_DC_VIA_REG(absolute, dc, REG_NULL, reg, offs) |
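| /* Illustrative sketch (comment only, not part of the build): an emit routine using |
| * these macros is expected to have a local named "absolute" in scope that selects |
| * the addressing style, roughly: |
| *   bool absolute = ...;  (true only for thread-private absolute-addressed gencode) |
| *   instrlist_append(ilist, |
| *       RESTORE_FROM_DC(dcontext, SCRATCH_REG0, SCRATCH_REG0_OFFS)); |
| *   ... |
| *   instrlist_append(ilist, SAVE_TO_DC(dcontext, SCRATCH_REG0, SCRATCH_REG0_OFFS)); |
| */ |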
| |
| #define OPND_TLS_FIELD(offs) opnd_create_tls_slot(os_tls_offset(offs)) |
| |
| #define OPND_TLS_FIELD_SZ(offs, sz) \ |
| opnd_create_sized_tls_slot(os_tls_offset(offs), sz) |
| |
| #define SAVE_TO_TLS(dc, reg, offs) \ |
| instr_create_save_to_tls(dc, reg, offs) |
| #define RESTORE_FROM_TLS(dc, reg, offs) \ |
| instr_create_restore_from_tls(dc, reg, offs) |
| |
| #define SAVE_TO_REG(dc, reg, spill) \ |
| instr_create_save_to_reg(dc, reg, spill) |
| #define RESTORE_FROM_REG(dc, reg, spill) \ |
| instr_create_restore_from_reg(dc, reg, spill) |
| |
| #define OPND_DC_FIELD(absolute, dcontext, sz, offs) \ |
| ((absolute) ? (IF_X64_(ASSERT_NOT_IMPLEMENTED(false)) \ |
| opnd_create_dcontext_field_sz(dcontext, (offs), (sz))) : \ |
| opnd_create_dcontext_field_via_reg_sz((dcontext), REG_NULL, (offs), (sz))) |
| |
| /* PR 244737: even thread-private fragments use TLS on x64. We accomplish |
| * that at the caller site, so we should never see an "absolute" request. |
| */ |
| #define RESTORE_FROM_DC_VIA_REG(absolute, dc, reg_dr, reg, offs) \ |
| ((absolute) ? (IF_X64_(ASSERT_NOT_IMPLEMENTED(false)) \ |
| instr_create_restore_from_dcontext((dc), (reg), (offs))) : \ |
| instr_create_restore_from_dc_via_reg((dc), reg_dr, (reg), (offs))) |
| /* Note the magic absolute boolean that callers are expected to have declared */ |
| #define SAVE_TO_DC_VIA_REG(absolute, dc, reg_dr, reg, offs) \ |
| ((absolute) ? (IF_X64_(ASSERT_NOT_IMPLEMENTED(false)) \ |
| instr_create_save_to_dcontext((dc), (reg), (offs))) : \ |
| instr_create_save_to_dc_via_reg((dc), reg_dr, (reg), (offs))) |
| |
| void |
| append_call_exit_dr_hook(dcontext_t *dcontext, instrlist_t *ilist, |
| bool absolute, bool shared); |
| void |
| append_restore_xflags(dcontext_t *dcontext, instrlist_t *ilist, bool absolute); |
| void |
| append_restore_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute); |
| void |
| append_restore_gpr(dcontext_t *dcontext, instrlist_t *ilist, bool absolute); |
| void |
| append_save_gpr(dcontext_t *dcontext, instrlist_t *ilist, bool ibl_end, bool absolute, |
| generated_code_t *code, linkstub_t *linkstub, bool coarse_info); |
| void |
| append_save_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute); |
| void |
| append_save_clear_xflags(dcontext_t *dcontext, instrlist_t *ilist, bool absolute); |
| bool |
| append_call_enter_dr_hook(dcontext_t *dcontext, instrlist_t *ilist, |
| bool ibl_end, bool absolute); |
| bool |
| append_fcache_return_common(dcontext_t *dcontext, generated_code_t *code, |
| instrlist_t *ilist, bool ibl_end, |
| bool absolute, bool shared, linkstub_t *linkstub, |
| bool coarse_info); |
| void |
| append_ibl_head(dcontext_t *dcontext, instrlist_t *ilist, |
| ibl_code_t *ibl_code, patch_list_t *patch, |
| instr_t **fragment_found, instr_t **compare_tag_inst, |
| instr_t **post_eflags_save, |
| opnd_t miss_tgt, bool miss_8bit, |
| bool target_trace_table, |
| bool inline_ibl_head); |
| #ifdef X64 |
| void |
| instrlist_convert_to_x86(instrlist_t *ilist); |
| #endif |
| |
| #endif /* X86_ARCH_H */ |