| /* ********************************************************** |
| * Copyright (c) 2011-2014 Google, Inc. All rights reserved. |
| * Copyright (c) 2001-2010 VMware, Inc. All rights reserved. |
| * **********************************************************/ |
| |
| /* |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * * Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimer. |
| * |
| * * Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * * Neither the name of VMware, Inc. nor the names of its contributors may be |
| * used to endorse or promote products derived from this software without |
| * specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE |
| * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH |
| * DAMAGE. |
| */ |
| |
| /* Copyright (c) 2003-2007 Determina Corp. */ |
| /* Copyright (c) 2001-2003 Massachusetts Institute of Technology */ |
| /* Copyright (c) 2001 Hewlett-Packard Company */ |
| |
| /* |
| * interp.c - interpreter used for native trace selection |
| */ |
| |
| #include "../globals.h" |
| #include "../link.h" |
| #include "../fragment.h" |
| #include "../emit.h" |
| #include "../dispatch.h" |
| #include "../fcache.h" |
| #include "../monitor.h" /* for trace_abort and monitor_data_t */ |
| #include "arch.h" |
| #include "instr.h" |
| #include "instr_create.h" |
| #include "instrlist.h" |
| #include "decode.h" |
| #include "decode_fast.h" |
| #include "disassemble.h" |
| #include <string.h> /* for memcpy */ |
| #include "instrument.h" |
| #include "../hotpatch.h" |
| #ifdef RETURN_AFTER_CALL |
| # include "../rct.h" |
| #endif |
| #ifdef WINDOWS |
| # include "ntdll.h" /* for EXCEPTION_REGISTRATION */ |
| # include "../nudge.h" /* for generic_nudge_target() address */ |
| #endif |
| #include "../perscache.h" |
| #include "../native_exec.h" |
| |
| #ifdef CHECK_RETURNS_SSE2 |
| # include <setjmp.h> /* for warning when we see libc setjmp */ |
| #endif |
| |
| #ifdef VMX86_SERVER |
| # include "vmkuw.h" /* VMKUW_SYSCALL_GATEWAY */ |
| #endif |
| |
| #ifdef ANNOTATIONS |
| # include "../annotations.h" |
| #endif |
| |
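| /* presumably the length of a direct jmp/call with a rel32 target: |
| * 1 opcode byte + 4 displacement bytes |
| */ |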
| enum { DIRECT_XFER_LENGTH = 5 }; |
| |
| /* forward declarations */ |
| static void process_nops_for_trace(dcontext_t *dcontext, instrlist_t *ilist, uint flags |
| _IF_DEBUG(bool recreating)); |
| static int fixup_last_cti(dcontext_t *dcontext, instrlist_t *trace, |
| app_pc next_tag, uint next_flags, uint trace_flags, |
| fragment_t *prev_f, linkstub_t *prev_l, |
| bool record_translation, uint *num_exits_deleted/*OUT*/, |
| /* If non-NULL, only looks inside trace between these two */ |
| instr_t *start_instr, instr_t *end_instr); |
| bool mangle_trace(dcontext_t *dcontext, instrlist_t *ilist, monitor_data_t *md); |
| |
| /* we use a branch limit of 1 to make it easier for the trace |
| * creation mechanism to stitch basic blocks together |
| */ |
| #define BRANCH_LIMIT 1 |
| |
| /* we limit total bb size to handle cases like infinite loop or sequence |
| * of calls. |
| * also, we have a limit on fragment body sizes, which should be impossible |
| * to break since x86 instrs are max 17 bytes and we only modify ctis. |
| * Although...selfmod mangling does really expand fragments! |
| * -selfmod_max_writes helps for selfmod bbs (case 7893/7909). |
| * System call mangling is also large, for degenerate cases like tests/linux/infinite. |
| * PR 215217: also client additions: we document and assert. |
| * FIXME: need better way to know how big will get, b/c we can construct |
| * cases that will trigger the size assertion! |
| */ |
| /* define replaced by -max_bb_instrs option */ |
| |
| /* exported so micro routines can assert whether held */ |
| DECLARE_CXTSWPROT_VAR(mutex_t bb_building_lock, INIT_LOCK_FREE(bb_building_lock)); |
| |
| /* i#1111: we do not use the lock until the 2nd thread is created */ |
| volatile bool bb_lock_start; |
| |
| #ifdef INTERNAL |
| file_t bbdump_file = INVALID_FILE; |
| #endif |
| |
| /* initialization */ |
| void |
| interp_init() |
| { |
| #ifdef INTERNAL |
| if (INTERNAL_OPTION(bbdump_tags)) { |
| bbdump_file = open_log_file("bbs", NULL, 0); |
| ASSERT(bbdump_file != INVALID_FILE); |
| } |
| #endif |
| } |
| |
| #ifdef CUSTOM_TRACES_RET_REMOVAL |
| # ifdef DEBUG |
| /* don't bother with adding lock */ |
| static int num_rets_removed; |
| # endif |
| #endif |
| |
| /* cleanup */ |
| void |
| interp_exit() |
| { |
| #ifdef INTERNAL |
| if (INTERNAL_OPTION(bbdump_tags)) { |
| close_log_file(bbdump_file); |
| } |
| #endif |
| DELETE_LOCK(bb_building_lock); |
| |
| LOG(GLOBAL, LOG_INTERP|LOG_STATS, 1, "Total application code seen: %d KB\n", |
| GLOBAL_STAT(app_code_seen)/1024); |
| #ifdef CUSTOM_TRACES_RET_REMOVAL |
| # ifdef DEBUG |
| LOG(GLOBAL, LOG_INTERP|LOG_STATS, 1, "Total rets removed: %d\n", |
| num_rets_removed); |
| # endif |
| #endif |
| } |
| |
| /**************************************************************************** |
| **************************************************************************** |
| * |
| * B A S I C B L O C K B U I L D I N G |
| */ |
| |
| /* we have a lot of data to pass around so we package it in this struct |
| * so we can have separate routines for readability |
| */ |
| typedef struct { |
| /* in */ |
| app_pc start_pc; |
| bool app_interp; /* building bb to interp app, as opposed to for pc |
| * translation or figuring out what pages a bb touches? */ |
| bool for_cache; /* normal to-be-executed build? */ |
| bool record_vmlist; /* should vmareas be updated? */ |
| bool mangle_ilist; /* should bb ilist be mangled? */ |
| bool record_translation; /* store translation info for each instr_t? */ |
| bool has_bb_building_lock; /* usually ==for_cache; used for aborting bb building */ |
| file_t outf; /* send disassembly and notes to a file? |
| * we use this mainly for dumping trace origins */ |
| app_pc stop_pc; /* Optional: NULL for normal termination rules. |
| * Only checked for full_decode. |
| */ |
| #ifdef CLIENT_INTERFACE |
| bool pass_to_client; /* pass to client, if a bb hook exists; |
| * we store this up front to avoid race conditions |
| * between full_decode setting and hook calling time. |
| */ |
| bool post_client; /* has the client already processed the bb? */ |
| bool for_trace; /* PR 299808: we tell client if building a trace */ |
| #endif |
| |
| /* in and out */ |
| overlap_info_t *overlap_info; /* if non-null, records overlap information here; |
| * caller must initialize region_start and region_end */ |
| |
| /* out */ |
| instrlist_t *ilist; |
| uint flags; |
| void *vmlist; |
| app_pc end_pc; |
| bool native_exec; /* replace cur ilist with a native_exec version */ |
| bool native_call; /* the gateway is a call */ |
| #ifdef CLIENT_INTERFACE |
| instrlist_t **unmangled_ilist; /* PR 299808: clone ilist pre-mangling */ |
| #endif |
| |
| /* internal usage only */ |
| bool full_decode; /* decode every instruction into a separate instr_t? */ |
| bool follow_direct; /* elide unconditional branches? */ |
| bool check_vm_area; /* whether to call check_thread_vm_area() */ |
| uint num_elide_jmp; |
| uint num_elide_call; |
| app_pc last_page; |
| app_pc cur_pc; |
| app_pc instr_start; |
| app_pc checked_end; /* end of current vmarea checked */ |
| cache_pc exit_target; /* fall-through target of final instr */ |
| uint exit_type; /* indirect branch type */ |
| ibl_branch_type_t ibl_branch_type; /* indirect branch type as an IBL selector */ |
| #ifdef UNIX |
| bool invalid_instr_hack; |
| #endif |
| instr_t *instr; /* the current instr */ |
| int eflags; |
| app_pc pretend_pc; /* selfmod only: decode from separate pc */ |
| DEBUG_DECLARE(bool initialized;) |
| } build_bb_t; |
| |
| /* forward decl */ |
| static inline bool |
| bb_process_syscall(dcontext_t *dcontext, build_bb_t *bb); |
| |
| static void |
| init_build_bb(build_bb_t *bb, app_pc start_pc, bool app_interp, bool for_cache, |
| bool mangle_ilist, bool record_translation, file_t outf, uint known_flags, |
| overlap_info_t *overlap_info) |
| { |
| memset(bb, 0, sizeof(*bb)); |
| bb->check_vm_area = true; |
| bb->start_pc = start_pc; |
| bb->app_interp = app_interp; |
| bb->for_cache = for_cache; |
| if (bb->for_cache) |
| bb->record_vmlist = true; |
| bb->mangle_ilist = mangle_ilist; |
| bb->record_translation = record_translation; |
| bb->outf = outf; |
| bb->overlap_info = overlap_info; |
| bb->follow_direct = !TEST(FRAG_SELFMOD_SANDBOXED, known_flags); |
| bb->flags = known_flags; |
| bb->ibl_branch_type = IBL_GENERIC; /* initialization only */ |
| DODEBUG(bb->initialized = true;); |
| } |
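| /* Minimal usage sketch, assuming a hypothetical caller that builds a normal |
| * to-be-executed bb (not a specific call site in this file): |
| *   build_bb_t bb; |
| *   init_build_bb(&bb, start_pc, |
| *                 true, true,             <- app_interp, for_cache |
| *                 true, false,            <- mangle_ilist, record_translation |
| *                 INVALID_FILE, 0, NULL); <- no disasm file, known flags, or overlap info |
| * The bb-building loop then fills in the out fields (bb.ilist, bb.flags, |
| * bb.end_pc, etc.). |
| */ |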
| |
| static void |
| reset_overlap_info(dcontext_t *dcontext, build_bb_t *bb) |
| { |
| bb->overlap_info->start_pc = bb->start_pc; |
| bb->overlap_info->min_pc = bb->start_pc; |
| bb->overlap_info->max_pc = bb->start_pc; |
| bb->overlap_info->contiguous = true; |
| bb->overlap_info->overlap = false; |
| } |
| |
| static void |
| update_overlap_info(dcontext_t *dcontext, build_bb_t *bb, app_pc new_pc, bool jmp) |
| { |
| if (new_pc < bb->overlap_info->min_pc) |
| bb->overlap_info->min_pc = new_pc; |
| if (new_pc > bb->overlap_info->max_pc) |
| bb->overlap_info->max_pc = new_pc; |
| /* we get called at end of all contiguous intervals, so ignore jmps */ |
| LOG(THREAD, LOG_ALL, 5, "\t app_bb_overlaps "PFX".."PFX" %s\n", |
| bb->last_page, new_pc, jmp?"jmp":""); |
| if (!bb->overlap_info->overlap && !jmp) { |
| /* contiguous interval: prev_pc..new_pc (open-ended) */ |
| if (bb->last_page < bb->overlap_info->region_end && |
| new_pc > bb->overlap_info->region_start) { |
| LOG(THREAD_GET, LOG_ALL, 5, "\t it overlaps!\n"); |
| bb->overlap_info->overlap = true; |
| } |
| } |
| if (bb->overlap_info->contiguous && jmp) |
| bb->overlap_info->contiguous = false; |
| } |
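| /* Sketch of intended use, inferred from the fields above rather than from a |
| * specific call site: a caller interested in whether a bb touches a region |
| * sets overlap_info->region_start and region_end before building, passes the |
| * struct to init_build_bb(), and reads overlap_info->overlap (and min_pc, |
| * max_pc, contiguous) back afterward. |
| */ |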
| |
| #ifdef DEBUG |
| # define BBPRINT(bb, level, ...) do { \ |
| LOG(THREAD, LOG_INTERP, level, __VA_ARGS__); \ |
| if (bb->outf != INVALID_FILE && bb->outf != (THREAD)) \ |
| print_file(bb->outf, __VA_ARGS__); \ |
| } while (0); |
| #else |
| # ifdef INTERNAL |
| # define BBPRINT(bb, level, ...) do { \ |
| if (bb->outf != INVALID_FILE) \ |
| print_file(bb->outf, __VA_ARGS__); \ |
| } while (0); |
| # else |
| # define BBPRINT(bb, level, ...) /* nothing */ |
| # endif |
| #endif |
| |
| #ifdef WINDOWS |
| extern void intercept_load_dll(void); |
| extern void intercept_unload_dll(void); |
| # ifdef INTERNAL |
| extern void DllMainThreadAttach(void); |
| # endif |
| #endif |
| |
| /* forward declarations */ |
| static bool |
| mangle_bb_ilist(dcontext_t *dcontext, build_bb_t *bb); |
| |
| static void |
| build_native_exec_bb(dcontext_t *dcontext, build_bb_t *bb); |
| |
| static bool |
| at_native_exec_gateway(dcontext_t *dcontext, app_pc start, bool *is_call |
| _IF_DEBUG(bool xfer_target)); |
| |
| #ifdef DEBUG |
| static void |
| report_native_module(dcontext_t *dcontext, app_pc modpc); |
| #endif |
| |
| /*************************************************************************** |
| * Image entry |
| */ |
| |
| static bool reached_image_entry = false; |
| |
| static INLINE_FORCED bool |
| check_for_image_entry(app_pc bb_start) |
| { |
| if (!reached_image_entry && bb_start == get_image_entry()) { |
| LOG(THREAD_GET, LOG_ALL, 1, "Reached image entry point "PFX"\n", bb_start); |
| set_reached_image_entry(); |
| return true; |
| } |
| return false; |
| } |
| |
| void |
| set_reached_image_entry() |
| { |
| SELF_UNPROTECT_DATASEC(DATASEC_RARELY_PROT); |
| reached_image_entry = true; |
| SELF_PROTECT_DATASEC(DATASEC_RARELY_PROT); |
| } |
| |
| bool |
| reached_image_entry_yet() |
| { |
| return reached_image_entry; |
| } |
| |
| /*************************************************************************** |
| * Whether to inline or elide callees |
| */ |
| |
| /* return true if pc is a call target that should NOT be inlined */ |
| #if defined(DEBUG) || !defined(WINDOWS) |
| /* cl.exe non-debug won't let other modules use it if inlined */ |
| inline |
| #endif |
| bool |
| must_not_be_inlined(app_pc pc) |
| { |
| return ( |
| #ifdef INTERNAL |
| !dynamo_options.inline_calls |
| #else |
| 0 |
| #endif |
| #ifdef WINDOWS |
| || pc == (app_pc)intercept_load_dll |
| || pc == (app_pc)intercept_unload_dll |
| /* we're guaranteed to have direct calls to the next routine since our |
| * own DllMain calls it! */ |
| # ifdef INTERNAL |
| || pc == (app_pc) DllMainThreadAttach |
| # endif |
| /* check for nudge handling escape from cache */ |
| || (pc == (app_pc)generic_nudge_handler) |
| #else |
| /* PR 200203: long-term we want to control loading of client |
| * libs, but for now we have to let the loader call _fini() |
| * in the client, which may end up calling __wrap_free(). |
| * It's simpler to let those be interpreted and make a native |
| * call to the real heap routine here as this is a direct |
| * call whereas we'd need native_exec for the others: |
| */ |
| || pc == (app_pc)global_heap_free |
| #endif |
| #ifdef DR_APP_EXPORTS |
| /* i#1237: DR will change dr_app_running_under_dynamorio return value |
| * on seeing a bb starting at dr_app_running_under_dynamorio. |
| */ |
| || pc == (app_pc) dr_app_running_under_dynamorio |
| #endif |
| ); |
| } |
| |
| /* return true if pc is a direct jmp target that should NOT be elided and followed */ |
| static inline bool |
| must_not_be_elided(app_pc pc) |
| { |
| #ifdef WINDOWS |
| /* Allow only the return jump in the landing pad to be elided, as we |
| * interpret the return path from trampolines. The forward jump leads to |
| * the trampoline and shouldn't be elided. */ |
| if (is_on_interception_initial_route(pc)) |
| return true; |
| #endif |
| return (0 |
| #ifdef WINDOWS |
| /* we insert trampolines by adding direct jmps to our interception code buffer |
| * we don't want to interpret the code in that buffer, as it may swap to the |
| * dstack and mess up a return-from-fcache. |
| * N.B.: if this routine is used anywhere else, pay attention to the |
| * hack for is_syscall_trampoline() in the use here! |
| */ |
| || (is_in_interception_buffer(pc)) |
| #else /* UNIX */ |
| #endif |
| ); |
| } |
| |
| #ifdef DR_APP_EXPORTS |
| /* This function allows automatically injected dynamo to ignore |
| * dynamo API routines that would really mess things up |
| */ |
| static inline bool |
| must_escape_from(app_pc pc) |
| { |
| /* if we ever find ourselves at the top of one of these, immediately issue |
| * a ret instruction...we haven't set up a frame yet so the stack is fine; the |
| * only problem is the return value, so go ahead and overwrite xax, it's caller-saved |
| * FIXME: is this ok? |
| */ |
| /* Note that we can't just look for direct calls to these functions |
| * because of stubs, etc. that end up doing indirect jumps to them! |
| */ |
| bool res = false |
| #ifdef DR_APP_EXPORTS |
| || (automatic_startup && |
| (pc == (app_pc)dynamorio_app_init || |
| pc == (app_pc)dr_app_start || |
| pc == (app_pc)dynamo_thread_init || |
| pc == (app_pc)dynamorio_app_exit || |
| /* dr_app_stop is a nop already */ |
| pc == (app_pc)dynamo_thread_exit)) |
| #endif |
| ; |
| #ifdef DEBUG |
| if (res) { |
| # ifdef DR_APP_EXPORTS |
| LOG(THREAD_GET, LOG_INTERP, 3, "must_escape_from: found "); |
| if (pc == (app_pc)dynamorio_app_init) |
| LOG(THREAD_GET, LOG_INTERP, 3, "dynamorio_app_init\n"); |
| else if (pc == (app_pc)dr_app_start) |
| LOG(THREAD_GET, LOG_INTERP, 3, "dr_app_start\n"); |
| /* FIXME: are dynamo_thread_* still needed here? */ |
| else if (pc == (app_pc)dynamo_thread_init) |
| LOG(THREAD_GET, LOG_INTERP, 3, "dynamo_thread_init\n"); |
| else if (pc == (app_pc)dynamorio_app_exit) |
| LOG(THREAD_GET, LOG_INTERP, 3, "dynamorio_app_exit\n"); |
| else if (pc == (app_pc)dynamo_thread_exit) |
| LOG(THREAD_GET, LOG_INTERP, 3, "dynamo_thread_exit\n"); |
| # endif |
| } |
| #endif |
| |
| return res; |
| } |
| #endif /* DR_APP_EXPORTS */ |
| |
| /* Adds bb->instr, which must be a direct call or jmp, to bb->ilist for native |
| * execution. Makes sure its target is reachable from the code cache, which |
| * is critical for jmps b/c they're native for our hooks of app code which may |
| * not be reachable from the code cache. Also needed for calls b/c in the future |
| * (i#774) the DR lib (and thus our must_not_be_inlined() calls) won't be reachable |
| * from the cache. |
| */ |
| static void |
| bb_add_native_direct_xfer(dcontext_t *dcontext, build_bb_t *bb, bool appended) |
| { |
| #ifdef X64 |
| /* i#922: we're going to run this jmp from our code cache so we have to |
| * make sure it still reaches its target. We could try to check |
| * reachability from the likely code cache slot, but these should be |
| * rare enough that making them indirect won't matter and then we have |
| * fewer reachability dependences. |
| * We do this here rather than in mangle() b/c we'd have a hard time |
| * distinguishing native jmp/call due to DR's own operations from a |
| * client's inserted meta jmp/call. |
| */ |
| /* Strategy: write target into xax (DR-reserved) slot and jmp through it. |
| * Alternative would be to embed the target into the code stream. |
| * We don't need to set translation b/c these are meta instrs and they |
| * won't fault. |
| */ |
| ptr_uint_t tgt = (ptr_uint_t) opnd_get_pc(instr_get_target(bb->instr)); |
| opnd_t tls_slot = opnd_create_sized_tls_slot(os_tls_offset(TLS_XAX_SLOT), OPSZ_4); |
| instrlist_meta_append(bb->ilist, INSTR_CREATE_mov_imm |
| (dcontext, tls_slot, OPND_CREATE_INT32((int)tgt))); |
| opnd_set_disp(&tls_slot, opnd_get_disp(tls_slot) + 4); |
| instrlist_meta_append(bb->ilist, INSTR_CREATE_mov_imm |
| (dcontext, tls_slot, OPND_CREATE_INT32((int)(tgt >> 32)))); |
| if (instr_is_ubr(bb->instr)) { |
| instrlist_meta_append(bb->ilist, INSTR_CREATE_jmp_ind |
| (dcontext, |
| opnd_create_tls_slot(os_tls_offset(TLS_XAX_SLOT)))); |
| bb->exit_type |= instr_branch_type(bb->instr); |
| } else { |
| ASSERT(instr_is_call_direct(bb->instr)); |
| instrlist_meta_append(bb->ilist, INSTR_CREATE_call_ind |
| (dcontext, |
| opnd_create_tls_slot(os_tls_offset(TLS_XAX_SLOT)))); |
| } |
| if (appended) |
| instrlist_remove(bb->ilist, bb->instr); |
| instr_destroy(dcontext, bb->instr); |
| bb->instr = NULL; |
| #else |
| if (appended) { |
| /* avoid assert about meta w/ translation but no restore_state callback */ |
| instr_set_translation(bb->instr, NULL); |
| } else |
| instrlist_append(bb->ilist, bb->instr); |
| /* Indicate that relative target must be |
| * re-encoded, and that it is not an exit cti. |
| * However, we must mangle this to ensure it reaches (i#992) |
| * which we special-case in mangle(). |
| */ |
| instr_set_meta(bb->instr); |
| instr_set_raw_bits_valid(bb->instr, false); |
| #endif |
| } |
| |
| /* Perform checks such as looking for dynamo stopping points and bad places |
| * to be. We assume we only have to check after control transfer instructions, |
| * i.e., we assume that all of these conditions are procedures that are only |
| * entered by calling or jumping, never falling through. |
| */ |
| static inline bool |
| check_for_stopping_point(dcontext_t *dcontext, build_bb_t *bb) |
| { |
| #ifdef DR_APP_EXPORTS |
| if (must_escape_from(bb->cur_pc)) { |
| /* x64 will zero-extend to rax, so we use eax here */ |
| reg_id_t reg = IF_X86_ELSE(REG_EAX, DR_REG_R0); |
| BBPRINT(bb, 3, "interp: emergency exit from "PFX"\n", bb->cur_pc); |
| /* if we ever find ourselves at the top of one of these, immediately issue |
| * a ret instruction...we haven't set up a frame yet so the stack is fine; |
| * the only problem is the return value, so go ahead and overwrite xax, |
| * it's caller-saved. |
| * FIXME: is this ok? |
| */ |
| /* move 0 into xax/r0 -- our functions return 0 to indicate success */ |
| instrlist_append(bb->ilist, |
| XINST_CREATE_load_int(dcontext, |
| opnd_create_reg(reg), |
| OPND_CREATE_INT32(0))); |
| /* insert a ret instruction */ |
| instrlist_append(bb->ilist, XINST_CREATE_return(dcontext)); |
| /* should this be treated as a real return? */ |
| bb->exit_type |= LINK_INDIRECT | LINK_RETURN; |
| bb->exit_target = get_ibl_routine(dcontext, IBL_LINKED, DEFAULT_IBL_BB(), IBL_RETURN); |
| return true; |
| } |
| #endif /* DR_APP_EXPORTS */ |
| |
| #ifdef CHECK_RETURNS_SSE2 |
| if (bb->cur_pc == (app_pc)longjmp) { |
| SYSLOG_INTERNAL_WARNING("encountered longjmp, which will cause ret mismatch!"); |
| } |
| #endif |
| |
| return is_stopping_point(dcontext, bb->cur_pc); |
| } |
| |
| /* Arithmetic eflags analysis to see if sequence of instrs reads an |
| * arithmetic flag prior to writing it. |
| * Usage: first initialize status to 0 and eflags_6 to 0. |
| * Then call this routine for each instr in sequence, assigning result to status. |
| * eflags_6 holds flags written and read so far. |
| * Uses these flags, defined in instr.h, as status values: |
| * EFLAGS_WRITE_ARITH = writes all arith flags before reading any |
| * EFLAGS_WRITE_OF = writes OF before reading it (x86-only) |
| * EFLAGS_READ_ARITH = reads some arith flag before writing it |
| * EFLAGS_READ_OF = reads OF before writing OF (x86-only) |
| * 0 = no information yet |
| * On ARM, Q and GE flags are ignored. |
| */ |
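| /* Worked example (a sketch of a hypothetical caller, not a specific call site): |
| *   instr_t *in; |
| *   int status = 0; |
| *   uint e6 = 0; |
| *   for (in = instrlist_first(ilist); in != NULL; in = instr_get_next(in)) |
| *       status = eflags_analysis(in, status, &e6); |
| * For the sequence "cmp; jz", cmp writes all 6 arith flags before jz reads ZF, |
| * so status ends as EFLAGS_WRITE_ARITH; for "jz" alone, ZF is read before any |
| * write, so status would be EFLAGS_READ_ARITH. |
| */ |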
| static inline int |
| eflags_analysis(instr_t *instr, int status, uint *eflags_6) |
| { |
| uint e6 = *eflags_6; /* local copy */ |
| uint e6_w2r = EFLAGS_WRITE_TO_READ(e6); |
| uint instr_eflags = instr_get_arith_flags(instr, DR_QUERY_DEFAULT); |
| |
| /* Keep going until the result is non-zero; also keep going if the |
| * result is a write to OF, to see if it later writes the rest of the flags |
| * before reading any, and keep going if it reads one of the 6, to see |
| * if it later writes OF before reading it. |
| */ |
| if (instr_eflags == 0 || status == EFLAGS_WRITE_ARITH |
| IF_X86(|| status == EFLAGS_READ_OF)) |
| return status; |
| /* we ignore interrupts */ |
| if ((instr_eflags & EFLAGS_READ_ARITH) != 0 && |
| (!instr_opcode_valid(instr) || !instr_is_interrupt(instr))) { |
| /* store the flags we're reading */ |
| e6 |= (instr_eflags & EFLAGS_READ_ARITH); |
| *eflags_6 = e6; |
| if ((e6_w2r | (instr_eflags & EFLAGS_READ_ARITH)) != e6_w2r) { |
| /* we're reading a flag that has not been written yet */ |
| status = EFLAGS_READ_ARITH; /* some read before all written */ |
| LOG(THREAD_GET, LOG_INTERP, 4, "\treads flag before writing it!\n"); |
| #ifdef X86 |
| if ((instr_eflags & EFLAGS_READ_OF) != 0 && (e6 & EFLAGS_WRITE_OF) == 0) { |
| status = EFLAGS_READ_OF; /* reads OF before writing! */ |
| LOG(THREAD_GET, LOG_INTERP, 4, "\t reads OF prior to writing it!\n"); |
| } |
| #endif |
| } |
| } else if ((instr_eflags & EFLAGS_WRITE_ARITH) != 0) { |
| /* store the flags we're writing */ |
| e6 |= (instr_eflags & EFLAGS_WRITE_ARITH); |
| *eflags_6 = e6; |
| /* check if all written but none read yet */ |
| if ((e6 & EFLAGS_WRITE_ARITH) == EFLAGS_WRITE_ARITH && |
| (e6 & EFLAGS_READ_ARITH) == 0) { |
| status = EFLAGS_WRITE_ARITH; /* all written before read */ |
| LOG(THREAD_GET, LOG_INTERP, 4, "\twrote all 6 flags now!\n"); |
| } |
| #ifdef X86 |
| /* check if at least OF was written but not read */ |
| else if ((e6 & EFLAGS_WRITE_OF) != 0 && (e6 & EFLAGS_READ_OF) == 0) { |
| status = EFLAGS_WRITE_OF; /* OF written before read */ |
| LOG(THREAD_GET, LOG_INTERP, 4, "\twrote overflow flag before reading it!\n"); |
| } |
| #endif |
| } |
| return status; |
| } |
| |
| |
| /* check origins of code for several purposes: |
| * 1) we need list of areas where this thread's fragments come |
| * from, for faster flushing on munmaps |
| * 2) also for faster flushing, each vmarea has a list of fragments |
| * 3) we need to mark as read-only any writable region that |
| * has a fragment come from it, to handle self-modifying code |
| * 4) for PROGRAM_SHEPHERDING restricted code origins for security |
| * 5) for restricted execution environments: not letting bb cross regions |
| */ |
| |
| /* |
| FIXME CASE 7380: |
| since we report the security violation before executing off the bad page, it can be |
| a false positive due to: |
| - a faulting instruction in the middle of the bb that would have prevented |
| getting there |
| - an ignorable syscall in the middle |
| - self-mod code that would have ended the bb sooner than the bad page |
| |
| One solution is to have check_thread_vm_area() return false and have |
| bb building stop at checked_end if a violation will occur when we |
| get there. Then we only raise the violation once building a bb |
| starting there. |
| */ |
| |
| static inline void |
| check_new_page_start(dcontext_t *dcontext, build_bb_t *bb) |
| { |
| DEBUG_DECLARE(bool ok;) |
| if (!bb->check_vm_area) |
| return; |
| DEBUG_DECLARE(ok =) check_thread_vm_area(dcontext, bb->start_pc, bb->start_pc, |
| (bb->record_vmlist ? &bb->vmlist : NULL), |
| &bb->flags, &bb->checked_end, |
| false/*!xfer*/); |
| ASSERT(ok); /* cannot return false on non-xfer */ |
| bb->last_page = bb->start_pc; |
| if (bb->overlap_info != NULL) |
| reset_overlap_info(dcontext, bb); |
| } |
| |
| /* Walk forward in straight line from prev_pc to new_pc. |
| * FIXME: with checked_end we don't need to call this on every contig end |
| * while bb building like we used to. Should revisit the overlap info and |
| * walk_app_bb reasons for keeping those contig() calls and see if we can |
| * optimize them away for bb building at least. |
| * i#993: new_pc points to the last byte of the current instruction and is not |
| * an open-ended endpoint. |
| */ |
| static inline bool |
| check_new_page_contig(dcontext_t *dcontext, build_bb_t *bb, app_pc new_pc) |
| { |
| bool is_first_instr = (bb->instr_start == bb->start_pc); |
| if (!bb->check_vm_area) |
| return true; |
| if (bb->checked_end == NULL) { |
| ASSERT(new_pc == bb->start_pc); |
| } else if (new_pc >= bb->checked_end) { |
| if (!check_thread_vm_area(dcontext, new_pc, bb->start_pc, |
| (bb->record_vmlist ? &bb->vmlist : NULL), |
| &bb->flags, &bb->checked_end, |
| /* i#989: We don't want to fall through to an |
| * incompatible vmarea, so we treat fall |
| * through like a transfer. We can't end the |
| * bb before the first instruction, so we pass |
| * false to forcibly merge in the vmarea |
| * flags. |
| */ |
| !is_first_instr/*xfer*/)) { |
| return false; |
| } |
| } |
| if (bb->overlap_info != NULL) |
| update_overlap_info(dcontext, bb, new_pc, false/*not jmp*/); |
| DOLOG(4, LOG_INTERP, { |
| if (PAGE_START(bb->last_page) != PAGE_START(new_pc)) |
| LOG(THREAD, LOG_INTERP, 4, "page boundary crossed\n"); |
| }); |
| bb->last_page = new_pc; /* update even if not new page, for walk_app_bb */ |
| return true; |
| } |
| |
| /* Direct cti from prev_pc to new_pc */ |
| static bool |
| check_new_page_jmp(dcontext_t *dcontext, build_bb_t *bb, app_pc new_pc) |
| { |
| /* For tracking purposes, check the last byte of the cti. */ |
| bool ok = check_new_page_contig(dcontext, bb, bb->cur_pc-1); |
| ASSERT(ok && "should have checked cur_pc-1 in decode loop"); |
| if (!ok) /* Don't follow the jmp in release build. */ |
| return false; |
| /* cur sandboxing doesn't handle direct cti |
| * not good enough to only check this at top of interp -- could walk contig |
| * from non-selfmod to selfmod page, and then do a direct cti, which |
| * check_thread_vm_area would allow (no flag changes on direct cti)! |
| * also not good enough to put this check in check_thread_vm_area, as that |
| * only checks across pages. |
| */ |
| if ((bb->flags & FRAG_SELFMOD_SANDBOXED) != 0) |
| return false; |
| if (PAGE_START(bb->last_page) != PAGE_START(new_pc)) |
| LOG(THREAD, LOG_INTERP, 4, "page boundary crossed\n"); |
| /* do not walk into a native exec dll (we assume not currently there, |
| * though could happen if bypass a gateway -- even then this is a feature |
| * to allow getting back to native ASAP) |
| * FIXME: we could assume that such direct calls only |
| * occur from DGC, and rely on check_thread_vm_area to disallow, |
| * as an (unsafe) optimization |
| */ |
| if (DYNAMO_OPTION(native_exec) && |
| DYNAMO_OPTION(native_exec_dircalls) && |
| !vmvector_empty(native_exec_areas) && |
| is_native_pc(new_pc)) |
| return false; |
| #ifdef CLIENT_INTERFACE |
| /* i#805: If we're crossing a module boundary between two modules that are |
| * and aren't on null_instrument_list, don't elide the jmp. |
| */ |
| if ((!!os_module_get_flag(bb->cur_pc, MODULE_NULL_INSTRUMENT)) != |
| (!!os_module_get_flag(new_pc, MODULE_NULL_INSTRUMENT))) |
| return false; |
| #endif |
| if (!bb->check_vm_area) |
| return true; |
| /* need to check this even if an intra-page jmp b/c we allow sub-page vm regions */ |
| if (!check_thread_vm_area(dcontext, new_pc, bb->start_pc, |
| (bb->record_vmlist ? &bb->vmlist : NULL), |
| &bb->flags, &bb->checked_end, true/*xfer*/)) |
| return false; |
| if (bb->overlap_info != NULL) |
| update_overlap_info(dcontext, bb, new_pc, true/*jmp*/); |
| bb->flags |= FRAG_HAS_DIRECT_CTI; |
| bb->last_page = new_pc; /* update even if not new page, for walk_app_bb */ |
| return true; |
| } |
| |
| static inline void |
| bb_process_invalid_instr(dcontext_t *dcontext, build_bb_t *bb) |
| { |
| |
| /* invalid instr: end bb BEFORE the instr, we'll throw exception if we |
| * reach the instr itself |
| */ |
| LOG(THREAD, LOG_INTERP, 2, "interp: invalid instr at "PFX"\n", bb->instr_start); |
| /* This routine is called by more than just bb builder, also used |
| * for recreating state, so check bb->app_interp parameter to find out |
| * if building a real app bb to be executed |
| */ |
| if (bb->app_interp && bb->instr_start == bb->start_pc) { |
| /* This is first instr in bb so it will be executed for sure and |
| * we need to generate an invalid instruction exception. |
| * A benefit of being first instr is that the state is easy |
| * to translate. |
| */ |
| #ifdef WINDOWS |
| /* Copying the invalid bytes and having the processor generate |
| * the exception would be cleaner in every way except our fear |
| * of a new processor making those bytes valid and us inadvertently |
| * executing the unexamined instructions afterward, since we do not |
| * know the proper number of bytes to copy. Copying is cleaner |
| * since Windows splits invalid instructions into different cases, |
| * an invalid lock prefix and maybe some other distinctions |
| * (it's all interrupt 6 to the processor), and it is hard to |
| * duplicate Windows' behavior in our forged exception. |
| */ |
| /* FIXME case 10672: provide a runtime option to specify new |
| * instruction formats to avoid this app exception */ |
| ASSERT(dcontext->bb_build_info == bb); |
| bb_build_abort(dcontext, true/*clean vm area*/, true/*unlock*/); |
| /* FIXME : we use illegal instruction here, even though we |
| * know windows uses different exception codes for different |
| * types of invalid instructions (for ex. STATUS_INVALID_LOCK |
| * _SEQUENCE for lock prefix on a jmp instruction) |
| */ |
| if (TEST(DUMPCORE_FORGE_ILLEGAL_INST, DYNAMO_OPTION(dumpcore_mask))) |
| os_dump_core("Warning: Encountered Illegal Instruction"); |
| os_forge_exception(bb->instr_start, ILLEGAL_INSTRUCTION_EXCEPTION); |
| ASSERT_NOT_REACHED(); |
| #else |
| /* FIXME: Linux hack until we have a real os_forge_exception implementation: |
| * copy the bytes and have the process generate the exception. |
| * Once remove this, also disable check at top of insert_selfmod_sandbox |
| * FIXME PR 307880: we now have a preliminary |
| * os_forge_exception impl, but I'm leaving this hack until |
| * we're more comfortable w/ our forging. |
| */ |
| uint sz; |
| instrlist_append(bb->ilist, bb->instr); |
| /* pretend raw bits valid to get it encoded |
| * For now we just do 17 bytes, being wary of unreadable pages. |
| * FIXME: a better solution is to have the decoder guess at the length (if the |
| * opcode is ok and it's just a bad lock prefix or similar we know the length; |
| * if the opcode is bad, use the bytes up until we know it's bad). |
| */ |
| if (!is_readable_without_exception(bb->instr_start, MAX_INSTR_LENGTH)) { |
| app_pc nxt_page = (app_pc) ALIGN_FORWARD(bb->instr_start, PAGE_SIZE); |
| sz = nxt_page - bb->instr_start; |
| } else { |
| sz = MAX_INSTR_LENGTH; |
| } |
| bb->cur_pc += sz; /* just in case, should have a non-self target */ |
| ASSERT(bb->cur_pc > bb->instr_start); /* else still a self target */ |
| instr_set_raw_bits(bb->instr, bb->instr_start, sz); |
| bb->invalid_instr_hack = true; |
| #endif |
| } else { |
| instr_destroy(dcontext, bb->instr); |
| bb->instr = NULL; |
| } |
| } |
| |
| /* returns true to indicate "elide and continue" and false to indicate "end bb now" |
| * should be used both for converted indirect jumps and |
| * FIXME: for direct jumps by bb_process_ubr |
| */ |
| static inline bool |
| follow_direct_jump(dcontext_t *dcontext, build_bb_t *bb, |
| app_pc target) |
| { |
| if (bb->follow_direct && |
| bb->num_elide_jmp < DYNAMO_OPTION(max_elide_jmp) && |
| (DYNAMO_OPTION(elide_back_jmps) || bb->cur_pc <= target)) { |
| if (check_new_page_jmp(dcontext, bb, target)) { |
| /* Elide unconditional branch and follow target */ |
| bb->num_elide_jmp++; |
| STATS_INC(total_elided_jmps); |
| STATS_TRACK_MAX(max_elided_jmps, bb->num_elide_jmp); |
| bb->cur_pc = target; |
| BBPRINT(bb, 4, " continuing at target "PFX"\n", bb->cur_pc); |
| |
| return true; /* keep bb going */ |
| } else { |
| BBPRINT(bb, 3, " NOT following jmp from "PFX" to "PFX"\n", |
| bb->instr_start, target); |
| } |
| } else { |
| BBPRINT(bb, 3, " NOT attempting to follow jump from "PFX" to "PFX"\n", |
| bb->instr_start, target); |
| } |
| return false; /* stop bb */ |
| } |
| |
| /* returns true to indicate "elide and continue" and false to indicate "end bb now" */ |
| static inline bool |
| bb_process_ubr(dcontext_t *dcontext, build_bb_t *bb) |
| { |
| app_pc tgt = (byte *) opnd_get_pc(instr_get_target(bb->instr)); |
| BBPRINT(bb, 4, "interp: direct jump at "PFX"\n", bb->instr_start); |
| if (must_not_be_elided(tgt)) { |
| #ifdef WINDOWS |
| byte *wrapper_start; |
| if (is_syscall_trampoline(tgt, &wrapper_start)) { |
| /* HACK to avoid entering the syscall trampoline that is meant |
| * only for native syscalls -- we replace the jmp with the |
| * original app mov immed that it replaced |
| */ |
| BBPRINT(bb, 3, |
| "interp: replacing syscall trampoline @"PFX" w/ orig mov @"PFX"\n", |
| bb->instr_start, wrapper_start); |
| instr_reset(dcontext, bb->instr); |
| |
| /* leave bb->cur_pc unchanged */ |
| decode(dcontext, wrapper_start, bb->instr); |
| /* ASSUMPTION: syscall trampoline puts hooked instruction |
| * (usually mov_imm but can be lea if hooked_deeper) here */ |
| ASSERT(instr_get_opcode(bb->instr) == OP_mov_imm || |
| (instr_get_opcode(bb->instr) == OP_lea && |
| DYNAMO_OPTION(native_exec_hook_conflict) == |
| HOOKED_TRAMPOLINE_HOOK_DEEPER)); |
| instrlist_append(bb->ilist, bb->instr); |
| /* translation should point to the trampoline at the |
| * original application address |
| */ |
| if (bb->record_translation) |
| instr_set_translation(bb->instr, bb->instr_start); |
| if (instr_get_opcode(bb->instr) == OP_lea) { |
| app_pc translation = bb->instr_start + |
| instr_length(dcontext, bb->instr); |
| ASSERT_CURIOSITY(instr_length(dcontext, bb->instr) == 4); |
| /* we hooked deeper so we need to add the int 2e instruction */ |
| /* can't use create_syscall_instr because of case 5217 hack */ |
| ASSERT(get_syscall_method() == SYSCALL_METHOD_INT); |
| bb->instr = INSTR_CREATE_int(dcontext, |
| opnd_create_immed_int((char)0x2e, |
| OPSZ_1)); |
| if (bb->record_translation) |
| instr_set_translation(bb->instr, translation); |
| ASSERT(instr_is_syscall(bb->instr) && |
| instr_get_opcode(bb->instr) == OP_int); |
| instrlist_append(bb->ilist, bb->instr); |
| return bb_process_syscall(dcontext, bb); |
| } |
| return true; /* keep bb going */ |
| } |
| #endif |
| BBPRINT(bb, 3, "interp: NOT following jmp to "PFX"\n", tgt); |
| /* add instruction to instruction list */ |
| bb_add_native_direct_xfer(dcontext, bb, false/*!appended*/); |
| /* Case 8711: coarse-grain can't handle non-exit cti */ |
| bb->flags &= ~FRAG_COARSE_GRAIN; |
| STATS_INC(coarse_prevent_cti); |
| return false; /* end bb now */ |
| } else { |
| if (bb->follow_direct && |
| bb->num_elide_jmp < DYNAMO_OPTION(max_elide_jmp) && |
| (DYNAMO_OPTION(elide_back_jmps) || bb->cur_pc <= tgt)) { |
| if (check_new_page_jmp(dcontext, bb, tgt)) { |
| /* Elide unconditional branch and follow target */ |
| bb->num_elide_jmp++; |
| STATS_INC(total_elided_jmps); |
| STATS_TRACK_MAX(max_elided_jmps, bb->num_elide_jmp); |
| bb->cur_pc = tgt; |
| BBPRINT(bb, 4, " continuing at target "PFX"\n", bb->cur_pc); |
| /* pretend never saw this ubr: delete instr, then continue */ |
| instr_destroy(dcontext, bb->instr); |
| bb->instr = NULL; |
| return true; /* keep bb going */ |
| } else { |
| BBPRINT(bb, 3, " NOT following direct jmp from "PFX" to "PFX"\n", |
| bb->instr_start, tgt); |
| } |
| } |
| /* End this bb now */ |
| bb->exit_target = opnd_get_pc(instr_get_target(bb->instr)); |
| instrlist_append(bb->ilist, bb->instr); |
| return false; /* end bb */ |
| } |
| return true; /* keep bb going */ |
| } |
| |
| #ifdef X86 |
| /* returns true if call is elided, |
| * and false if not following due to hitting a limit or other reason */ |
| static bool |
| follow_direct_call(dcontext_t *dcontext, build_bb_t *bb, app_pc callee) |
| { |
| /* FIXME: This code should be reused in bb_process_convertible_indcall() |
| * and in bb_process_call_direct() |
| */ |
| if (bb->follow_direct && |
| bb->num_elide_call < DYNAMO_OPTION(max_elide_call) && |
| (DYNAMO_OPTION(elide_back_calls) || bb->cur_pc <= callee)) { |
| if (check_new_page_jmp(dcontext, bb, callee)) { |
| bb->num_elide_call++; |
| STATS_INC(total_elided_calls); |
| STATS_TRACK_MAX(max_elided_calls, bb->num_elide_call); |
| bb->cur_pc = callee; |
| |
| BBPRINT(bb, 4, " continuing in callee at "PFX"\n", bb->cur_pc); |
| return true; /* keep bb going in callee */ |
| } else { |
| BBPRINT(bb, 3, |
| " NOT following direct (or converted) call from "PFX" to "PFX"\n", |
| bb->instr_start, callee); |
| } |
| } else { |
| BBPRINT(bb, 3, " NOT attempting to follow call from "PFX" to "PFX"\n", |
| bb->instr_start, callee); |
| } |
| return false; /* stop bb */ |
| } |
| #endif /* X86 */ |
| |
| static inline void |
| bb_stop_prior_to_instr(dcontext_t *dcontext, build_bb_t *bb, bool appended) |
| { |
| if (appended) |
| instrlist_remove(bb->ilist, bb->instr); |
| instr_destroy(dcontext, bb->instr); |
| bb->instr = NULL; |
| bb->cur_pc = bb->instr_start; |
| } |
| |
| /* returns true to indicate "elide and continue" and false to indicate "end bb now" */ |
| static inline bool |
| bb_process_call_direct(dcontext_t *dcontext, build_bb_t *bb) |
| { |
| byte *callee = (byte *)opnd_get_pc(instr_get_target(bb->instr)); |
| # ifdef CUSTOM_TRACES_RET_REMOVAL |
| if (callee == bb->instr_start + 5) { |
| LOG(THREAD, LOG_INTERP, 4, "found call to next instruction\n"); |
| } else |
| dcontext->num_calls++; |
| # endif |
| STATS_INC(num_all_calls); |
| BBPRINT(bb, 4, "interp: direct call at "PFX"\n", bb->instr_start); |
| if (must_not_be_inlined(callee)) { |
| BBPRINT(bb, 3, "interp: NOT inlining call to "PFX"\n", callee); |
| /* Case 8711: coarse-grain can't handle non-exit cti. |
| * If we allow this fragment to be coarse we must kill the freeze |
| * nudge thread! |
| */ |
| bb->flags &= ~FRAG_COARSE_GRAIN; |
| STATS_INC(coarse_prevent_cti); |
| bb_add_native_direct_xfer(dcontext, bb, true/*appended*/); |
| return true; /* keep bb going, w/o inlining call */ |
| } else { |
| if (DYNAMO_OPTION(coarse_split_calls) && DYNAMO_OPTION(coarse_units) && |
| TEST(FRAG_COARSE_GRAIN, bb->flags)) { |
| if (instrlist_first(bb->ilist) != bb->instr) { |
| /* have call be in its own bb */ |
| bb_stop_prior_to_instr(dcontext, bb, true/*appended already*/); |
| return false; /* stop bb */ |
| } else { |
| /* single-call fine-grained bb */ |
| bb->flags &= ~FRAG_COARSE_GRAIN; |
| STATS_INC(coarse_prevent_cti); |
| } |
| } |
| /* FIXME: use follow_direct_call() */ |
| if (bb->follow_direct && |
| bb->num_elide_call < DYNAMO_OPTION(max_elide_call) && |
| (DYNAMO_OPTION(elide_back_calls) || bb->cur_pc <= callee)) { |
| if (check_new_page_jmp(dcontext, bb, callee)) { |
| bb->num_elide_call++; |
| STATS_INC(total_elided_calls); |
| STATS_TRACK_MAX(max_elided_calls, bb->num_elide_call); |
| bb->cur_pc = callee; |
| BBPRINT(bb, 4, " continuing in callee at "PFX"\n", bb->cur_pc); |
| return true; /* keep bb going */ |
| } |
| } |
| BBPRINT(bb, 3, " NOT following direct call from "PFX" to "PFX"\n", |
| bb->instr_start, callee); |
| /* End this bb now */ |
| bb->exit_target = callee; |
| return false; /* end bb now */ |
| } |
| return true; /* keep bb going */ |
| } |
| |
| #ifdef WINDOWS |
| |
| /* We check if the instrs call, mov, and sysenter are |
| * "call (%xdx); mov %xsp -> %xdx" or "call %xdx; mov %xsp -> %xdx" |
| * and "sysenter". |
| */ |
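| /* For background only (the matching below relies solely on the operand checks, |
| * not on these addresses): the typical 32-bit ntdll stub that produces this |
| * sequence looks roughly like |
| *   ntdll!NtFoo:            mov  eax, <sysnum> |
| *                           mov  edx, 0x7ffe0300    ; SharedUserData syscall ptr |
| *                           call dword ptr [edx]    ; post-SP2 ("call %edx" pre-SP2) |
| *   ntdll!KiFastSystemCall: mov  edx, esp |
| *                           sysenter |
| */ |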
| bool |
| instr_is_call_sysenter_pattern(instr_t *call, instr_t *mov, instr_t *sysenter) |
| { |
| instr_t *instr; |
| if (call == NULL || mov == NULL || sysenter == NULL) |
| return false; |
| if (instr_is_meta(call) || instr_is_meta(mov) || |
| instr_is_meta(sysenter)) |
| return false; |
| if (instr_get_next(call) != mov || instr_get_next(mov) != sysenter) |
| return false; |
| /* check sysenter */ |
| if (instr_get_opcode(sysenter) != OP_sysenter) |
| return false; |
| |
| /* FIXME Relax the pattern matching on the "mov; call" pair so that small |
| * changes in the register dataflow and call construct are tolerated. */ |
| |
| /* Did we find a "mov %xsp -> %xdx"? */ |
| instr = mov; |
| if (!(instr != NULL && instr_get_opcode(instr) == OP_mov_ld && |
| instr_num_srcs(instr) == 1 && instr_num_dsts(instr) == 1 && |
| opnd_is_reg(instr_get_dst(instr, 0)) && |
| opnd_get_reg(instr_get_dst(instr, 0)) == REG_XDX && |
| opnd_is_reg(instr_get_src(instr, 0)) && |
| opnd_get_reg(instr_get_src(instr, 0)) == REG_XSP)) { |
| return false; |
| } |
| |
| /* Did we find a "call (%xdx)" or "call %xdx" that's already marked |
| * for ind->direct call conversion? */ |
| instr = call; |
| if (!(instr != NULL && TEST(INSTR_IND_CALL_DIRECT, instr->flags) && |
| instr_is_call_indirect(instr) && |
| /* The 2nd src operand should always be %xsp. */ |
| opnd_is_reg(instr_get_src(instr, 1)) && |
| opnd_get_reg(instr_get_src(instr, 1)) == REG_XSP && |
| /* Match 'call (%xdx)' for post-SP2. */ |
| ((opnd_is_near_base_disp(instr_get_src(instr, 0)) && |
| opnd_get_base(instr_get_src(instr, 0)) == REG_XDX && |
| opnd_get_disp(instr_get_src(instr, 0)) == 0) || |
| /* Match 'call %xdx' for pre-SP2. */ |
| (opnd_is_reg(instr_get_src(instr, 0)) && |
| opnd_get_reg(instr_get_src(instr, 0)) == REG_XDX)))) { |
| return false; |
| } |
| |
| return true; |
| } |
| |
| /* Walk up from the bb->instr and verify that the preceding instructions |
| * match the pattern that we expect to precede a sysenter. */ |
| static instr_t * |
| bb_verify_sysenter_pattern(dcontext_t *dcontext, build_bb_t *bb) |
| { |
| /* Walk back up 2 instructions and verify that there's a |
| * "call (%xdx); mov %xsp -> %xdx" or "call %xdx; mov %xsp -> %xdx" |
| * just prior to the sysenter. |
| * We use "xsp" and "xdx" to be ready for x64 sysenter though we don't |
| * expect to see it. |
| */ |
| instr_t *mov, *call; |
| mov = instr_get_prev_expanded(dcontext, bb->ilist, bb->instr); |
| if (mov == NULL) |
| return NULL; |
| call = instr_get_prev_expanded(dcontext, bb->ilist, mov); |
| if (call == NULL) |
| return NULL; |
| if (!instr_is_call_sysenter_pattern(call, mov, bb->instr)) { |
| BBPRINT(bb, 3, "bb_verify_sysenter_pattern -- pattern didn't match\n"); |
| return NULL; |
| } |
| return call; |
| } |
| |
| /* Only used for the Borland SEH exemption. */ |
| /* FIXME - we can't really tell a push from a pop since both are typically a |
| * mov to fs:[0], but double processing doesn't hurt. */ |
| /* NOTE we don't see dynamic SEH frame pushes, we only see the first SEH push |
| * per mov -> fs:[0] instruction in the app. So we don't see modified in place |
| * handler addresses (see at_Borland_SEH_rct_exemption()) or handler addresses |
| * that are passed into a shared routine that sets up the frame (not yet seen, |
| * note that MS dlls that have a _SEH_prolog hardcode the handler address in |
| * the _SEH_prolog routine, only the data is passed in). |
| */ |
| static void |
| bb_process_SEH_push(dcontext_t *dcontext, build_bb_t *bb, void *value) |
| { |
| if (value == NULL || value == (void *)PTR_UINT_MINUS_1) { |
| /* could be popping off the last frame (leaving -1) of the SEH stack */ |
| STATS_INC(num_endlist_SEH_write); |
| ASSERT_CURIOSITY(value != NULL); |
| return; |
| } |
| LOG(THREAD, LOG_INTERP, 3, "App moving "PFX" to fs:[0]\n", value); |
| # ifdef RETURN_AFTER_CALL |
| if (DYNAMO_OPTION(borland_SEH_rct)) { |
| /* xref case 5752, the Borland compiler SEH implementation uses a push |
| * imm ret motif for fall through to the finally of a try finally block |
| * (very similar to what the Microsoft NT at_SEH_rct_exception() is |
| * doing). The layout will always look like this : |
| * push e: (imm32) (e should be in the .E/.F table) |
| * a: |
| * ... |
| * b: ret |
| * c: jmp rel32 (c should be in the .E/.F table) |
| * d: jmp a: (rel8/32) |
| * ... (usually nothing) |
| * e: |
| * (where ret at b is targeting e, or a valid after call). The |
| * exception dispatcher calls c (the SEH frame has c as the handler) |
| * which jmps to the exception handler which, in turn, calls d to |
| * execute the finally block. Fall through is as shown above. So, |
| * we see a .E violation for the handlers call to d and a .C violation |
| * for the fall through case of the ret @ b targeting e. We may also |
| * see a .E violation for a call to a as sometimes the handler computes |
| * the target of the jmp @ d and passes that to a different exception |
| * handler. |
| * |
| * For try-except we see the following layout : |
| * I've only seen jmp ind in the case that led to needing |
| * at_Borland_SEH_rct_exemption() to be added, not that |
| * it makes any difference. |
| * [ jmp z: (rel8/32) || (rarely) ret || (very rarely) jmp ind] |
| * x: jmp rel32 (x should be in the .E/.F table) |
| * y: |
| * ... |
| * call rel32 |
| * [z: ... || ret ] |
| * Though there may be other optimized layouts (the ret instead of the |
| * jmp z: is one such) so we may not want to rely on anything other |
| * than x y. The exception dispatcher calls x (the SEH frame has x as |
| * the handler) which jmps to the exception handler which, in turn, |
| * jmps to y to execute the except block. We see a .F violation from |
| * the handler's jmp to y. at_Borland_SEH_rct_exemption() covers a |
| * case where the address of x (and thus y) in an existing SEH frame |
| * is changed in place instead of popping and pushing a new frame. |
| * |
| * All addresses (rel and otherwise) should be in the same module. So |
| * we need to recognize the pattern and add d:/y: to the .E/.F table |
| * as well as a: (sometimes the handler calculates the target of d and |
| * passes that up to a higher level routine, though I don't see the |
| * point) and add e: to the .C table. |
| * |
| * It would be preferable to handle these exemptions reactively at |
| * the violation point, but unfortunately, by the time we get to the |
| * violation the SEH frame information has been popped off the stack |
| * and is lost, so we have to do it pre-emptively here (pattern |
| * matching at violation time has proven too difficult in the face of |
| * certain compiler optimizations). See at_Borland_SEH_rct_exemption() |
| * in callback.c, that could handle all ind branches to y and ind calls |
| * to d (see below) at an acceptable level of security if we desired. |
| * Handling the ret @ b to e reactively would require the ability to |
| * recreate the exact src cti (so we can use the addr of the ret to |
| * pattern match) at the violation point (something that can't always |
| * currently be done, reset flushing etc.). Handling the ind call to |
| * a (which I've never actually seen, though I've seen the address |
| * computed and it looks like it could likely be hit) reactively is |
| * more tricky. Prob. the only way to handle that is to allow .E/.F |
| * transitions to any address after a push imm32 of an address in the |
| * same module, but that might be too permissive. FIXME - should still |
| * revisit doing the exemptions reactively at some point, esp. once we |
| * can reliably get the src cti. |
| */ |
| |
| extern bool seen_Borland_SEH; /* set for callback.c */ |
| /* First read in the SEH frame, this is the observed structure and |
| * the first two fields (which are all that we use) are constrained by |
| * ntdll exception dispatcher (see EXCEPTION_REGISTRATION declaration |
| * in ntdll.h). */ |
| /* FIXME - could just use EXCEPTION_REGISTRATION period since all we |
| * need is the handler address and it would allow simpler curiosity |
| * [see 8181] below. If, as is expected, other options make use of |
| * this routine we'll probably have one shared get of the SEH frame |
| * anyways. */ |
| typedef struct _borland_seh_frame_t { |
| EXCEPTION_REGISTRATION reg; |
| reg_t xbp; /* not used by us */ |
| } borland_seh_frame_t; |
| borland_seh_frame_t frame; |
| /* will hold [b,e] or [x-1,y] */ |
| byte target_buf[RET_0_LENGTH + 2 * JMP_LONG_LENGTH]; |
| app_pc handler_jmp_target = NULL; |
| |
| if (!safe_read(value, sizeof(frame), &frame)) { |
| /* We already checked for NULL and -1 above so this should be |
| * a valid SEH frame. Xref 8181, borland_seh_frame_t struct is |
| * bigger then EXCEPTION_REGISTRATION (which is all that is |
| * required) so verify smaller size is readable. */ |
| ASSERT_CURIOSITY(sizeof(EXCEPTION_REGISTRATION) < sizeof(frame) && |
| safe_read(value, sizeof(EXCEPTION_REGISTRATION), |
| &frame)); |
| goto post_borland; |
| } |
| /* frame.reg.handler is c or y, read extra prior bytes to look for b */ |
| if (!safe_read((app_pc)frame.reg.handler - RET_0_LENGTH, |
| sizeof(target_buf), target_buf)) { |
| goto post_borland; |
| } |
| if (is_jmp_rel32(&target_buf[RET_0_LENGTH], (app_pc)frame.reg.handler, |
| &handler_jmp_target)) { |
| /* we have a possible match, now do the more expensive checking */ |
| app_pc base; |
| LOG(THREAD, LOG_INTERP, 3, |
| "Read possible borland SEH frame @"PFX"\n\t" |
| "next="PFX" handler="PFX" xbp="PFX"\n\t", |
| value, frame.reg.prev, frame.reg.handler, frame.xbp); |
| DOLOG(3, LOG_INTERP, { |
| dump_buffer_as_bytes(THREAD, target_buf, sizeof(target_buf), 0); |
| }); |
| /* optimize check if we've already processed this frame once */ |
| if ((DYNAMO_OPTION(rct_ind_jump) != OPTION_DISABLED || |
| DYNAMO_OPTION(rct_ind_call) != OPTION_DISABLED) && |
| rct_ind_branch_target_lookup(dcontext, |
| (app_pc)frame.reg.handler + |
| JMP_LONG_LENGTH)) { |
| /* we already processed this SEH frame once, this is prob. a |
| * frame pop, no need to continue */ |
| STATS_INC(num_borland_SEH_dup_frame); |
| LOG(THREAD, LOG_INTERP, 3, |
| "Processing duplicate Borland SEH frame\n"); |
| goto post_borland; |
| } |
| base = get_module_base((app_pc)frame.reg.handler); |
| STATS_INC(num_borland_SEH_initial_match); |
| /* Perf opt, we use the cheaper get_allocation_base() below instead |
| * of get_module_base(). We are checking the result against a |
| * known module base (base) so no need to duplicate the is module |
| * check. FIXME - the checks prob. aren't even necessary given the |
| * later is_in_code_section checks. Xref case 8171. */ |
| /* FIXME - (perf) we could cache the region from the first |
| * is_in_code_section() call and check against that before falling |
| * back on is_in_code_section in case of multiple code sections. */ |
| if (base != NULL && |
| get_allocation_base(handler_jmp_target) == base && |
| get_allocation_base(bb->instr_start) == base && |
| /* FIXME - with -rct_analyze_at_load we should be able to |
| * verify that frame->handler (x: c:) is on the .E/.F |
| * table already. We could also try to match known pre x: |
| * post y: patterns. */ |
| is_in_code_section(base, bb->instr_start, NULL, NULL) && |
| is_in_code_section(base, handler_jmp_target, NULL, NULL) && |
| is_range_in_code_section(base, (app_pc)frame.reg.handler, |
| (app_pc)frame.reg.handler+JMP_LONG_LENGTH+1, |
| NULL, NULL)) { |
| app_pc finally_target; |
| byte push_imm_buf[PUSH_IMM32_LENGTH]; |
| DEBUG_DECLARE(bool ok;) |
| /* we have a match, add handler+JMP_LONG_LENGTH (y: d:) |
| * to .E/.F table */ |
| STATS_INC(num_borland_SEH_try_match); |
| LOG(THREAD, LOG_INTERP, 2, |
| "Found Borland SEH frame adding "PFX" to .E/.F table\n", |
| (app_pc)frame.reg.handler+JMP_LONG_LENGTH); |
| if ((DYNAMO_OPTION(rct_ind_jump) != OPTION_DISABLED || |
| DYNAMO_OPTION(rct_ind_call) != OPTION_DISABLED)) { |
| mutex_lock(&rct_module_lock); |
| rct_add_valid_ind_branch_target(dcontext, |
| (app_pc)frame.reg.handler + |
| JMP_LONG_LENGTH); |
| mutex_unlock(&rct_module_lock); |
| } |
| /* we set this as an enabler for another exemption in |
| * callback .C, see notes there */ |
| if (!seen_Borland_SEH) { |
| SELF_UNPROTECT_DATASEC(DATASEC_RARELY_PROT); |
| seen_Borland_SEH = true; |
| SELF_PROTECT_DATASEC(DATASEC_RARELY_PROT); |
| } |
| /* case 8648: used to decide which RCT entries to persist */ |
| DEBUG_DECLARE(ok =) os_module_set_flag(base, MODULE_HAS_BORLAND_SEH); |
| ASSERT(ok); |
| /* look for .C addresses for try finally */ |
| if (target_buf[0] == RAW_OPCODE_ret && |
| (is_jmp_rel32(&target_buf[RET_0_LENGTH+JMP_LONG_LENGTH], |
| (app_pc)frame.reg.handler+JMP_LONG_LENGTH, |
| &finally_target) || |
| is_jmp_rel8(&target_buf[RET_0_LENGTH+JMP_LONG_LENGTH], |
| (app_pc)frame.reg.handler+JMP_LONG_LENGTH, |
| &finally_target)) && |
| safe_read(finally_target - sizeof(push_imm_buf), |
| sizeof(push_imm_buf), push_imm_buf) && |
| push_imm_buf[0] == RAW_OPCODE_push_imm32) { |
| app_pc push_val = *(app_pc *)&push_imm_buf[1]; |
| /* do a few more, expensive, sanity checks */ |
| /* FIXME - (perf) see earlier note on get_allocation_base() |
| * and is_in_code_section() usage. */ |
| if (get_allocation_base(finally_target) == base && |
| is_in_code_section(base, finally_target, NULL, NULL) && |
| get_allocation_base(push_val) == base && |
| /* FIXME - could also check that push_val is in |
| * .E/.F table, at least for -rct_analyze_at_load */ |
| is_in_code_section(base, push_val, NULL, NULL)) { |
| /* Full match, add push_val (e:) to the .C table |
| * and finally_target (a:) to the .E/.F table */ |
| STATS_INC(num_borland_SEH_finally_match); |
| LOG(THREAD, LOG_INTERP, 2, |
| "Found Borland SEH finally frame adding "PFX" to" |
| " .C table and "PFX" to .E/.F table\n", |
| push_val, finally_target); |
| if ((DYNAMO_OPTION(rct_ind_jump) != OPTION_DISABLED || |
| DYNAMO_OPTION(rct_ind_call) != OPTION_DISABLED)) { |
| mutex_lock(&rct_module_lock); |
| rct_add_valid_ind_branch_target(dcontext, |
| finally_target); |
| mutex_unlock(&rct_module_lock); |
| } |
| if (DYNAMO_OPTION(ret_after_call)) { |
| fragment_add_after_call(dcontext, push_val); |
| } |
| } else { |
| ASSERT_CURIOSITY(false && |
| "partial borland seh finally match"); |
| } |
| } |
| } |
| } |
| } |
| post_borland: |
| # endif /* RETURN_AFTER_CALL */ |
| return; |
| } |
| |
| /* helper routine for bb_process_fs_ref |
| * return true if bb should be continued, false if it shouldn't */ |
| static bool |
| bb_process_fs_ref_opnd(dcontext_t *dcontext, build_bb_t *bb, opnd_t dst, |
| bool *is_to_fs0) |
| { |
| ASSERT(is_to_fs0 != NULL); |
| *is_to_fs0 = false; |
| if (opnd_is_far_base_disp(dst) && /* FIXME - check size? */ |
| opnd_get_segment(dst) == SEG_FS) { |
| /* is a write to fs:[*] */ |
| if (bb->instr_start != bb->start_pc) { |
| /* Not first instruction in the bb, end bb before this |
| * instruction, so we can see it as the first instruction of a |
| * new bb where we can use the register state. */ |
| /* As is, always ending the bb here has a mixed effect on mem usage |
| * with default options. We do end up with slightly more bb's |
             * (and associated bookkeeping costs), but frequently with MS dlls
             * we reduce code cache duplication from jmp/call elision
| * (_SEH_[Pro,Epi]log otherwise ends up frequently duplicated for |
| * instance). */ |
| /* FIXME - we must stop the bb here even if there's already |
| * a bb built for the next instruction, as we have to have |
| * reproducible bb building for recreate app state. We should |
| * only get here through code duplication (typically jmp/call |
| * inlining, though can also be through multiple entry points into |
| * the same block of non cti instructions). */ |
| bb_stop_prior_to_instr(dcontext, bb, false/*not appended yet*/); |
| return false; /* stop bb */ |
| } |
| /* Only process the push if building a new bb for cache, can't check |
| * this any earlier since have to preserve bb building/ending behavior |
| * even when not for cache (for recreation etc.). */ |
| if (bb->app_interp) { |
| /* check is write to fs:[0] */ |
| /* XXX: this won't identify all memory references (need to switch to |
| * instr_compute_address_ex_priv() in order to handle VSIB) but the |
| * current usage is just to identify the Borland pattern so that's ok. |
| */ |
| if (opnd_compute_address_priv(dst, get_mcontext(dcontext)) == NULL) { |
| /* we have new mov to fs:[0] */ |
| *is_to_fs0 = true; |
| } |
| } |
| } |
| return true; |
| } |
| |
| /* While currently only used for Borland SEH exemptions, this analysis could |
| * also be helpful for other SEH tasks (xref case 5824). */ |
| static bool |
| bb_process_fs_ref(dcontext_t *dcontext, build_bb_t *bb) |
| { |
| ASSERT(DYNAMO_OPTION(process_SEH_push) && |
| instr_get_prefix_flag(bb->instr, PREFIX_SEG_FS)); |
| |
| /* If this is the first instruction of a bb for the cache we |
| * want to fully decode it, check if it's pushing an SEH frame |
| * and, if so, pass it to the SEH checking routines (currently |
| * just used for the Borland SEH rct handling). If this is not |
| * the first instruction of the bb then we want to stop the bb |
| * just before this instruction so that when we do process this |
| * instruction it will be the first in the bb (allowing us to |
| * use the register state). */ |
| if (!bb->full_decode) { |
| instr_decode(dcontext, bb->instr); |
        /* It is possible this is an invalid instr that made it through the fast
         * decode; FIXME: is there a better way to handle this? */
| if (!instr_valid(bb->instr)) { |
| ASSERT_NOT_TESTED(); |
| if (bb->cur_pc == NULL) |
| bb->cur_pc = bb->instr_start; |
| bb_process_invalid_instr(dcontext, bb); |
| return false; /* stop bb */ |
| } |
| ASSERT(instr_get_prefix_flag(bb->instr, PREFIX_SEG_FS)); |
| } |
| /* expect to see only simple mov's to fs:[0] for new SEH frames |
| * FIXME - might we see other types we'd want to intercept? |
     * do we want to process pop instructions (usually just for removing
| * a frame)? */ |
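    /* For reference, a typical SEH frame installation sequence looks roughly
     * like this (a sketch; exact register usage and scheduling vary by
     * compiler):
     *     push  <handler>        ; EXCEPTION_REGISTRATION.handler
     *     push  fs:[0]           ; EXCEPTION_REGISTRATION.prev
     *     mov   fs:[0], esp      ; install the new frame
     * The final mov to fs:[0] is the write we key off of below. */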
| if (instr_get_opcode(bb->instr) == OP_mov_st) { |
| bool is_to_fs0; |
| opnd_t dst = instr_get_dst(bb->instr, 0); |
| if (!bb_process_fs_ref_opnd(dcontext, bb, dst, &is_to_fs0)) |
| return false; /* end bb */ |
| /* Only process the push if building a new bb for cache, can't check |
| * this any earlier since have to preserve bb building/ending behavior |
| * even when not for cache (for recreation etc.). */ |
| if (bb->app_interp) { |
| if (is_to_fs0) { |
| ptr_int_t value = 0; |
| opnd_t src = instr_get_src(bb->instr, 0); |
| if (opnd_is_immed_int(src)) { |
| value = opnd_get_immed_int(src); |
| } else if (opnd_is_reg(src)) { |
| value = reg_get_value_priv(opnd_get_reg(src), get_mcontext(dcontext)); |
| } else { |
| ASSERT_NOT_REACHED(); |
| } |
| STATS_INC(num_SEH_pushes_processed); |
| LOG(THREAD, LOG_INTERP, 3, "found mov to fs:[0] @ "PFX"\n", |
| bb->instr_start); |
| bb_process_SEH_push(dcontext, bb, (void *)value); |
| } else { |
| STATS_INC(num_fs_movs_not_SEH); |
| } |
| } |
| } |
| # if defined(DEBUG) && defined(INTERNAL) |
| else if (INTERNAL_OPTION(check_for_SEH_push)) { |
        /* Debug-build sanity check that we aren't missing SEH frame pushes */
| int i; |
| int num_dsts = instr_num_dsts(bb->instr); |
| for (i = 0; i < num_dsts; i++) { |
| bool is_to_fs0; |
| opnd_t dst = instr_get_dst(bb->instr, i); |
| if (!bb_process_fs_ref_opnd(dcontext, bb, dst, &is_to_fs0)) { |
| STATS_INC(num_process_SEH_bb_early_terminate_debug); |
| return false; /* end bb */ |
| } |
| /* common case is pop instructions to fs:[0] when popping an |
| * SEH frame stored on tos */ |
| if (is_to_fs0) { |
| if (instr_get_opcode(bb->instr) == OP_pop) { |
| LOG(THREAD, LOG_INTERP, 4, |
| "found pop to fs:[0] @ "PFX"\n", bb->instr_start); |
| STATS_INC(num_process_SEH_pop_fs0); |
| } else { |
| /* an unexpected SEH frame push */ |
| LOG(THREAD, LOG_INTERP, 1, |
| "found unexpected write to fs:[0] @"PFX"\n", |
| bb->instr_start); |
| DOLOG(1, LOG_INTERP, { |
| loginst(dcontext, 1, bb->instr, ""); |
| }); |
| ASSERT_CURIOSITY(!is_to_fs0); |
| } |
| } |
| } |
| } |
| # endif |
| return true; /* continue bb */ |
| } |
| #endif /* win32 */ |
| |
| #if defined(UNIX) && !defined(DGC_DIAGNOSTICS) && defined(X86) |
/* The basic strategy for mangling a mov_seg instruction is:
 * For mov fs/gs => reg/[mem], simply mangle it to write
 * the app's fs/gs selector value into dst.
 * For mov reg/mem => fs/gs, we make it the first instruction
 * of the bb, mark that bb as not linked and as having a mov_seg instr,
 * and change that instruction into a nop.
 * Then, before entering the code cache, we check whether the bb
 * has a mov_seg; if so, we update the information we maintain
 * about the app's fs/gs.
 */
| /* check if the basic block building should continue on a mov_seg instr. */ |
| static bool |
| bb_process_mov_seg(dcontext_t *dcontext, build_bb_t *bb) |
| { |
| reg_id_t seg; |
| |
| if (!INTERNAL_OPTION(mangle_app_seg)) |
| return true; /* continue bb */ |
| |
    /* if it is a read, we only need to mangle the instruction. */
| ASSERT(instr_num_srcs(bb->instr) == 1); |
| if (opnd_is_reg(instr_get_src(bb->instr, 0)) && |
| reg_is_segment(opnd_get_reg(instr_get_src(bb->instr, 0)))) |
| return true; /* continue bb */ |
| |
    /* it is an update; we need it to be the first instr of the bb */
| ASSERT(instr_num_dsts(bb->instr) == 1); |
| ASSERT(opnd_is_reg(instr_get_dst(bb->instr, 0))); |
| seg = opnd_get_reg(instr_get_dst(bb->instr, 0)); |
| ASSERT(reg_is_segment(seg)); |
    /* we only need to handle fs/gs */
| if (seg != SEG_GS && seg != SEG_FS) |
| return true; /* continue bb */ |
    /* if there is no private loader, we only need to mangle the non-tls seg */
    if (seg == IF_X64_ELSE(SEG_FS, SEG_GS) &&
| IF_CLIENT_INTERFACE_ELSE(!INTERNAL_OPTION(private_loader), true)) |
| return true; /* continue bb */ |
| |
| if (bb->instr_start == bb->start_pc) { |
        /* this is the first instruction, so we can continue building the bb. */
        /* this bb cannot be part of a trace! */
| bb->flags |= FRAG_CANNOT_BE_TRACE; |
| bb->flags |= FRAG_HAS_MOV_SEG; |
| return true; /* continue bb */ |
| } |
| |
| LOG(THREAD, LOG_INTERP, 3, "ending bb before mov_seg\n"); |
| /* Set cur_pc back to the start of this instruction and delete this |
| * instruction from the bb ilist. |
| */ |
| bb->cur_pc = instr_get_raw_bits(bb->instr); |
| instrlist_remove(bb->ilist, bb->instr); |
| instr_destroy(dcontext, bb->instr); |
| /* Set instr to NULL in order to get translation of exit cti correct. */ |
| bb->instr = NULL; |
| /* this block must be the last one in a trace |
| * breaking traces here shouldn't be a perf issue b/c this is so rare, |
| * it should happen only once per thread on setting up tls. |
| */ |
| bb->flags |= FRAG_MUST_END_TRACE; |
| return false; /* stop bb here */ |
| } |
| #endif /* UNIX && X86 */ |
| |
| /* Returns true to indicate that ignorable syscall processing is completed |
| * with *continue_bb indicating if the bb should be continued or not. |
| * When returning false, continue_bb isn't pertinent. |
| */ |
| static bool |
| bb_process_ignorable_syscall(dcontext_t *dcontext, build_bb_t *bb, |
| int sysnum, bool *continue_bb) |
| { |
| STATS_INC(ignorable_syscalls); |
| BBPRINT(bb, 3, "found ignorable system call 0x%04x\n", sysnum); |
| #ifdef WINDOWS |
| if (get_syscall_method() != SYSCALL_METHOD_SYSENTER) { |
| DOCHECK(1, { |
| if (get_syscall_method() == SYSCALL_METHOD_WOW64) |
| ASSERT_NOT_TESTED(); |
| }); |
| if (continue_bb != NULL) |
| *continue_bb = true; |
| return true; |
| } |
| else { |
| /* Can we continue interp after the sysenter at the instruction |
| * after the call to sysenter? */ |
| instr_t *call = bb_verify_sysenter_pattern(dcontext, bb); |
| |
| if (call != NULL) { |
| /* If we're continuing code discovery at the after-call address, |
| * change the cur_pc to continue at the after-call addr. This is |
| * safe since the preceding call is in the fragment and |
             * %xsp/(%xsp) hasn't changed since the call. Obviously, we assume
             * that the sysenter breaks control flow in such a fashion that any
             * instruction that follows it isn't reached by DR.
| */ |
| if (DYNAMO_OPTION(ignore_syscalls_follow_sysenter)) { |
| bb->cur_pc = |
| instr_get_raw_bits(call) + instr_length(dcontext, call); |
| if (continue_bb != NULL) |
| *continue_bb = true; |
| return true; |
| } |
| else { |
| /* End this bb now. We set the exit target so that control |
| * skips the vsyscall 'ret' that's executed natively after the |
| * syscall and ends up at the correct place. |
| */ |
| /* FIXME Assigning exit_target causes the fragment to end |
| * with a direct exit stub to the after-call address, which |
| * is fine. If bb->exit_target < bb->start_pc, the future |
| * fragment for exit_target is marked as a trace head which |
| * isn't intended. A potentially undesirable side effect |
| * is that exit_target's fragment can't be included in |
| * trace for start_pc. |
| */ |
| bb->exit_target = |
| instr_get_raw_bits(call) + instr_length(dcontext, call); |
| if (continue_bb != NULL) |
| *continue_bb = false; |
| return true; |
| } |
| } |
| STATS_INC(ignorable_syscalls_failed_sysenter_pattern); |
| /* Pattern match failed but the syscall is ignorable so maybe we |
| * can try shared syscall? */ |
| /* Decrement the stat to prevent double counting. We rarely expect to hit |
| * this case. */ |
| STATS_DEC(ignorable_syscalls); |
| return false; |
| } |
| #elif defined (MACOS) |
| if (instr_get_opcode(bb->instr) == OP_sysenter) { |
| /* To continue after the sysenter we need to go to the ret ibl, as user-mode |
| * sysenter wrappers put the retaddr into edx as the post-kernel continuation. |
| */ |
| bb->exit_type |= LINK_INDIRECT|LINK_RETURN; |
| bb->ibl_branch_type = IBL_RETURN; |
| bb->exit_target = get_ibl_routine(dcontext, get_ibl_entry_type(bb->exit_type), |
| DEFAULT_IBL_BB(), bb->ibl_branch_type); |
| LOG(THREAD, LOG_INTERP, 4, "sysenter exit target = "PFX"\n", bb->exit_target); |
| if (continue_bb != NULL) |
| *continue_bb = false; |
| } else if (continue_bb != NULL) |
| *continue_bb = true; |
| return true; |
| #else |
| if (continue_bb != NULL) |
| *continue_bb = true; |
| return true; |
| #endif |
| } |
| |
| #ifdef WINDOWS |
| /* Process a syscall that is executed via shared syscall. */ |
| static void |
| bb_process_shared_syscall(dcontext_t *dcontext, build_bb_t *bb, int sysnum) |
| { |
| ASSERT(DYNAMO_OPTION(shared_syscalls)); |
| DODEBUG({ |
| if (ignorable_system_call(sysnum, bb->instr, NULL)) |
| STATS_INC(ignorable_syscalls); |
| else |
| STATS_INC(optimizable_syscalls); |
| }); |
| BBPRINT(bb, 3, "found %soptimizable system call 0x%04x\n", |
| INTERNAL_OPTION(shared_eq_ignore) ? "ignorable-" : "", |
| sysnum); |
| |
| LOG(THREAD, LOG_INTERP, 3, |
| "ending bb at syscall & NOT removing the interrupt itself\n"); |
| |
| /* Mark the instruction as pointing to shared syscall */ |
| bb->instr->flags |= INSTR_SHARED_SYSCALL; |
| /* this block must be the last one in a trace */ |
| bb->flags |= FRAG_MUST_END_TRACE; |
| /* we redirect all optimizable syscalls to a single shared piece of code. |
| * Once a fragment reaches the shared syscall code, it can be safely |
| * deleted, for example, if the thread is interrupted for a callback and |
| * DR needs to delete fragments for cache management. |
| * |
| * Note that w/shared syscall, syscalls can be executed from TWO |
| * places -- shared_syscall and do_syscall. |
| */ |
| bb->exit_target = shared_syscall_routine(dcontext); |
| /* make sure translation for ending jmp ends up right, mangle will |
| * remove this instruction, so set to NULL so translation does the |
| * right thing */ |
| bb->instr = NULL; |
| } |
| #endif |
| |
| static bool |
| bb_process_non_ignorable_syscall(dcontext_t *dcontext, build_bb_t *bb, |
| int sysnum) |
| { |
| BBPRINT(bb, 3, "found non-ignorable system call 0x%04x\n", sysnum); |
| STATS_INC(non_ignorable_syscalls); |
| bb->exit_type |= LINK_NI_SYSCALL; |
| /* destroy the interrupt instruction */ |
| LOG(THREAD, LOG_INTERP, 3, |
| "ending bb at syscall & removing the interrupt itself\n"); |
| /* Indicate that this is a non-ignorable syscall so mangle will remove */ |
    /* FIXME i#1551: maybe we should union int80 and svc as both are inline syscalls? */
| #ifdef UNIX |
| if (instr_get_opcode(bb->instr) == IF_X86_ELSE(OP_int, OP_svc)) { |
| # if defined(MACOS) && defined(X86) |
| int num = instr_get_interrupt_number(bb->instr); |
| if (num == 0x81 || num == 0x82) { |
| bb->exit_type |= LINK_SPECIAL_EXIT; |
| bb->instr->flags |= INSTR_BRANCH_SPECIAL_EXIT; |
| } else { |
| ASSERT(num == 0x80); |
| # endif /* MACOS && X86 */ |
| bb->exit_type |= LINK_NI_SYSCALL_INT; |
| bb->instr->flags |= INSTR_NI_SYSCALL_INT; |
| # ifdef MACOS |
| } |
| # endif |
| } else |
| #endif |
| bb->instr->flags |= INSTR_NI_SYSCALL; |
| /* Set instr to NULL in order to get translation of exit cti correct. */ |
| bb->instr = NULL; |
| /* this block must be the last one in a trace */ |
| bb->flags |= FRAG_MUST_END_TRACE; |
| return false; /* end bb now */ |
| } |
| |
| /* returns true to indicate "continue bb" and false to indicate "end bb now" */ |
| static inline bool |
| bb_process_syscall(dcontext_t *dcontext, build_bb_t *bb) |
| { |
| int sysnum; |
| #ifdef CLIENT_INTERFACE |
| /* PR 307284: for simplicity do syscall/int processing post-client. |
| * We give up on inlining but we can still use ignorable/shared syscalls |
| * and trace continuation. |
| */ |
| if (bb->pass_to_client && !bb->post_client) |
| return false; |
| #endif |
| #ifdef DGC_DIAGNOSTICS |
| if (TEST(FRAG_DYNGEN, bb->flags) && !is_dyngen_vsyscall(bb->instr_start)) { |
| LOG(THREAD, LOG_INTERP, 1, "WARNING: syscall @ "PFX" in dyngen code!\n", |
| bb->instr_start); |
| } |
| #endif |
| BBPRINT(bb, 4, "interp: syscall @ "PFX"\n", bb->instr_start); |
| check_syscall_method(dcontext, bb->instr); |
| bb->flags |= FRAG_HAS_SYSCALL; |
| /* if we can identify syscall number and it is an ignorable syscall, |
| * we let bb keep going, else we end bb and flag it |
| */ |
| sysnum = find_syscall_num(dcontext, bb->ilist, bb->instr); |
| #ifdef VMX86_SERVER |
| DOSTATS({ |
| if (instr_get_opcode(bb->instr) == OP_int && |
| instr_get_interrupt_number(bb->instr) == VMKUW_SYSCALL_GATEWAY) { |
| STATS_INC(vmkuw_syscall_sites); |
| LOG(THREAD, LOG_SYSCALLS, 2, "vmkuw system call site: #=%d\n", sysnum); |
| } |
| }); |
| #endif |
| BBPRINT(bb, 3, "syscall # is %d\n", sysnum); |
| #ifdef CLIENT_INTERFACE |
| if (sysnum != -1 && instrument_filter_syscall(dcontext, sysnum)) { |
| BBPRINT(bb, 3, "client asking to intercept => pretending syscall # %d is -1\n", |
| sysnum); |
| sysnum = -1; |
| } |
| #endif |
| if (sysnum != -1 && |
| DYNAMO_OPTION(ignore_syscalls) && |
| ignorable_system_call(sysnum, bb->instr, NULL) |
| #ifdef X86 |
| /* PR 288101: On Linux we do not yet support inlined sysenter instrs as we |
| * do not have in-cache support for the post-sysenter continuation: we rely |
| * for now on very simple sysenter handling where dispatch uses asynch_target |
| * to know where to go next. |
| */ |
| IF_LINUX(&& instr_get_opcode(bb->instr) != OP_sysenter) |
| #endif /* X86 */ |
| ) { |
| |
| bool continue_bb; |
| |
| if (bb_process_ignorable_syscall(dcontext, bb, sysnum, &continue_bb)) { |
| if (!DYNAMO_OPTION(inline_ignored_syscalls)) |
| continue_bb = false; |
| return continue_bb; |
| } |
| } |
| #ifdef WINDOWS |
| if (sysnum != -1 && DYNAMO_OPTION(shared_syscalls) && |
| optimizable_system_call(sysnum)) { |
| bb_process_shared_syscall(dcontext, bb, sysnum); |
| return false; |
| } |
| #endif |
| |
| /* Fall thru and handle as a non-ignorable syscall. */ |
| return bb_process_non_ignorable_syscall(dcontext, bb, sysnum); |
| } |
| |
| /* Case 3922: for wow64 we treat "call *fs:0xc0" as a system call. |
| * Only sets continue_bb if it returns true. |
| */ |
| static bool |
| bb_process_indcall_syscall(dcontext_t *dcontext, build_bb_t *bb, |
| bool *continue_bb) |
| { |
| ASSERT(continue_bb != NULL); |
| #ifdef WINDOWS |
| if (instr_is_wow64_syscall(bb->instr)) { |
| /* we could check the preceding instrs but we don't bother */ |
| *continue_bb = bb_process_syscall(dcontext, bb); |
| return true; |
| } |
| #endif |
| return false; |
| } |
| |
| /* returns true to indicate "continue bb" and false to indicate "end bb now" */ |
| static inline bool |
| bb_process_interrupt(dcontext_t *dcontext, build_bb_t *bb) |
| { |
| #if defined(DEBUG) || defined(INTERNAL) || defined(WINDOWS) |
| int num = instr_get_interrupt_number(bb->instr); |
| #endif |
| #ifdef CLIENT_INTERFACE |
| /* PR 307284: for simplicity do syscall/int processing post-client. |
| * We give up on inlining but we can still use ignorable/shared syscalls |
| * and trace continuation. |
| * PR 550752: we cannot end at int 0x2d: we live w/ client consequences |
| */ |
| if (bb->pass_to_client && !bb->post_client IF_WINDOWS(&& num != 0x2d)) |
| return false; |
| #endif |
| BBPRINT(bb, 3, "int 0x%x @ "PFX"\n", num, bb->instr_start); |
| #ifdef WINDOWS |
| if (num == 0x2b) { |
| /* interrupt 0x2B signals return from callback */ |
| /* end block here and come back to dynamo to perform interrupt */ |
| bb->exit_type |= LINK_CALLBACK_RETURN; |
| BBPRINT(bb, 3, "ending bb at cb ret & removing the interrupt itself\n"); |
| /* Set instr to NULL in order to get translation of exit cti |
| * correct. mangle will destroy the instruction */ |
| bb->instr = NULL; |
| bb->flags |= FRAG_MUST_END_TRACE; |
| STATS_INC(num_int2b); |
| return false; |
| } else { |
| SYSLOG_INTERNAL_INFO_ONCE("non-syscall, non-int2b 0x%x @ "PFX" from "PFX, |
| num, bb->instr_start, bb->start_pc); |
| } |
| #endif /* WINDOWS */ |
| return true; |
| } |
| |
| /* If the current instr in the BB is an indirect call that can be converted into a |
| * direct call, process it and return true, else, return false. |
| * FIXME PR 288327: put in linux call* to vsyscall page |
| */ |
| static bool |
| bb_process_convertible_indcall(dcontext_t *dcontext, build_bb_t *bb) |
| { |
| #ifdef X86 |
| /* We perform several levels of checking, each increasingly more stringent |
| * and expensive, with a false return should any fail. |
| */ |
| instr_t *instr; |
| opnd_t src0; |
| instr_t *call_instr; |
| int call_src_reg; |
| app_pc callee; |
| bool vsyscall = false; |
| |
| /* Check if this BB can be extended and the instr is a (near) indirect call */ |
| if (instr_get_opcode(bb->instr) != OP_call_ind) |
| return false; |
| |
| /* Check if we have a "mov <imm> -> %reg; call %reg" or a |
| * "mov <imm> -> %reg; call (%reg)" pair. First check for the call. |
| */ |
| /* The 'if' conditions are broken up to make the code more readable |
| * while #ifdef-ing the WINDOWS case. It's still ugly though. |
| */ |
| instr = bb->instr; |
| if (!( |
| # ifdef WINDOWS |
| /* Match 'call (%xdx)' for a post-SP2 indirect call to sysenter. */ |
| (opnd_is_near_base_disp(instr_get_src(instr, 0)) && |
| opnd_get_base(instr_get_src(instr, 0)) == REG_XDX && |
| opnd_get_disp(instr_get_src(instr, 0)) == 0) || |
| # endif |
| /* Match 'call %reg'. */ |
| opnd_is_reg(instr_get_src(instr, 0)))) |
| return false; |
| |
| /* If there's no CTI in the BB, we can check if there are 5+ preceding |
| * bytes and if they could hold a "mov" instruction. |
| */ |
| if (!TEST(FRAG_HAS_DIRECT_CTI, bb->flags) && |
| bb->instr_start - 5 >= bb->start_pc) { |
| |
| byte opcode = *((byte *) bb->instr_start - 5); |
| |
| /* Check the opcode. Do we see a "mov ... -> %reg"? Valid opcodes are in |
| * the 0xb8-0xbf range (Intel IA-32 ISA ref, v.2) and specify the |
| * destination register, i.e., 0xb8 means that %xax is the destination. |
| */ |
| if (opcode < 0xb8 || opcode > 0xbf) |
| return false; |
| } |
| |
| /* Check the previous instruction -- is it really a "mov"? */ |
| src0 = instr_get_src(instr, 0); |
| call_instr = instr; |
| instr = instr_get_prev_expanded(dcontext, bb->ilist, bb->instr); |
| call_src_reg = opnd_is_near_base_disp(src0) ? opnd_get_base(src0) : |
| opnd_get_reg(src0); |
| if (instr == NULL || instr_get_opcode(instr) != OP_mov_imm || |
| opnd_get_reg(instr_get_dst(instr, 0)) != call_src_reg) |
| return false; |
| |
| /* For the general case, we don't try to optimize a call |
| * thru memory -- just check that the call uses a register. |
| */ |
| callee = NULL; |
| if (opnd_is_reg(src0)) { |
| /* Extract the target address. */ |
| callee = (app_pc) opnd_get_immed_int(instr_get_src(instr, 0)); |
| # ifdef WINDOWS |
| # ifdef PROGRAM_SHEPHERDING |
| /* FIXME - is checking for on vsyscall page better or is checking == to |
| * VSYSCALL_BOOTSTRAP_ADDR? Both are hacky. */ |
| if (is_dyngen_vsyscall((app_pc)opnd_get_immed_int(instr_get_src(instr, 0)))) { |
| LOG(THREAD, LOG_INTERP, 4, "Pre-SP2 style indirect call " |
| "to sysenter found at "PFX"\n", bb->instr_start); |
| STATS_INC(num_sysenter_indcalls); |
| vsyscall = true; |
| ASSERT(opnd_get_immed_int(instr_get_src(instr, 0)) == |
| (ptr_int_t)VSYSCALL_BOOTSTRAP_ADDR); |
| ASSERT(!use_ki_syscall_routines()); /* double check our determination */ |
| } |
| else |
| # endif |
| # endif |
| STATS_INC(num_convertible_indcalls); |
| } |
| # ifdef WINDOWS |
| /* Match the "call (%xdx)" to sysenter case for SP2-patched os's. Memory at |
| * address VSYSCALL_BOOTSTRAP_ADDR (0x7ffe0300) holds the address of |
| * KiFastSystemCall or (FIXME - not handled) on older platforms KiIntSystemCall. |
| * FIXME It's unsavory to hard-code 0x7ffe0300, but the constant has little |
| * context in an SP2 os. It's a hold-over from pre-SP2. |
| */ |
| else if (get_syscall_method() == SYSCALL_METHOD_SYSENTER |
| && call_src_reg == REG_XDX |
| && opnd_get_immed_int(instr_get_src(instr, 0)) == |
| (ptr_int_t)VSYSCALL_BOOTSTRAP_ADDR) { |
| /* Extract the target address. We expect that the memory read using the |
| * value in the immediate field is ok as it's the vsyscall page |
| * which 1) cannot be made unreadable and 2) cannot be made writable so |
| * the stored value will not change. Of course, it's possible that the |
| * os could change the page contents. |
| */ |
| callee = (app_pc) |
| *((ptr_uint_t *) opnd_get_immed_int(instr_get_src(instr, 0))); |
| if (get_app_sysenter_addr() == NULL) { |
| /* For the first call* we've yet to decode an app syscall, yet we |
| * cannot have later recreations have differing behavior, so we must |
| * handle that case (even though it doesn't matter performance-wise |
| * as the first call* is usually in runtime init code that's |
| * executed once). So we do a raw byte compare to: |
| * ntdll!KiFastSystemCall: |
| * 7c82ed50 8bd4 mov xdx,xsp |
| * 7c82ed52 0f34 sysenter |
| */ |
| uint raw; |
| if (!safe_read(callee, sizeof(raw), &raw) || raw != 0x340fd48b) |
| callee = NULL; |
| } else { |
| /* The callee should be a 2 byte "mov %xsp -> %xdx" followed by the |
| * sysenter -- check the sysenter's address as 2 bytes past the callee. |
| */ |
| if (callee + 2 != get_app_sysenter_addr()) |
| callee = NULL; |
| } |
| vsyscall = (callee != NULL); |
| ASSERT(use_ki_syscall_routines()); /* double check our determination */ |
| DODEBUG({ |
| if (callee == NULL) |
| ASSERT_CURIOSITY(false && "call* to vsyscall unexpected mismatch"); |
| else { |
| LOG(THREAD, LOG_INTERP, 4, "Post-SP2 style indirect call " |
| "to sysenter found at "PFX"\n", bb->instr_start); |
| STATS_INC(num_sysenter_indcalls); |
| } |
| }); |
| } |
| # endif |
| |
| /* Check if register dataflow matched and we were able to extract |
| * the callee address. |
| */ |
| if (callee == NULL) |
| return false; |
| |
| if (vsyscall) { |
| /* Case 8917: abandon coarse-grainness in favor of performance */ |
| bb->flags &= ~FRAG_COARSE_GRAIN; |
| STATS_INC(coarse_prevent_indcall); |
| } |
| |
| LOG(THREAD, LOG_INTERP, 4, "interp: possible convertible" |
| " indirect call from "PFX" to "PFX"\n", |
| bb->instr_start, callee); |
| |
| if (must_not_be_inlined(callee)) { |
| BBPRINT(bb, 3, " NOT inlining indirect call to "PFX"\n", callee); |
| /* Case 8711: coarse-grain can't handle non-exit cti */ |
| bb->flags &= ~FRAG_COARSE_GRAIN; |
| STATS_INC(coarse_prevent_cti); |
| ASSERT_CURIOSITY_ONCE(!vsyscall && "leaving call* to vsyscall"); |
| /* no need for bb_add_native_direct_xfer() b/c it's already indirect */ |
| return true; /* keep bb going, w/o inlining call */ |
| } |
| |
| if (bb->follow_direct && |
| bb->num_elide_call < DYNAMO_OPTION(max_elide_call) && |
| (DYNAMO_OPTION(elide_back_calls) || bb->cur_pc <= callee)) { |
| /* FIXME This is identical to the code for evaluating a |
| * direct call's callee. If such code appears in another |
| * (3rd) place, we should outline it. |
| * FIXME: use follow_direct_call() |
| */ |
| if (vsyscall) { |
| /* As a flag to allow our xfer from now-non-coarse to coarse |
| * (for vsyscall-in-ntdll) we pre-emptively mark as has-syscall. |
| */ |
| ASSERT(!TEST(FRAG_HAS_SYSCALL, bb->flags)); |
| bb->flags |= FRAG_HAS_SYSCALL; |
| } |
| if (check_new_page_jmp(dcontext, bb, callee)) { |
| if (vsyscall) /* Restore */ |
| bb->flags &= ~FRAG_HAS_SYSCALL; |
| bb->num_elide_call++; |
| STATS_INC(total_elided_calls); |
| STATS_TRACK_MAX(max_elided_calls, bb->num_elide_call); |
| bb->cur_pc = callee; |
| /* FIXME: when using follow_direct_call don't forget to set this */ |
| call_instr->flags |= INSTR_IND_CALL_DIRECT; |
| BBPRINT(bb, 4, " continuing in callee at "PFX"\n", bb->cur_pc); |
| return true; /* keep bb going */ |
| } |
| if (vsyscall) { |
| /* Case 8917: Restore, just in case, though we certainly expect to have |
| * this flag set as soon as we decode a few more instrs and hit the |
| * syscall itself -- but for pre-sp2 we currently could be elsewhere on |
| * the same page, so let's be safe here. |
| */ |
| bb->flags &= ~FRAG_HAS_SYSCALL; |
| } |
| } |
| /* FIXME: we're also not converting to a direct call - was this intended? */ |
| BBPRINT(bb, 3, " NOT following indirect call from "PFX" to "PFX"\n", |
| bb->instr_start, callee); |
| DODEBUG({ |
| if (vsyscall) { |
| DO_ONCE({ |
| /* Case 9095: don't complain so loudly if user asked for no elision */ |
| if (DYNAMO_OPTION(max_elide_call) <= 2) |
| SYSLOG_INTERNAL_WARNING("leaving call* to vsyscall"); |
| else |
| ASSERT_CURIOSITY(false && "leaving call* to vsyscall"); |
| }); |
| } |
| });; |
| #elif defined(ARM) |
| /* FIXME i#1551: NYI on ARM */ |
| ASSERT_NOT_IMPLEMENTED(false); |
| #endif /* X86 */ |
| return false; /* stop bb */ |
| } |
| |
| /* if we make the IAT sections unreadable we will need to map to proper location */ |
| static inline app_pc |
| read_from_IAT(app_pc iat_reference) |
| { |
| /* FIXME: we should have looked up where the real IAT should be at |
| * the time of checking whether is_in_IAT |
| */ |
| return *(app_pc*) iat_reference; |
| } |
| |
| #ifdef X86 |
/* Returns whether target is in an IAT of a module that we convert. Note
 * users still have to check the referred-to value to verify whether it
 * targets a native module.
 */
| static bool |
| is_targeting_convertible_IAT(dcontext_t *dcontext, instr_t *instr, |
| app_pc *iat_reference /* OUT */) |
| { |
| /* FIXME: we could give up on optimizing a particular module, |
| * if too many writes to its IAT are found, |
| * even 1 may be too much to handle! |
| */ |
| |
    /* We only allow a constant address, since any registers used for
     * effective address calculation cannot be guaranteed to be constant
     * dynamically.
     */
| /* FIXME: yet a 'call %reg' if that value is an export would be a |
| * good sign that we should go backwards and look for a possible |
| * mov IAT[func] -> %reg and then optimize that as well - case 1948 |
| */ |
| |
| app_pc memory_reference = NULL; |
| opnd_t opnd = instr_get_target(instr); |
| |
| LOG(THREAD, LOG_INTERP, 4, "is_targeting_convertible_IAT: "); |
| |
| /* A typical example of a proper call |
| * ff 15 8810807c call dword ptr [kernel32+0x1088 (7c801088)] |
| * where |
| * [7c801088] = 7c90f04c ntdll!RtlAnsiStringToUnicodeString |
| * |
| * The ModR/M byte for a displacement only with no SIB should be |
| * 15 for CALL, 25 for JMP, (no far versions for IAT) |
| */ |
| if (opnd_is_near_base_disp(opnd)) { |
| /* FIXME PR 253930: pattern-match x64 IAT calls */ |
| IF_X64(ASSERT_NOT_IMPLEMENTED(false)); |
| memory_reference = (app_pc)(ptr_uint_t)opnd_get_disp(opnd); |
| |
| /* now should check all other fields */ |
| if (opnd_get_base(opnd) != REG_NULL || |
| opnd_get_index(opnd) != REG_NULL) { |
| /* this is not a pure memory reference, can't be IAT */ |
| return false; |
| } |
| ASSERT(opnd_get_scale(opnd) == 0); |
| } else { |
| return false; |
| } |
| |
| LOG(THREAD, LOG_INTERP, 3, "is_targeting_convertible_IAT: memory_reference "PFX"\n", |
| memory_reference); |
| |
| /* FIXME: if we'd need some more additional structures those can |
| * be looked up in a separate hashtable based on the IAT base, or |
| * we'd have to extend the vmareas with custom fields |
| */ |
| ASSERT(DYNAMO_OPTION(IAT_convert)); |
| if (vmvector_overlap(IAT_areas, memory_reference, memory_reference+1)) { |
| /* IAT has to be in the same module as current instruction, |
| * but even in the unlikely reference by address from another |
| * module there is really no problem, so not worth checking |
| */ |
| ASSERT_CURIOSITY(get_module_base(instr->bytes) == get_module_base(memory_reference)); |
| |
| /* FIXME: now that we know it is in IAT/GOT, |
| * we have to READ the contents and return that |
| * safely to the caller so they can convert accordingly |
| */ |
| |
| /* FIXME: we would want to add the IAT section to the vmareas |
| * of a region that has a converted block. Then on a write to |
| * IAT we can flush efficiently only blocks affected by a |
| * particular module, for a first hack though flushing |
| * everything on a hooker will do. |
| */ |
| *iat_reference = memory_reference; |
| return true; |
| } else { |
| /* plain global function |
| * e.g. ntdll!RtlUnicodeStringToAnsiString+0x4c: |
| * ff15c009917c call dword ptr [ntdll!RtlAllocateStringRoutine (7c9109c0)] |
| */ |
| return false; |
| } |
| } |
| #endif /* X86 */ |
| |
| /* If the current instr in the BB is an indirect call through IAT that |
| * can be converted into a direct call, process it and return true, |
| * else, return false. |
| */ |
| static bool |
| bb_process_IAT_convertible_indjmp(dcontext_t *dcontext, build_bb_t *bb, |
| bool *elide_continue) |
| { |
| #ifdef X86 |
| app_pc iat_reference; |
| app_pc target; |
| ASSERT(DYNAMO_OPTION(IAT_convert)); |
| |
| /* Check if the instr is a (near) indirect jump */ |
| if (instr_get_opcode(bb->instr) != OP_jmp_ind) { |
| ASSERT_CURIOSITY(false && "far ind jump"); |
| return false; /* not matching, stop bb */ |
| } |
| |
| if (!is_targeting_convertible_IAT(dcontext, bb->instr, |
| &iat_reference)) { |
| DOSTATS({ |
| if (EXIT_IS_IND_JMP_PLT(bb->exit_type)) { |
| /* see how often we mark as likely a PLT a JMP which in |
| * fact is not going through IAT |
| */ |
| STATS_INC(num_indirect_jumps_PLT_not_IAT); |
| LOG(THREAD, LOG_INTERP, 3, |
| "bb_process_IAT_convertible_indjmp: indirect jmp not PLT instr=" |
| PFX"\n", bb->instr->bytes); |
| } |
| }); |
| |
| return false; /* not matching, stop bb */ |
| } |
| |
| target = read_from_IAT(iat_reference); |
| |
| DOLOG(4, LOG_INTERP, { |
| char name[MAXIMUM_SYMBOL_LENGTH]; |
| print_symbolic_address(target, name, sizeof(name), false); |
| LOG(THREAD, LOG_INTERP, 4, |
| "bb_process_IAT_convertible_indjmp: target="PFX" %s\n", target, name); |
| }); |
| |
| STATS_INC(num_indirect_jumps_IAT); |
| DOSTATS({ |
| if (!EXIT_IS_IND_JMP_PLT(bb->exit_type)) { |
| /* count any other known uses for an indirect jump to go |
| * through the IAT other than PLT uses, although a block |
| * reaching max_elide_call would prevent the above |
| * match */ |
| STATS_INC(num_indirect_jumps_IAT_not_PLT); |
| /* FIXME: case 6459 for further inquiry */ |
| LOG(THREAD, LOG_INTERP, 4, |
| "bb_process_IAT_convertible_indjmp: indirect jmp not PLT target="PFX"\n", |
| target); |
| } |
| }); |
| |
| if (must_not_be_elided(target)) { |
| ASSERT_NOT_TESTED(); |
| BBPRINT(bb, 3, |
| " NOT inlining indirect jmp to must_not_be_elided "PFX"\n", target); |
| return false; /* do not convert indirect jump, will stop bb */ |
| } |
| |
| /* Verify not targeting native exec DLLs, note that the IATs of |
| * any module may have imported a native DLL. Note it may be |
| * possible to optimize with a range check on IAT subregions, but |
| * this check isn't much slower. |
| */ |
| |
| /* IAT_elide should definitely not touch native_exec modules. |
| * |
| * FIXME: we also prevent IAT_convert from optimizing imports in |
| * native_exec_list DLLs, although we could let that convert to a |
| * direct jump and require native_exec_dircalls to be always on to |
| * intercept those jmps. |
| */ |
| if (DYNAMO_OPTION(native_exec) && |
| is_native_pc(target)) { |
| BBPRINT(bb, 3, |
| " NOT inlining indirect jump to native exec module "PFX"\n", target); |
| STATS_INC(num_indirect_jumps_IAT_native); |
| return false; /* do not convert indirect jump, stop bb */ |
| } |
| |
    /* mangle mostly as direct jumps would be mangled in
     * bb_process_ubr(dcontext, bb), but note bb->instr has already
     * been appended so we have to reverse some of its actions
| */ |
| |
| /* pretend never saw an indirect JMP, we'll either add a new |
| direct JMP or we'll just continue in target */ |
| instrlist_remove(bb->ilist, bb->instr); /* bb->instr has been appended already */ |
| instr_destroy(dcontext, bb->instr); |
| bb->instr = NULL; |
| |
| if (DYNAMO_OPTION(IAT_elide)) { |
| /* try to elide just as a direct jmp would have been elided */ |
| |
| /* We could have used follow_direct_call instead since |
| * commonly this really is a disguised CALL*. Yet for PLT use |
| * of the form of CALL PLT[foo]; JMP* IAT[foo] we would have |
| * already counted the CALL. If we have tail call elimination |
| * that converts a CALL* into a JMP* it is also OK to treat as |
| * a JMP instead of a CALL just as if sharing tails. |
| */ |
| if (follow_direct_jump(dcontext, bb, target)) { |
| LOG(THREAD, LOG_INTERP, 4, |
| "bb_process_IAT_convertible_indjmp: eliding jmp* target="PFX"\n", |
| target); |
| |
| STATS_INC(num_indirect_jumps_IAT_elided); |
| *elide_continue = true; /* do not stop bb */ |
| return true; /* converted indirect to direct */ |
| } |
| } |
| /* otherwise convert to direct jump without eliding */ |
| |
| /* we set bb->instr to NULL so unlike bb_process_ubr |
| * we get the final exit_target added by build_bb_ilist |
| * FIXME: case 85: which will work only when we're using bb->mangle_ilist |
| * FIXME: what are callers supposed to see when we do NOT mangle? |
| */ |
| |
| LOG(THREAD, LOG_INTERP, 4, |
| "bb_process_IAT_convertible_indjmp: converting jmp* target="PFX"\n", |
| target); |
| |
| STATS_INC(num_indirect_jumps_IAT_converted); |
| /* end basic block with a direct JMP to target */ |
| bb->exit_target = target; |
| *elide_continue = false; /* matching, but should stop bb */ |
| return true; /* matching */ |
| #elif defined(ARM) |
| /* FIXME i#1551: NYI on ARM */ |
| ASSERT_NOT_IMPLEMENTED(false); |
| return false; |
| #endif /* X86/ARM */ |
| } |
| |
/* If the current instr in the BB is an indirect call through the IAT that
 * can be converted into a direct call, processes it, sets elide_continue,
 * and returns true; otherwise returns false.
 * OUT elide_continue is set to true when bb building should continue at the
 * target, and to false when bb building should stop.
| */ |
| bool |
| bb_process_IAT_convertible_indcall(dcontext_t *dcontext, build_bb_t *bb, |
| bool *elide_continue) |
| { |
| #ifdef X86 |
| app_pc iat_reference; |
| app_pc target; |
| ASSERT(DYNAMO_OPTION(IAT_convert)); |
| |
| /* FIXME: the code structure is the same as |
| * bb_process_IAT_convertible_indjmp, could fuse the two |
| */ |
| |
| /* We perform several levels of checking, each increasingly more stringent |
| * and expensive, with a false return should any fail. |
| */ |
| |
| /* Check if the instr is a (near) indirect call */ |
| if (instr_get_opcode(bb->instr) != OP_call_ind) { |
| ASSERT_CURIOSITY(false && "far call"); |
| return false; /* not matching, stop bb */ |
| } |
| |
| if (!is_targeting_convertible_IAT(dcontext, bb->instr, |
| &iat_reference)) { |
| return false; /* not matching, stop bb */ |
| } |
| target = read_from_IAT(iat_reference); |
| DOLOG(4, LOG_INTERP, { |
| char name[MAXIMUM_SYMBOL_LENGTH]; |
| print_symbolic_address(target, name, sizeof(name), false); |
| LOG(THREAD, LOG_INTERP, 4, |
| "bb_process_IAT_convertible_indcall: target="PFX" %s\n", target, name); |
| }); |
| STATS_INC(num_indirect_calls_IAT); |
| |
    /* mangle mostly as direct calls are mangled with
| * bb_process_call_direct(dcontext, bb) |
| */ |
| |
| if (must_not_be_inlined(target)) { |
| ASSERT_NOT_TESTED(); |
| BBPRINT(bb, 3, |
| " NOT inlining indirect call to must_not_be_inlined "PFX"\n", target); |
| return false; /* do not convert indirect call, stop bb */ |
| } |
| |
| /* Verify not targeting native exec DLLs, note that the IATs of |
| * any module may have imported a native DLL. Note it may be |
| * possible to optimize with a range check on IAT subregions, but |
| * this check isn't much slower. |
| */ |
| if (DYNAMO_OPTION(native_exec) && |
| is_native_pc(target)) { |
| BBPRINT(bb, 3, |
| " NOT inlining indirect call to native exec module "PFX"\n", target); |
| STATS_INC(num_indirect_calls_IAT_native); |
| return false; /* do not convert indirect call, stop bb */ |
| } |
| |
    /* mangle_indirect_call will calculate the return address as of
     * bb->instr and will remove bb->instr.
     * FIXME: it would have been better to replace it in the instrlist with a
     * direct call and have mangle_{in,}direct_call use something other than
     * the raw bytes, but this does the job for now.
| */ |
| bb->instr->flags |= INSTR_IND_CALL_DIRECT; |
| |
| if (DYNAMO_OPTION(IAT_elide)) { |
| /* try to elide just as a direct call would have been elided */ |
| if (follow_direct_call(dcontext, bb, target)) { |
| LOG(THREAD, LOG_INTERP, 4, |
| "bb_process_IAT_convertible_indcall: eliding call* flags=0x%08x target=" |
| PFX"\n", bb->instr->flags, target); |
| |
| STATS_INC(num_indirect_calls_IAT_elided); |
| *elide_continue = true; /* do not stop bb */ |
| return true; /* converted indirect to direct */ |
| } |
| } |
| /* otherwise convert to direct call without eliding */ |
| |
| LOG(THREAD, LOG_INTERP, 4, |
| "bb_process_IAT_convertible_indcall: converting call* flags=0x%08x target="PFX |
| "\n", bb->instr->flags, target); |
| |
| STATS_INC(num_indirect_calls_IAT_converted); |
| /* bb->instr has been appended already, and will get removed by |
| * mangle_indirect_call. We don't need to set to NULL, since this |
| * instr is a CTI and the final jump's translation target should |
| * still be the original indirect call. |
| */ |
| bb->exit_target = target; |
| /* end basic block with a direct CALL to target. With default |
| * options it should get mangled to a PUSH; JMP |
| */ |
| *elide_continue = false; /* matching, but should stop bb */ |
| return true; /* converted indirect to direct */ |
| #elif defined(ARM) |
| /* FIXME i#1551: NYI on ARM */ |
| ASSERT_NOT_IMPLEMENTED(false); |
| return false; |
| #endif /* X86/ARM */ |
| } |
| |
| /* Called on instructions that save the FPU state */ |
| static void |
| bb_process_float_pc(dcontext_t *dcontext, build_bb_t *bb) |
| { |
| /* i#698: for instructions that save the floating-point state |
| * (e.g., fxsave), we go back to dispatch to translate the fp pc. |
| * We rule out being in a trace (and thus a potential alternative |
| * would be to use a FRAG_ flag). These are rare instructions so that |
| * shouldn't have a significant perf impact: except we've been hitting |
| * libm code that uses fnstenv and is not rare, so we have non-inlined |
| * translation under an option for now. |
| */ |
| if (DYNAMO_OPTION(translate_fpu_pc)) { |
| bb->exit_type |= LINK_SPECIAL_EXIT; |
| bb->flags |= FRAG_CANNOT_BE_TRACE; |
| } |
| /* If we inline the pc update, we can't persist. Simplest to keep fine-grained. */ |
| bb->flags &= ~FRAG_COARSE_GRAIN; |
| } |
| |
| static bool |
| instr_will_be_exit_cti(instr_t *inst) |
| { |
| /* can't use instr_is_exit_cti() on pre-mangled instrs */ |
| return (instr_is_app(inst) && |
| instr_is_cti(inst) && |
| (!instr_is_near_call_direct(inst) || |
| !must_not_be_inlined(instr_get_branch_target_pc(inst))) |
| /* PR 239470: ignore wow64 syscall, which is an ind call */ |
| IF_WINDOWS(&& !instr_is_wow64_syscall(inst))); |
| } |
| |
| #ifdef CLIENT_INTERFACE |
| /* PR 215217: check syscall restrictions */ |
| static bool |
| client_check_syscall(instrlist_t *ilist, instr_t *inst, |
| bool *found_syscall, bool *found_int) |
| { |
| int op_int = IF_X86_ELSE(OP_int, OP_svc); |
| /* We do consider the wow64 call* a syscall here (it is both |
| * a syscall and a call*: PR 240258). |
| */ |
| if (instr_is_syscall(inst) || instr_get_opcode(inst) == op_int) { |
| if (instr_is_syscall(inst) && found_syscall != NULL) |
| *found_syscall = true; |
| /* Xref PR 313869 - we should be ignoring int 3 here. */ |
| if (instr_get_opcode(inst) == op_int && found_int != NULL) |
| *found_int = true; |
| /* For linux an ignorable syscall is not a problem. Our |
| * pre-syscall-exit jmp is added post client mangling so should |
| * be robust. |
| * FIXME: now that we have -no_inline_ignored_syscalls should |
| * we assert on ignorable also? Probably we'd have to have |
| * an exception for the middle of a trace? |
| */ |
| if (IF_UNIX(TEST(INSTR_NI_SYSCALL, inst->flags)) |
| /* PR 243391: only block-ending interrupt 2b matters */ |
| IF_WINDOWS(instr_is_syscall(inst) || |
| ((instr_get_opcode(inst) == OP_int && |
| instr_get_interrupt_number(inst) == 0x2b)))) { |
| /* This check means we shouldn't hit the exit_type flags |
| * check below but we leave it in place in case we add |
| * other flags in future |
| */ |
| if (inst != instrlist_last(ilist)) { |
| CLIENT_ASSERT(false, "a syscall or interrupt must terminate the block"); |
| return false; |
| } |
| /* should we forcibly delete the subsequent instrs? |
| * or the client has to deal w/ bad behavior in release build? |
| */ |
| } |
| } |
| return true; |
| } |
| |
| /* Pass bb to client, and afterward check for criteria we require and rescan for |
| * eflags and other flags that might have changed. |
| * Returns true normally; returns false to indicate "go native". |
| */ |
| static bool |
| client_process_bb(dcontext_t *dcontext, build_bb_t *bb) |
| { |
| dr_emit_flags_t emitflags = DR_EMIT_DEFAULT; |
| instr_t *inst; |
| bool found_exit_cti = false; |
| bool found_syscall = false; |
| bool found_int = false; |
| #ifdef ANNOTATIONS |
| app_pc trailing_annotation_pc = NULL; |
| #endif |
| instr_t *last_app_instr = NULL; |
| |
    /* This routine is called by more than just the bb builder; it is also used
     * for recreating state, so only call it if the caller requested it
| * (usually that coincides w/ bb->app_interp being set, but not |
| * when recreating state on a fault (PR 214962)). |
| * FIXME: hot patches shouldn't be injected during state recreations; |
| * does predicating on bb->app_interp take care of this issue? |
| */ |
| if (!bb->pass_to_client) |
| return true; |
| |
| /* i#995: DR may build a bb with one invalid instruction, which won't be |
     * passed to the client.
| * FIXME: i#1000, we should present the bb to the client. |
| * i#1000-c#1: the bb->ilist could be empty. |
| */ |
| if (instrlist_first(bb->ilist) == NULL) |
| return true; |
| if (!instr_opcode_valid(instrlist_first(bb->ilist)) && |
| /* For -fast_client_decode we can have level 0 instrs so check |
| * to ensure this is a single-instr bb that was built just to |
| * raise the fault for us. |
| * XXX i#1000: shouldn't we pass this to the client? It might not handle an |
| * invalid instr properly though. |
| */ |
| instrlist_first(bb->ilist) == instrlist_last(bb->ilist)) { |
| return true; |
| } |
| |
| /* Call the bb creation callback(s) */ |
| if (!instrument_basic_block(dcontext, (app_pc) bb->start_pc, bb->ilist, |
| bb->for_trace, !bb->app_interp, &emitflags)) { |
| /* although no callback was called we must process syscalls/ints (PR 307284) */ |
| } |
| if (bb->for_cache && TEST(DR_EMIT_GO_NATIVE, emitflags)) { |
| LOG(THREAD, LOG_INTERP, 2, "client requested that we go native\n"); |
| SYSLOG_INTERNAL_INFO("thread "TIDFMT" is going native at client request", |
| get_thread_id()); |
| /* we leverage the existing native_exec mechanism */ |
| dcontext->native_exec_postsyscall = bb->start_pc; |
| dcontext->next_tag = BACK_TO_NATIVE_AFTER_SYSCALL; |
| dynamo_thread_not_under_dynamo(dcontext); |
| return false; |
| } |
| |
| bb->post_client = true; |
| |
| /* FIXME: instrumentor may totally mess us up -- our flags |
| * or syscall info might be wrong. xref PR 215217 |
| */ |
| |
| /* PR 215217, PR 240265: |
| * We need to check for client changes that require a new exit |
| * target. We can't practically analyze the instrlist to decipher |
| * the exit, so we'll search backwards and require that the last |
| * cti is the exit cti. Typically, the last instruction in the |
| * block should be the exit. Post-mbr and post-syscall positions |
| * are particularly fragile, as our mangling code sets state up for |
| * the exit that could be messed up by instrs inserted after the |
| * mbr/syscall. We thus disallow such instrs (except for |
| * dr_insert_mbr_instrumentation()). xref cases 10503, 10782, 10784 |
| * |
| * Here's what we support: |
| * - more than one exit cti; all but the last must be a ubr |
| * - an exit cbr or call must be the final instr in the block |
| * - only one mbr; must be the final instr in the block and the exit target |
| * - clients can't change the exit of blocks ending in a syscall |
| * (or int), and the syscall must be the final instr in the block; |
| * client can, however, remove the syscall and then add a different exit |
| * - client can't add a translation target that's outside of the original |
| * source code bounds, or else our cache consistency breaks down |
| * (the one exception to this is that a jump can translate to its target) |
| */ |
| |
| /* we set to NULL to have a default of fall-through */ |
| bb->exit_target = NULL; |
| bb->exit_type = 0; |
| |
| /* N.B.: we're walking backward */ |
| for (inst = instrlist_last(bb->ilist); inst != NULL; inst = instr_get_prev(inst)) { |
| |
| if (!instr_opcode_valid(inst)) |
| continue; |
| |
| if (instr_is_cti(inst) && inst != instrlist_last(bb->ilist)) { |
| /* PR 213005: coarse_units can't handle added ctis (meta or not) |
| * since decode_fragment(), used for state recreation, can't |
| * distinguish from exit cti. |
| * i#665: we now support intra-fragment meta ctis |
| * to make persistence usable for clients |
| */ |
| if (!opnd_is_instr(instr_get_target(inst)) || instr_is_app(inst)) { |
| bb->flags &= ~FRAG_COARSE_GRAIN; |
| STATS_INC(coarse_prevent_client); |
| } |
| } |
| |
| if (instr_is_meta(inst)) { |
| #ifdef ANNOTATIONS |
| /* Save the trailing_annotation_pc in case a client truncates the bb there. */ |
| if (is_annotation_label(inst) && last_app_instr == NULL) { |
| dr_instr_label_data_t *label_data = instr_get_label_data_area(inst); |
| trailing_annotation_pc = GET_ANNOTATION_APP_PC(label_data); |
| } |
| #endif |
| continue; |
| } |
| |
| /* in case bb was truncated, find last non-meta fall-through */ |
| if (last_app_instr == NULL) |
| last_app_instr = inst; |
| |
| /* PR 215217: client should not add new source code regions, else our |
| * cache consistency (both page prot and selfmod) will fail |
| */ |
| ASSERT(!bb->for_cache || bb->vmlist != NULL); |
| /* For selfmod recreation we don't check vmareas so we don't have vmlist. |
| * We live w/o the checks there. |
| */ |
| CLIENT_ASSERT(!bb->for_cache || |
| vm_list_overlaps(dcontext, bb->vmlist, |
| instr_get_translation(inst), |
| instr_get_translation(inst)+1) || |
| (instr_is_ubr(inst) && opnd_is_pc(instr_get_target(inst)) && |
| instr_get_translation(inst) == opnd_get_pc(instr_get_target(inst))) |
| /* the displaced code and jmp return from intercept buffer |
| * has translation fields set to hooked app routine */ |
| IF_WINDOWS(|| dr_fragment_app_pc(bb->start_pc) != bb->start_pc), |
| "block's app sources (instr_set_translation() targets) " |
| "must remain within original bounds"); |
| |
| /* PR 307284: we didn't process syscalls and ints pre-client |
| * so do so now to get bb->flags and bb->exit_type set |
| */ |
| if (instr_is_syscall(inst) || |
| instr_get_opcode(inst) == IF_X86_ELSE(OP_int, OP_svc)) { |
| instr_t *tmp = bb->instr; |
| bb->instr = inst; |
| if (instr_is_syscall(bb->instr)) |
| bb_process_syscall(dcontext, bb); |
| else if (instr_get_opcode(bb->instr) == IF_X86_ELSE(OP_int, OP_svc)) { |
| /* non-syscall int */ |
| bb_process_interrupt(dcontext, bb); |
| } |
| if (inst != instrlist_last(bb->ilist)) |
| bb->instr = tmp; |
| } |
| |
| /* ensure syscall/int2b terminates block */ |
| client_check_syscall(bb->ilist, inst, &found_syscall, &found_int); |
| |
| if (instr_will_be_exit_cti(inst)) { |
| |
| if (!found_exit_cti) { |
| /* We're about to clobber the exit_type and could lose any |
| * special flags set above, even if the client doesn't change |
| * the exit target. We undo such flags after this ilist walk |
| * to support client removal of syscalls/ints. |
| * EXIT_IS_IND_JMP_PLT() is used for -IAT_{convert,elide}, which |
| * is off by default for CI; it's also used for native_exec, |
| * but we're not sure if we want to support that with CI. |
| * xref case 10846 and i#198 |
| */ |
| CLIENT_ASSERT(!TEST(~(LINK_DIRECT | LINK_INDIRECT | LINK_CALL | |
| LINK_RETURN | LINK_JMP | |
| LINK_NI_SYSCALL_ALL | LINK_SPECIAL_EXIT |
| IF_WINDOWS(| LINK_CALLBACK_RETURN)), |
| bb->exit_type) && |
| !EXIT_IS_IND_JMP_PLT(bb->exit_type), |
| "client unsupported block exit type internal error"); |
| |
| found_exit_cti = true; |
| bb->instr = inst; |
| |
| if (instr_is_near_ubr(inst) || instr_is_near_call_direct(inst)) { |
| CLIENT_ASSERT(instr_is_near_ubr(inst) || |
| inst == instrlist_last(bb->ilist) || |
| /* for elision we assume calls are followed |
| * by their callee target code |
| */ |
| DYNAMO_OPTION(max_elide_call) > 0, |
| "an exit call must terminate the block"); |
| /* a ubr need not be the final instr */ |
| if (inst == last_app_instr) { |
| bb->exit_target = instr_get_branch_target_pc(inst); |
| bb->exit_type = instr_branch_type(inst); |
| } |
| } |
| else if (instr_is_cbr(inst)) { |
| CLIENT_ASSERT(inst == instrlist_last(bb->ilist), |
| "an exit cbr must terminate the block"); |
| /* A null exit target specifies a cbr (see below). */ |
| bb->exit_target = NULL; |
| bb->exit_type = 0; |
| instr_exit_branch_set_type(bb->instr, |
| instr_branch_type(inst)); |
| } |
| else { |
| ASSERT(instr_is_mbr(inst) || instr_is_far_cti(inst)); |
| CLIENT_ASSERT(inst == instrlist_last(bb->ilist), |
| "an exit mbr or far cti must terminate the block"); |
| bb->exit_type = instr_branch_type(inst); |
| bb->exit_target = get_ibl_routine(dcontext, |
| get_ibl_entry_type(bb->exit_type), |
| DEFAULT_IBL_BB(), |
| get_ibl_branch_type(inst)); |
| } |
| |
| /* since we're walking backward, at the first exit cti |
| * we can check for post-cti code |
| */ |
| if (inst != instrlist_last(bb->ilist)) { |
| if (TEST(FRAG_COARSE_GRAIN, bb->flags)) { |
| /* PR 213005: coarse can't handle code beyond ctis */ |
| bb->flags &= ~FRAG_COARSE_GRAIN; |
| STATS_INC(coarse_prevent_client); |
| } |
| /* decode_fragment can't handle code beyond ctis */ |
| if (!instr_is_near_call_direct(inst) || |
| DYNAMO_OPTION(max_elide_call) == 0) |
| bb->flags |= FRAG_CANNOT_BE_TRACE; |
| } |
| |
| } |
| |
| /* Case 10784: Clients can confound trace building when they |
| * introduce more than one exit cti; we'll just disable traces |
| * for these fragments. |
| * PR 215179: we're currently later marking them no-trace for pad_jmps |
| * reasons as well. |
| */ |
| else { |
| CLIENT_ASSERT(instr_is_near_ubr(inst) || |
| (instr_is_near_call_direct(inst) && |
| /* for elision we assume calls are followed |
| * by their callee target code |
| */ |
| DYNAMO_OPTION(max_elide_call) > 0), |
| "a second exit cti must be a ubr"); |
| if (!instr_is_near_call_direct(inst) || |
| DYNAMO_OPTION(max_elide_call) == 0) |
| bb->flags |= FRAG_CANNOT_BE_TRACE; |
| /* our cti check above should have already turned off coarse */ |
| ASSERT(!TEST(FRAG_COARSE_GRAIN, bb->flags)); |
| } |
| } |
| } |
| |
| /* To handle the client modifying syscall numbers we cannot inline |
| * syscalls in the middle of a bb. |
| */ |
| ASSERT(!DYNAMO_OPTION(inline_ignored_syscalls)); |
| |
| ASSERT((TEST(FRAG_HAS_SYSCALL, bb->flags) && found_syscall) || |
| (!TEST(FRAG_HAS_SYSCALL, bb->flags) && !found_syscall)); |
| IF_WINDOWS(ASSERT(!TEST(LINK_CALLBACK_RETURN, bb->exit_type) || found_int)); |
| |
| /* Note that we do NOT remove, or set, FRAG_HAS_DIRECT_CTI based on |
| * client modifications: setting it for a selfmod fragment could |
| * result in an infinite loop, and it is mainly used for elision, which we |
     * are not doing for client ctis. Clients are not supposed to add new
| * app source regions (PR 215217). |
| */ |
| |
| /* Client might have truncated: re-set fall-through. */ |
| if (last_app_instr != NULL) { |
| #ifdef ANNOTATIONS |
| if (trailing_annotation_pc != NULL) { |
| /* If the client truncated at an annotation, include the annotation. */ |
| bb->cur_pc = trailing_annotation_pc; |
| } else { |
| #endif |
| /* We do not take instr_length of what the client put in, but rather |
| * the length of the translation target |
| */ |
| app_pc last_app_pc = instr_get_translation(last_app_instr); |
| bb->cur_pc = decode_next_pc(dcontext, last_app_pc); |
| #ifdef ANNOTATIONS |
| } |
| #endif |
| LOG(THREAD, LOG_INTERP, 3, |
| "setting cur_pc (for fall-through) to" PFX"\n", bb->cur_pc); |
| /* don't set bb->instr if last instr is still syscall/int. |
| * FIXME: I'm not 100% convinced the logic here covers everything |
| * build_bb_ilist does. |
| * FIXME: what about if last instr was invalid, or if client adds |
| * some invalid instrs: xref bb_process_invalid_instr() |
| */ |
| if (bb->instr != NULL || (!found_int && !found_syscall)) |
| bb->instr = last_app_instr; |
| } else |
| bb->instr = NULL; /* no app instrs left */ |
| |
| /* PR 215217: re-scan for accurate eflags. |
| * FIXME: should we not do eflags tracking while decoding, then, and always |
| * do it afterward? |
| */ |
| /* for -fast_client_decode, we don't support the client changing the app code */ |
| if (!INTERNAL_OPTION(fast_client_decode)) { |
| bb->eflags = forward_eflags_analysis(dcontext, bb->ilist, |
| instrlist_first(bb->ilist)); |
| } |
| |
| if (TEST(DR_EMIT_STORE_TRANSLATIONS, emitflags)) { |
| /* PR 214962: let client request storage instead of recreation */ |
| bb->flags |= FRAG_HAS_TRANSLATION_INFO; |
| /* if we didn't have record on from start, can't store translation info */ |
| CLIENT_ASSERT(!INTERNAL_OPTION(fast_client_decode), |
| "-fast_client_decode not compatible with " |
| "DR_EMIT_STORE_TRANSLATIONS"); |
| ASSERT(bb->record_translation && bb->full_decode); |
| } |
| |
| if (DYNAMO_OPTION(coarse_enable_freeze)) { |
| /* If we're not persisting, ignore the presence or absence of the flag |
| * so we avoid undoing savings from -opt_memory with a tool that |
| * doesn't support persistence. |
| */ |
| if (!TEST(DR_EMIT_PERSISTABLE, emitflags)) { |
| bb->flags &= ~FRAG_COARSE_GRAIN; |
| STATS_INC(coarse_prevent_client); |
| } |
| } |
| |
| if (TEST(DR_EMIT_MUST_END_TRACE, emitflags)) { |
| /* i#848: let client terminate traces */ |
| bb->flags |= FRAG_MUST_END_TRACE; |
| } |
| return true; |
| } |
| #endif /* CLIENT_INTERFACE */ |
| |
| #ifdef DR_APP_EXPORTS |
| static void |
| mangle_pre_client(dcontext_t *dcontext, build_bb_t *bb) |
| { |
| if (bb->start_pc == (app_pc) dr_app_running_under_dynamorio) { |
| /* i#1237: set return value to be true in dr_app_running_under_dynamorio */ |
| instr_t *ret = instrlist_last(bb->ilist); |
| instr_t *mov = instr_get_prev(ret); |
| ASSERT(ret != NULL && instr_is_return(ret) && |
| mov != NULL && |
| IF_X86(instr_get_opcode(mov) == OP_mov_imm &&) |
| IF_ARM(instr_get_opcode(mov) == OP_mov && |
| OPND_IS_IMMED_INT(instr_get_src(mov, 0)) &&) |
| (bb->start_pc == instr_get_raw_bits(mov) || |
| /* the translation field might be NULL */ |
| bb->start_pc == instr_get_translation(mov))); |
| instr_set_src(mov, 0, OPND_CREATE_INT32(1)); |
| } |
| } |
| #endif /* DR_APP_EXPORTS */ |
| |
| /* Interprets the application's instructions until the end of a basic |
| * block is found, and prepares the resulting instrlist for creation of |
| * a fragment, but does not create the fragment, just returns the instrlist. |
| * Caller is responsible for freeing the list and its instrs! |
| * |
| * Input parameters in bb control aspects of creation: |
| * If app_interp is true, this is considered real app code. |
| * If pass_to_client is true, |
| * calls instrument routine on bb->ilist before mangling |
| * If mangle_ilist is true, mangles the ilist, else leaves it in app form |
| * If record_vmlist is true, updates the vmareas data structures |
| * If for_cache is true, bb building lock is assumed to be held. |
| * record_vmlist should also be true. |
| * Caller must set and later clear dcontext->bb_build_info. |
| * For !for_cache, build_bb_ilist() sets and clears it, making the |
| * assumption that the caller is doing no other reading from the region. |
| * If record_translation is true, records translation for inserted instrs |
| * If outf != NULL, does full disassembly with comments to outf |
| * If overlap_info != NULL, records overlap information for the block in |
| * the overlap_info (caller must fill in region_start and region_end). |
| * |
| * FIXME: now that we have better control over following direct ctis,
| * should we have an adaptive mechanism to decide whether to follow direct
| * ctis, since some benchmarks do better when we do (gap, vortex, wupwise)
| * and others do worse (apsi, perlbmk)?
| */ |
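| /* Hedged caller sketch (illustrative only; the field names follow the
| * description above and the code below, and real callers in this file
| * typically fill in the build_bb_t through an init helper rather than by
| * hand, which also sets the initialized flag asserted below):
| *
| * build_bb_t bb = {0};
| * bb.start_pc = pc; (where interpretation begins)
| * bb.app_interp = true; (treat as real app code)
| * bb.pass_to_client = true; (run client instrumentation before mangling)
| * bb.mangle_ilist = true; (return a mangled, emit-ready ilist)
| * bb.record_vmlist = true; (update the vmareas data structures)
| * bb.initialized = true; (normally done by the init helper)
| * build_bb_ilist(dcontext, &bb);
| * (consume bb.ilist, then free the list and all its instrs)
| */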
| static void |
| build_bb_ilist(dcontext_t *dcontext, build_bb_t *bb) |
| { |
| /* Design decision: we will not try to identify branches that target
| * instructions in this basic block; when we take those branches we will
| * just make a new basic block and duplicate part of this one.
| */
| int total_branches = 0; |
| uint total_instrs = 0; |
| uint total_writes = 0; /* only used for selfmod */ |
| instr_t *non_cti; /* used if !full_decode */ |
| byte *non_cti_start_pc; /* used if !full_decode */ |
| uint eflags_6 = 0; /* holds arith eflags written so far (in read slots) */ |
| #ifdef HOT_PATCHING_INTERFACE |
| bool hotp_should_inject = false, hotp_injected = false; |
| #endif |
| app_pc page_start_pc = (app_pc) NULL; |
| bool bb_build_nested = false; |
| /* Caller will free objects allocated here, so we must use the passed-in
| * dcontext for allocation; we need a separate var for the non-global dcontext.
| */
| dcontext_t *my_dcontext = get_thread_private_dcontext(); |
| DEBUG_DECLARE(bool regenerated = false;) |
| bool stop_bb_on_fallthrough = false; |
| |
| ASSERT(bb->initialized); |
| /* note that it's ok for bb->start_pc to be NULL as our check_new_page_start |
| * will catch it |
| */ |
| /* vmlist must start out empty (or N/A) */ |
| ASSERT(bb->vmlist == NULL || !bb->record_vmlist); |
| ASSERT(!bb->for_cache || bb->record_vmlist); /* for_cache assumes record_vmlist */ |
| |
| #ifdef CUSTOM_TRACES_RET_REMOVAL |
| my_dcontext->num_calls = 0; |
| my_dcontext->num_rets = 0; |
| #endif |
| |
| /* Support bb abort on decode fault */ |
| if (my_dcontext != NULL) { |
| if (bb->for_cache) { |
| /* Caller should have set! */ |
| ASSERT(bb == (build_bb_t *) my_dcontext->bb_build_info); |
| } else if (my_dcontext->bb_build_info == NULL) { |
| my_dcontext->bb_build_info = (void *) bb; |
| } else { |
| /* For nested we leave the original, which should be the only vmlist, |
| * and we give up on freeing dangling instr_t and instrlist_t from this decode. |
| * We need the original's for_cache so we know to free the bb_building_lock. |
| * FIXME: use TRY to handle decode exceptions locally? Shouldn't have |
| * violation remediations on a !for_cache build. |
| */ |
| ASSERT(bb->vmlist == NULL && !bb->for_cache && |
| ((build_bb_t *)my_dcontext->bb_build_info)->for_cache); |
| /* FIXME: add nested as a field so we can have stat on nested faults */ |
| bb_build_nested = true; |
| } |
| } else |
| ASSERT(dynamo_exited); |
| |
| if ((bb->record_translation |
| IF_CLIENT_INTERFACE(&& !INTERNAL_OPTION(fast_client_decode))) || |
| !bb->for_cache |
| /* to split riprel, need to decode every instr */ |
| /* in x86_to_x64, need to translate every x86 instr */ |
| IF_X64(|| DYNAMO_OPTION(coarse_split_riprel) || DYNAMO_OPTION(x86_to_x64)) |
| IF_CLIENT_INTERFACE(|| INTERNAL_OPTION(full_decode))) |
| bb->full_decode = true; |
| else { |
| #if defined(STEAL_REGISTER) || defined(CHECK_RETURNS_SSE2) |
| bb->full_decode = true; |
| #endif |
| } |
| |
| LOG(THREAD, LOG_INTERP, 3, "\ninterp%s: ", |
| IF_X64_ELSE(X64_MODE_DC(dcontext) ? "" : " (x86 mode)", "")); |
| BBPRINT(bb, 3, "start_pc = "PFX"\n", bb->start_pc); |
|