/* **********************************************************
* Copyright (c) 2011-2014 Google, Inc. All rights reserved.
* Copyright (c) 2001-2010 VMware, Inc. All rights reserved.
* **********************************************************/
/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/* Copyright (c) 2003-2007 Determina Corp. */
/* Copyright (c) 2001-2003 Massachusetts Institute of Technology */
/* Copyright (c) 2001 Hewlett-Packard Company */
/*
* interp.c - interpreter used for native trace selection
*/
#include "../globals.h"
#include "../link.h"
#include "../fragment.h"
#include "../emit.h"
#include "../dispatch.h"
#include "../fcache.h"
#include "../monitor.h" /* for trace_abort and monitor_data_t */
#include "arch.h"
#include "instr.h"
#include "instr_create.h"
#include "instrlist.h"
#include "decode.h"
#include "decode_fast.h"
#include "disassemble.h"
#include <string.h> /* for memcpy */
#include "instrument.h"
#include "../hotpatch.h"
#ifdef RETURN_AFTER_CALL
# include "../rct.h"
#endif
#ifdef WINDOWS
# include "ntdll.h" /* for EXCEPTION_REGISTRATION */
# include "../nudge.h" /* for generic_nudge_target() address */
#endif
#include "../perscache.h"
#include "../native_exec.h"
#ifdef CHECK_RETURNS_SSE2
# include <setjmp.h> /* for warning when see libc setjmp */
#endif
#ifdef VMX86_SERVER
# include "vmkuw.h" /* VMKUW_SYSCALL_GATEWAY */
#endif
#ifdef ANNOTATIONS
# include "../annotations.h"
#endif
enum { DIRECT_XFER_LENGTH = 5 }; /* size of a direct near jmp/call on x86: 1-byte opcode + 4-byte rel32 */
/* forward declarations */
static void process_nops_for_trace(dcontext_t *dcontext, instrlist_t *ilist, uint flags
_IF_DEBUG(bool recreating));
static int fixup_last_cti(dcontext_t *dcontext, instrlist_t *trace,
app_pc next_tag, uint next_flags, uint trace_flags,
fragment_t *prev_f, linkstub_t *prev_l,
bool record_translation, uint *num_exits_deleted/*OUT*/,
/* If non-NULL, only looks inside trace between these two */
instr_t *start_instr, instr_t *end_instr);
bool mangle_trace(dcontext_t *dcontext, instrlist_t *ilist, monitor_data_t *md);
/* we use a branch limit of 1 to make it easier for the trace
* creation mechanism to stitch basic blocks together
*/
#define BRANCH_LIMIT 1
/* we limit total bb size to handle cases like infinite loop or sequence
* of calls.
* also, we have a limit on fragment body sizes, which should be impossible
* to break since x86 instrs are max 17 bytes and we only modify ctis.
* Although...selfmod mangling does really expand fragments!
* -selfmod_max_writes helps for selfmod bbs (case 7893/7909).
* System call mangling is also large, for degenerate cases like tests/linux/infinite.
* PR 215217: also client additions: we document and assert.
* FIXME: need better way to know how big will get, b/c we can construct
* cases that will trigger the size assertion!
*/
/* define replaced by -max_bb_instrs option */
/* exported so micro routines can assert whether held */
DECLARE_CXTSWPROT_VAR(mutex_t bb_building_lock, INIT_LOCK_FREE(bb_building_lock));
/* i#1111: we do not use the lock until the 2nd thread is created */
volatile bool bb_lock_start;
#ifdef INTERNAL
file_t bbdump_file = INVALID_FILE;
#endif
/* initialization */
void
interp_init()
{
#ifdef INTERNAL
if (INTERNAL_OPTION(bbdump_tags)) {
bbdump_file = open_log_file("bbs", NULL, 0);
ASSERT(bbdump_file != INVALID_FILE);
}
#endif
}
#ifdef CUSTOM_TRACES_RET_REMOVAL
# ifdef DEBUG
/* don't bother with adding lock */
static int num_rets_removed;
# endif
#endif
/* cleanup */
void
interp_exit()
{
#ifdef INTERNAL
if (INTERNAL_OPTION(bbdump_tags)) {
close_log_file(bbdump_file);
}
#endif
DELETE_LOCK(bb_building_lock);
LOG(GLOBAL, LOG_INTERP|LOG_STATS, 1, "Total application code seen: %d KB\n",
GLOBAL_STAT(app_code_seen)/1024);
#ifdef CUSTOM_TRACES_RET_REMOVAL
# ifdef DEBUG
LOG(GLOBAL, LOG_INTERP|LOG_STATS, 1, "Total rets removed: %d\n",
num_rets_removed);
# endif
#endif
}
/****************************************************************************
****************************************************************************
*
* B A S I C   B L O C K   B U I L D I N G
*/
/* we have a lot of data to pass around so we package it in this struct
* so we can have separate routines for readability
*/
typedef struct {
/* in */
app_pc start_pc;
bool app_interp; /* building bb to interp app, as opposed to for pc
* translation or figuring out what pages a bb touches? */
bool for_cache; /* normal to-be-executed build? */
bool record_vmlist; /* should vmareas be updated? */
bool mangle_ilist; /* should bb ilist be mangled? */
bool record_translation; /* store translation info for each instr_t? */
bool has_bb_building_lock; /* usually ==for_cache; used for aborting bb building */
file_t outf; /* send disassembly and notes to a file?
* we use this mainly for dumping trace origins */
app_pc stop_pc; /* Optional: NULL for normal termination rules.
* Only checked for full_decode.
*/
#ifdef CLIENT_INTERFACE
bool pass_to_client; /* pass to client, if a bb hook exists;
* we store this up front to avoid race conditions
* between full_decode setting and hook calling time.
*/
bool post_client; /* has the client already processed the bb? */
bool for_trace; /* PR 299808: we tell client if building a trace */
#endif
/* in and out */
overlap_info_t *overlap_info; /* if non-null, records overlap information here;
* caller must initialize region_start and region_end */
/* out */
instrlist_t *ilist;
uint flags;
void *vmlist;
app_pc end_pc;
bool native_exec; /* replace cur ilist with a native_exec version */
bool native_call; /* the gateway is a call */
#ifdef CLIENT_INTERFACE
instrlist_t **unmangled_ilist; /* PR 299808: clone ilist pre-mangling */
#endif
/* internal usage only */
bool full_decode; /* decode every instruction into a separate instr_t? */
bool follow_direct; /* elide unconditional branches? */
bool check_vm_area; /* whether to call check_thread_vm_area() */
uint num_elide_jmp;
uint num_elide_call;
app_pc last_page;
app_pc cur_pc;
app_pc instr_start;
app_pc checked_end; /* end of current vmarea checked */
cache_pc exit_target; /* fall-through target of final instr */
uint exit_type; /* indirect branch type */
ibl_branch_type_t ibl_branch_type; /* indirect branch type as an IBL selector */
#ifdef UNIX
bool invalid_instr_hack;
#endif
instr_t *instr; /* the current instr */
int eflags;
app_pc pretend_pc; /* selfmod only: decode from separate pc */
DEBUG_DECLARE(bool initialized;)
} build_bb_t;
/* forward decl */
static inline bool
bb_process_syscall(dcontext_t *dcontext, build_bb_t *bb);
static void
init_build_bb(build_bb_t *bb, app_pc start_pc, bool app_interp, bool for_cache,
bool mangle_ilist, bool record_translation, file_t outf, uint known_flags,
overlap_info_t *overlap_info)
{
memset(bb, 0, sizeof(*bb));
bb->check_vm_area = true;
bb->start_pc = start_pc;
bb->app_interp = app_interp;
bb->for_cache = for_cache;
if (bb->for_cache)
bb->record_vmlist = true;
bb->mangle_ilist = mangle_ilist;
bb->record_translation = record_translation;
bb->outf = outf;
bb->overlap_info = overlap_info;
bb->follow_direct = !TEST(FRAG_SELFMOD_SANDBOXED, known_flags);
bb->flags = known_flags;
bb->ibl_branch_type = IBL_GENERIC; /* initialization only */
DODEBUG(bb->initialized = true;);
}
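/* Usage sketch (illustrative only; the argument values below are hypothetical):
 *   build_bb_t bb;
 *   init_build_bb(&bb, start_pc, true, true, true, false, INVALID_FILE, 0, NULL);
 * i.e., app_interp=true, for_cache=true, mangle_ilist=true,
 * record_translation=false, no dump file, no known flags, no overlap info.
 */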
static void
reset_overlap_info(dcontext_t *dcontext, build_bb_t *bb)
{
bb->overlap_info->start_pc = bb->start_pc;
bb->overlap_info->min_pc = bb->start_pc;
bb->overlap_info->max_pc = bb->start_pc;
bb->overlap_info->contiguous = true;
bb->overlap_info->overlap = false;
}
static void
update_overlap_info(dcontext_t *dcontext, build_bb_t *bb, app_pc new_pc, bool jmp)
{
if (new_pc < bb->overlap_info->min_pc)
bb->overlap_info->min_pc = new_pc;
if (new_pc > bb->overlap_info->max_pc)
bb->overlap_info->max_pc = new_pc;
/* we get called at end of all contiguous intervals, so ignore jmps */
LOG(THREAD, LOG_ALL, 5, "\t app_bb_overlaps "PFX".."PFX" %s\n",
bb->last_page, new_pc, jmp?"jmp":"");
if (!bb->overlap_info->overlap && !jmp) {
/* contiguous interval: prev_pc..new_pc (open-ended) */
if (bb->last_page < bb->overlap_info->region_end &&
new_pc > bb->overlap_info->region_start) {
LOG(THREAD_GET, LOG_ALL, 5, "\t it overlaps!\n");
bb->overlap_info->overlap = true;
}
}
if (bb->overlap_info->contiguous && jmp)
bb->overlap_info->contiguous = false;
}
#ifdef DEBUG
# define BBPRINT(bb, level, ...) do { \
LOG(THREAD, LOG_INTERP, level, __VA_ARGS__); \
if (bb->outf != INVALID_FILE && bb->outf != (THREAD)) \
print_file(bb->outf, __VA_ARGS__); \
} while (0);
#else
# ifdef INTERNAL
# define BBPRINT(bb, level, ...) do { \
if (bb->outf != INVALID_FILE) \
print_file(bb->outf, __VA_ARGS__); \
} while (0);
# else
# define BBPRINT(bb, level, ...) /* nothing */
# endif
#endif
#ifdef WINDOWS
extern void intercept_load_dll(void);
extern void intercept_unload_dll(void);
# ifdef INTERNAL
extern void DllMainThreadAttach(void);
# endif
#endif
/* forward declarations */
static bool
mangle_bb_ilist(dcontext_t *dcontext, build_bb_t *bb);
static void
build_native_exec_bb(dcontext_t *dcontext, build_bb_t *bb);
static bool
at_native_exec_gateway(dcontext_t *dcontext, app_pc start, bool *is_call
_IF_DEBUG(bool xfer_target));
#ifdef DEBUG
static void
report_native_module(dcontext_t *dcontext, app_pc modpc);
#endif
/***************************************************************************
* Image entry
*/
static bool reached_image_entry = false;
static INLINE_FORCED bool
check_for_image_entry(app_pc bb_start)
{
if (!reached_image_entry && bb_start == get_image_entry()) {
LOG(THREAD_GET, LOG_ALL, 1, "Reached image entry point "PFX"\n", bb_start);
set_reached_image_entry();
return true;
}
return false;
}
void
set_reached_image_entry()
{
SELF_UNPROTECT_DATASEC(DATASEC_RARELY_PROT);
reached_image_entry = true;
SELF_PROTECT_DATASEC(DATASEC_RARELY_PROT);
}
bool
reached_image_entry_yet()
{
return reached_image_entry;
}
/***************************************************************************
* Whether to inline or elide callees
*/
/* return true if pc is a call target that should NOT be inlined */
#if defined(DEBUG) || !defined(WINDOWS)
/* cl.exe non-debug won't let other modules use it if inlined */
inline
#endif
bool
must_not_be_inlined(app_pc pc)
{
return (
#ifdef INTERNAL
!dynamo_options.inline_calls
#else
0
#endif
#ifdef WINDOWS
|| pc == (app_pc)intercept_load_dll
|| pc == (app_pc)intercept_unload_dll
/* we're guaranteed to have direct calls to the next routine since our
* own DllMain calls it! */
# ifdef INTERNAL
|| pc == (app_pc) DllMainThreadAttach
# endif
/* check for nudge handling escape from cache */
|| (pc == (app_pc)generic_nudge_handler)
#else
/* PR 200203: long-term we want to control loading of client
* libs, but for now we have to let the loader call _fini()
* in the client, which may end up calling __wrap_free().
* It's simpler to let those be interpreted and make a native
* call to the real heap routine here as this is a direct
* call whereas we'd need native_exec for the others:
*/
|| pc == (app_pc)global_heap_free
#endif
#ifdef DR_APP_EXPORTS
/* i#1237: DR will change dr_app_running_under_dynamorio return value
* on seeing a bb starting at dr_app_running_under_dynamorio.
*/
|| pc == (app_pc) dr_app_running_under_dynamorio
#endif
);
}
/* return true if pc is a direct jmp target that should NOT be elided and followed */
static inline bool
must_not_be_elided(app_pc pc)
{
#ifdef WINDOWS
/* Allow only the return jump in the landing pad to be elided, as we
* interpret the return path from trampolines. The forward jump leads to
* the trampoline and shouldn't be elided. */
if (is_on_interception_initial_route(pc))
return true;
#endif
return (0
#ifdef WINDOWS
/* we insert trampolines by adding direct jmps to our interception code buffer
* we don't want to interpret the code in that buffer, as it may swap to the
* dstack and mess up a return-from-fcache.
* N.B.: if use this routine anywhere else, pay attention to the
* hack for is_syscall_trampoline() in the use here!
*/
|| (is_in_interception_buffer(pc))
#else /* UNIX */
#endif
);
}
#ifdef DR_APP_EXPORTS
/* This function allows automatically injected dynamo to ignore
* dynamo API routines that would really mess things up
*/
static inline bool
must_escape_from(app_pc pc)
{
/* if ever find ourselves at top of one of these, immediately issue
* a ret instruction...haven't set up frame yet so stack fine, only
* problem is return value, go ahead and overwrite xax, it's caller-saved
* FIXME: is this ok?
*/
/* Note that we can't just look for direct calls to these functions
* because of stubs, etc. that end up doing indirect jumps to them!
*/
bool res = false
#ifdef DR_APP_EXPORTS
|| (automatic_startup &&
(pc == (app_pc)dynamorio_app_init ||
pc == (app_pc)dr_app_start ||
pc == (app_pc)dynamo_thread_init ||
pc == (app_pc)dynamorio_app_exit ||
/* dr_app_stop is a nop already */
pc == (app_pc)dynamo_thread_exit))
#endif
;
#ifdef DEBUG
if (res) {
# ifdef DR_APP_EXPORTS
LOG(THREAD_GET, LOG_INTERP, 3, "must_escape_from: found ");
if (pc == (app_pc)dynamorio_app_init)
LOG(THREAD_GET, LOG_INTERP, 3, "dynamorio_app_init\n");
else if (pc == (app_pc)dr_app_start)
LOG(THREAD_GET, LOG_INTERP, 3, "dr_app_start\n");
/* FIXME: are dynamo_thread_* still needed here? */
else if (pc == (app_pc)dynamo_thread_init)
LOG(THREAD_GET, LOG_INTERP, 3, "dynamo_thread_init\n");
else if (pc == (app_pc)dynamorio_app_exit)
LOG(THREAD_GET, LOG_INTERP, 3, "dynamorio_app_exit\n");
else if (pc == (app_pc)dynamo_thread_exit)
LOG(THREAD_GET, LOG_INTERP, 3, "dynamo_thread_exit\n");
# endif
}
#endif
return res;
}
#endif /* DR_APP_EXPORTS */
/* Adds bb->instr, which must be a direct call or jmp, to bb->ilist for native
* execution. Makes sure its target is reachable from the code cache, which
* is critical for jmps b/c they're native for our hooks of app code which may
* not be reachable from the code cache. Also needed for calls b/c in the future
* (i#774) the DR lib (and thus our must_not_be_inlined() calls) won't be reachable
* from the cache.
*/
static void
bb_add_native_direct_xfer(dcontext_t *dcontext, build_bb_t *bb, bool appended)
{
#ifdef X64
/* i#922: we're going to run this jmp from our code cache so we have to
* make sure it still reaches its target. We could try to check
* reachability from the likely code cache slot, but these should be
* rare enough that making them indirect won't matter and then we have
* fewer reachability dependences.
* We do this here rather than in mangle() b/c we'd have a hard time
* distinguishing native jmp/call due to DR's own operations from a
* client's inserted meta jmp/call.
*/
/* Strategy: write target into xax (DR-reserved) slot and jmp through it.
* Alternative would be to embed the target into the code stream.
* We don't need to set translation b/c these are meta instrs and they
* won't fault.
*/
ptr_uint_t tgt = (ptr_uint_t) opnd_get_pc(instr_get_target(bb->instr));
opnd_t tls_slot = opnd_create_sized_tls_slot(os_tls_offset(TLS_XAX_SLOT), OPSZ_4);
instrlist_meta_append(bb->ilist, INSTR_CREATE_mov_imm
(dcontext, tls_slot, OPND_CREATE_INT32((int)tgt)));
opnd_set_disp(&tls_slot, opnd_get_disp(tls_slot) + 4);
instrlist_meta_append(bb->ilist, INSTR_CREATE_mov_imm
(dcontext, tls_slot, OPND_CREATE_INT32((int)(tgt >> 32))));
if (instr_is_ubr(bb->instr)) {
instrlist_meta_append(bb->ilist, INSTR_CREATE_jmp_ind
(dcontext,
opnd_create_tls_slot(os_tls_offset(TLS_XAX_SLOT))));
bb->exit_type |= instr_branch_type(bb->instr);
} else {
ASSERT(instr_is_call_direct(bb->instr));
instrlist_meta_append(bb->ilist, INSTR_CREATE_call_ind
(dcontext,
opnd_create_tls_slot(os_tls_offset(TLS_XAX_SLOT))));
}
if (appended)
instrlist_remove(bb->ilist, bb->instr);
instr_destroy(dcontext, bb->instr);
bb->instr = NULL;
#else
if (appended) {
/* avoid assert about meta w/ translation but no restore_state callback */
instr_set_translation(bb->instr, NULL);
} else
instrlist_append(bb->ilist, bb->instr);
/* Indicate that relative target must be
* re-encoded, and that it is not an exit cti.
* However, we must mangle this to ensure it reaches its target (i#992),
* which we special-case in mangle().
*/
instr_set_meta(bb->instr);
instr_set_raw_bits_valid(bb->instr, false);
#endif
}
/* Perform checks such as looking for dynamo stopping points and bad places
* to be. We assume we only have to check after control transfer instructions,
* i.e., we assume that all of these conditions are procedures that are only
* entered by calling or jumping, never falling through.
*/
static inline bool
check_for_stopping_point(dcontext_t *dcontext, build_bb_t *bb)
{
#ifdef DR_APP_EXPORTS
if (must_escape_from(bb->cur_pc)) {
/* x64 will zero-extend to rax, so we use eax here */
reg_id_t reg = IF_X86_ELSE(REG_EAX, DR_REG_R0);
BBPRINT(bb, 3, "interp: emergency exit from "PFX"\n", bb->cur_pc);
/* if ever find ourselves at top of one of these, immediately issue
* a ret instruction...haven't set up frame yet so stack fine, only
* problem is return value, go ahead and overwrite xax, it's
* caller-saved.
* FIXME: is this ok?
*/
/* move 0 into xax/r0 -- our functions return 0 to indicate success */
instrlist_append(bb->ilist,
XINST_CREATE_load_int(dcontext,
opnd_create_reg(reg),
OPND_CREATE_INT32(0)));
/* insert a ret instruction */
instrlist_append(bb->ilist, XINST_CREATE_return(dcontext));
/* should this be treated as a real return? */
bb->exit_type |= LINK_INDIRECT | LINK_RETURN;
bb->exit_target = get_ibl_routine(dcontext, IBL_LINKED, DEFAULT_IBL_BB(), IBL_RETURN);
return true;
}
#endif /* DR_APP_EXPORTS */
#ifdef CHECK_RETURNS_SSE2
if (bb->cur_pc == (app_pc)longjmp) {
SYSLOG_INTERNAL_WARNING("encountered longjmp, which will cause ret mismatch!");
}
#endif
return is_stopping_point(dcontext, bb->cur_pc);
}
/* Arithmetic eflags analysis to see if sequence of instrs reads an
* arithmetic flag prior to writing it.
* Usage: first initialize status to 0 and eflags_6 to 0.
* Then call this routine for each instr in sequence, assigning result to status.
* eflags_6 holds flags written and read so far.
* Uses these flags, defined in instr.h, as status values:
* EFLAGS_WRITE_ARITH = writes all arith flags before reading any
* EFLAGS_WRITE_OF = writes OF before reading it (x86-only)
* EFLAGS_READ_ARITH = reads some of arith flags before writing
* EFLAGS_READ_OF = reads OF before writing OF (x86-only)
* 0 = no information yet
* On ARM, Q and GE flags are ignored.
*/
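/* Usage sketch (illustrative only; "ilist" here is a hypothetical instruction list):
 *   int status = 0;
 *   uint e6 = 0;
 *   instr_t *in;
 *   for (in = instrlist_first(ilist); in != NULL; in = instr_get_next(in))
 *       status = eflags_analysis(in, status, &e6);
 */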
static inline int
eflags_analysis(instr_t *instr, int status, uint *eflags_6)
{
uint e6 = *eflags_6; /* local copy */
uint e6_w2r = EFLAGS_WRITE_TO_READ(e6);
uint instr_eflags = instr_get_arith_flags(instr, DR_QUERY_DEFAULT);
/* Keep going until the result is non-zero; also keep going if the
* result is a write to OF, to see if it later writes the rest of the flags
* before reading any, and keep going if it reads one of the 6, to see
* if it later writes OF before reading it.
*/
if (instr_eflags == 0 || status == EFLAGS_WRITE_ARITH
IF_X86(|| status == EFLAGS_READ_OF))
return status;
/* we ignore interrupts */
if ((instr_eflags & EFLAGS_READ_ARITH) != 0 &&
(!instr_opcode_valid(instr) || !instr_is_interrupt(instr))) {
/* store the flags we're reading */
e6 |= (instr_eflags & EFLAGS_READ_ARITH);
*eflags_6 = e6;
if ((e6_w2r | (instr_eflags & EFLAGS_READ_ARITH)) != e6_w2r) {
/* we're reading a flag that has not been written yet */
status = EFLAGS_READ_ARITH; /* some read before all written */
LOG(THREAD_GET, LOG_INTERP, 4, "\treads flag before writing it!\n");
#ifdef X86
if ((instr_eflags & EFLAGS_READ_OF) != 0 && (e6 & EFLAGS_WRITE_OF) == 0) {
status = EFLAGS_READ_OF; /* reads OF before writing! */
LOG(THREAD_GET, LOG_INTERP, 4, "\t reads OF prior to writing it!\n");
}
#endif
}
} else if ((instr_eflags & EFLAGS_WRITE_ARITH) != 0) {
/* store the flags we're writing */
e6 |= (instr_eflags & EFLAGS_WRITE_ARITH);
*eflags_6 = e6;
/* check if all written but none read yet */
if ((e6 & EFLAGS_WRITE_ARITH) == EFLAGS_WRITE_ARITH &&
(e6 & EFLAGS_READ_ARITH) == 0) {
status = EFLAGS_WRITE_ARITH; /* all written before read */
LOG(THREAD_GET, LOG_INTERP, 4, "\twrote all 6 flags now!\n");
}
#ifdef X86
/* check if at least OF was written but not read */
else if ((e6 & EFLAGS_WRITE_OF) != 0 && (e6 & EFLAGS_READ_OF) == 0) {
status = EFLAGS_WRITE_OF; /* OF written before read */
LOG(THREAD_GET, LOG_INTERP, 4, "\twrote overflow flag before reading it!\n");
}
#endif
}
return status;
}
/* check origins of code for several purposes:
* 1) we need list of areas where this thread's fragments come
* from, for faster flushing on munmaps
* 2) also for faster flushing, each vmarea has a list of fragments
* 3) we need to mark as read-only any writable region that
* has a fragment come from it, to handle self-modifying code
* 4) for PROGRAM_SHEPHERDING restricted code origins for security
* 5) for restricted execution environments: not letting bb cross regions
*/
/*
FIXME CASE 7380:
since we report the security violation before executing off the bad page, we can
get a false positive due to:
- a faulting instruction in the middle of the bb would have prevented
getting there
- an ignorable syscall in the middle
- self-mod code would have ended the bb sooner than the bad page
One solution is to have check_thread_vm_area() return false and have
bb building stop at checked_end if a violation will occur when we
get there. Then we only raise the violation once building a bb
starting there.
*/
static inline void
check_new_page_start(dcontext_t *dcontext, build_bb_t *bb)
{
DEBUG_DECLARE(bool ok;)
if (!bb->check_vm_area)
return;
DEBUG_DECLARE(ok =) check_thread_vm_area(dcontext, bb->start_pc, bb->start_pc,
(bb->record_vmlist ? &bb->vmlist : NULL),
&bb->flags, &bb->checked_end,
false/*!xfer*/);
ASSERT(ok); /* cannot return false on non-xfer */
bb->last_page = bb->start_pc;
if (bb->overlap_info != NULL)
reset_overlap_info(dcontext, bb);
}
/* Walk forward in straight line from prev_pc to new_pc.
* FIXME: with checked_end we don't need to call this on every contig end
* while bb building like we used to. Should revisit the overlap info and
* walk_app_bb reasons for keeping those contig() calls and see if we can
* optimize them away for bb building at least.
* i#993: new_pc points to the last byte of the current instruction and is not
* an open-ended endpoint.
*/
static inline bool
check_new_page_contig(dcontext_t *dcontext, build_bb_t *bb, app_pc new_pc)
{
bool is_first_instr = (bb->instr_start == bb->start_pc);
if (!bb->check_vm_area)
return true;
if (bb->checked_end == NULL) {
ASSERT(new_pc == bb->start_pc);
} else if (new_pc >= bb->checked_end) {
if (!check_thread_vm_area(dcontext, new_pc, bb->start_pc,
(bb->record_vmlist ? &bb->vmlist : NULL),
&bb->flags, &bb->checked_end,
/* i#989: We don't want to fall through to an
* incompatible vmarea, so we treat fall
* through like a transfer. We can't end the
* bb before the first instruction, so we pass
* false to forcibly merge in the vmarea
* flags.
*/
!is_first_instr/*xfer*/)) {
return false;
}
}
if (bb->overlap_info != NULL)
update_overlap_info(dcontext, bb, new_pc, false/*not jmp*/);
DOLOG(4, LOG_INTERP, {
if (PAGE_START(bb->last_page) != PAGE_START(new_pc))
LOG(THREAD, LOG_INTERP, 4, "page boundary crossed\n");
});
bb->last_page = new_pc; /* update even if not new page, for walk_app_bb */
return true;
}
/* Direct cti from prev_pc to new_pc */
static bool
check_new_page_jmp(dcontext_t *dcontext, build_bb_t *bb, app_pc new_pc)
{
/* For tracking purposes, check the last byte of the cti. */
bool ok = check_new_page_contig(dcontext, bb, bb->cur_pc-1);
ASSERT(ok && "should have checked cur_pc-1 in decode loop");
if (!ok) /* Don't follow the jmp in release build. */
return false;
/* cur sandboxing doesn't handle direct cti
* not good enough to only check this at top of interp -- could walk contig
* from non-selfmod to selfmod page, and then do a direct cti, which
* check_thread_vm_area would allow (no flag changes on direct cti)!
* also not good enough to put this check in check_thread_vm_area, as that
* only checks across pages.
*/
if ((bb->flags & FRAG_SELFMOD_SANDBOXED) != 0)
return false;
if (PAGE_START(bb->last_page) != PAGE_START(new_pc))
LOG(THREAD, LOG_INTERP, 4, "page boundary crossed\n");
/* do not walk into a native exec dll (we assume not currently there,
* though could happen if bypass a gateway -- even then this is a feature
* to allow getting back to native ASAP)
* FIXME: we could assume that such direct calls only
* occur from DGC, and rely on check_thread_vm_area to disallow,
* as an (unsafe) optimization
*/
if (DYNAMO_OPTION(native_exec) &&
DYNAMO_OPTION(native_exec_dircalls) &&
!vmvector_empty(native_exec_areas) &&
is_native_pc(new_pc))
return false;
#ifdef CLIENT_INTERFACE
/* i#805: If we're crossing a module boundary between two modules that are
* and aren't on null_instrument_list, don't elide the jmp.
*/
if ((!!os_module_get_flag(bb->cur_pc, MODULE_NULL_INSTRUMENT)) !=
(!!os_module_get_flag(new_pc, MODULE_NULL_INSTRUMENT)))
return false;
#endif
if (!bb->check_vm_area)
return true;
/* need to check this even if an intra-page jmp b/c we allow sub-page vm regions */
if (!check_thread_vm_area(dcontext, new_pc, bb->start_pc,
(bb->record_vmlist ? &bb->vmlist : NULL),
&bb->flags, &bb->checked_end, true/*xfer*/))
return false;
if (bb->overlap_info != NULL)
update_overlap_info(dcontext, bb, new_pc, true/*jmp*/);
bb->flags |= FRAG_HAS_DIRECT_CTI;
bb->last_page = new_pc; /* update even if not new page, for walk_app_bb */
return true;
}
static inline void
bb_process_invalid_instr(dcontext_t *dcontext, build_bb_t *bb)
{
/* invalid instr: end bb BEFORE the instr, we'll throw exception if we
* reach the instr itself
*/
LOG(THREAD, LOG_INTERP, 2, "interp: invalid instr at "PFX"\n", bb->instr_start);
/* This routine is called by more than just bb builder, also used
* for recreating state, so check bb->app_interp parameter to find out
* if building a real app bb to be executed
*/
if (bb->app_interp && bb->instr_start == bb->start_pc) {
/* This is first instr in bb so it will be executed for sure and
* we need to generate an invalid instruction exception.
* A benefit of being first instr is that the state is easy
* to translate.
*/
#ifdef WINDOWS
/* Copying the invalid bytes and having the processor generate
* the exception would be cleaner in every way except our fear
* of a new processor making those bytes valid and us inadvertently
* executing the unexamined instructions afterward, since we do not
* know the proper number of bytes to copy. Copying is cleaner
* since Windows splits invalid instructions into different cases,
* an invalid lock prefix and maybe some other distinctions
* (it's all interrupt 6 to the processor), and it is hard to
* duplicate Windows' behavior in our forged exception.
*/
/* FIXME case 10672: provide a runtime option to specify new
* instruction formats to avoid this app exception */
ASSERT(dcontext->bb_build_info == bb);
bb_build_abort(dcontext, true/*clean vm area*/, true/*unlock*/);
/* FIXME : we use illegal instruction here, even though we
* know windows uses different exception codes for different
* types of invalid instructions (for ex. STATUS_INVALID_LOCK
* _SEQUENCE for lock prefix on a jmp instruction)
*/
if (TEST(DUMPCORE_FORGE_ILLEGAL_INST, DYNAMO_OPTION(dumpcore_mask)))
os_dump_core("Warning: Encountered Illegal Instruction");
os_forge_exception(bb->instr_start, ILLEGAL_INSTRUCTION_EXCEPTION);
ASSERT_NOT_REACHED();
#else
/* FIXME: Linux hack until we have a real os_forge_exception implementation:
* copy the bytes and have the process generate the exception.
* Once remove this, also disable check at top of insert_selfmod_sandbox
* FIXME PR 307880: we now have a preliminary
* os_forge_exception impl, but I'm leaving this hack until
* we're more comfortable w/ our forging.
*/
uint sz;
instrlist_append(bb->ilist, bb->instr);
/* pretend raw bits valid to get it encoded
* For now we just do 17 bytes, being wary of unreadable pages.
* FIXME: a better solution is to have the decoder guess at the length (if
* the opcode is ok and it's just a bad lock prefix or the like, we know the
* length; if the opcode is bad, take bytes up until we know it's bad).
*/
if (!is_readable_without_exception(bb->instr_start, MAX_INSTR_LENGTH)) {
app_pc nxt_page = (app_pc) ALIGN_FORWARD(bb->instr_start, PAGE_SIZE);
sz = nxt_page - bb->instr_start;
} else {
sz = MAX_INSTR_LENGTH;
}
bb->cur_pc += sz; /* just in case, should have a non-self target */
ASSERT(bb->cur_pc > bb->instr_start); /* else still a self target */
instr_set_raw_bits(bb->instr, bb->instr_start, sz);
bb->invalid_instr_hack = true;
#endif
} else {
instr_destroy(dcontext, bb->instr);
bb->instr = NULL;
}
}
/* returns true to indicate "elide and continue" and false to indicate "end bb now"
* should be used both for converted indirect jumps and
* FIXME: for direct jumps by bb_process_ubr
*/
static inline bool
follow_direct_jump(dcontext_t *dcontext, build_bb_t *bb,
app_pc target)
{
if (bb->follow_direct &&
bb->num_elide_jmp < DYNAMO_OPTION(max_elide_jmp) &&
(DYNAMO_OPTION(elide_back_jmps) || bb->cur_pc <= target)) {
if (check_new_page_jmp(dcontext, bb, target)) {
/* Elide unconditional branch and follow target */
bb->num_elide_jmp++;
STATS_INC(total_elided_jmps);
STATS_TRACK_MAX(max_elided_jmps, bb->num_elide_jmp);
bb->cur_pc = target;
BBPRINT(bb, 4, " continuing at target "PFX"\n", bb->cur_pc);
return true; /* keep bb going */
} else {
BBPRINT(bb, 3, " NOT following jmp from "PFX" to "PFX"\n",
bb->instr_start, target);
}
} else {
BBPRINT(bb, 3, " NOT attempting to follow jump from "PFX" to "PFX"\n",
bb->instr_start, target);
}
return false; /* stop bb */
}
/* returns true to indicate "elide and continue" and false to indicate "end bb now" */
static inline bool
bb_process_ubr(dcontext_t *dcontext, build_bb_t *bb)
{
app_pc tgt = (byte *) opnd_get_pc(instr_get_target(bb->instr));
BBPRINT(bb, 4, "interp: direct jump at "PFX"\n", bb->instr_start);
if (must_not_be_elided(tgt)) {
#ifdef WINDOWS
byte *wrapper_start;
if (is_syscall_trampoline(tgt, &wrapper_start)) {
/* HACK to avoid entering the syscall trampoline that is meant
* only for native syscalls -- we replace the jmp with the
* original app mov immed that it replaced
*/
BBPRINT(bb, 3,
"interp: replacing syscall trampoline @"PFX" w/ orig mov @"PFX"\n",
bb->instr_start, wrapper_start);
instr_reset(dcontext, bb->instr);
/* leave bb->cur_pc unchanged */
decode(dcontext, wrapper_start, bb->instr);
/* ASSUMPTION: syscall trampoline puts hooked instruction
* (usually mov_imm but can be lea if hooked_deeper) here */
ASSERT(instr_get_opcode(bb->instr) == OP_mov_imm ||
(instr_get_opcode(bb->instr) == OP_lea &&
DYNAMO_OPTION(native_exec_hook_conflict) ==
HOOKED_TRAMPOLINE_HOOK_DEEPER));
instrlist_append(bb->ilist, bb->instr);
/* translation should point to the trampoline at the
* original application address
*/
if (bb->record_translation)
instr_set_translation(bb->instr, bb->instr_start);
if (instr_get_opcode(bb->instr) == OP_lea) {
app_pc translation = bb->instr_start +
instr_length(dcontext, bb->instr);
ASSERT_CURIOSITY(instr_length(dcontext, bb->instr) == 4);
/* we hooked deep, so we need to add the int 2e instruction */
/* can't use create_syscall_instr because of case 5217 hack */
ASSERT(get_syscall_method() == SYSCALL_METHOD_INT);
bb->instr = INSTR_CREATE_int(dcontext,
opnd_create_immed_int((char)0x2e,
OPSZ_1));
if (bb->record_translation)
instr_set_translation(bb->instr, translation);
ASSERT(instr_is_syscall(bb->instr) &&
instr_get_opcode(bb->instr) == OP_int);
instrlist_append(bb->ilist, bb->instr);
return bb_process_syscall(dcontext, bb);
}
return true; /* keep bb going */
}
#endif
BBPRINT(bb, 3, "interp: NOT following jmp to "PFX"\n", tgt);
/* add instruction to instruction list */
bb_add_native_direct_xfer(dcontext, bb, false/*!appended*/);
/* Case 8711: coarse-grain can't handle non-exit cti */
bb->flags &= ~FRAG_COARSE_GRAIN;
STATS_INC(coarse_prevent_cti);
return false; /* end bb now */
} else {
if (bb->follow_direct &&
bb->num_elide_jmp < DYNAMO_OPTION(max_elide_jmp) &&
(DYNAMO_OPTION(elide_back_jmps) || bb->cur_pc <= tgt)) {
if (check_new_page_jmp(dcontext, bb, tgt)) {
/* Elide unconditional branch and follow target */
bb->num_elide_jmp++;
STATS_INC(total_elided_jmps);
STATS_TRACK_MAX(max_elided_jmps, bb->num_elide_jmp);
bb->cur_pc = tgt;
BBPRINT(bb, 4, " continuing at target "PFX"\n", bb->cur_pc);
/* pretend never saw this ubr: delete instr, then continue */
instr_destroy(dcontext, bb->instr);
bb->instr = NULL;
return true; /* keep bb going */
} else {
BBPRINT(bb, 3, " NOT following direct jmp from "PFX" to "PFX"\n",
bb->instr_start, tgt);
}
}
/* End this bb now */
bb->exit_target = opnd_get_pc(instr_get_target(bb->instr));
instrlist_append(bb->ilist, bb->instr);
return false; /* end bb */
}
return true; /* keep bb going */
}
#ifdef X86
/* returns true if call is elided,
* and false if not following due to hitting a limit or other reason */
static bool
follow_direct_call(dcontext_t *dcontext, build_bb_t *bb, app_pc callee)
{
/* FIXME: This code should be reused in bb_process_convertible_indcall()
* and in bb_process_call_direct()
*/
if (bb->follow_direct &&
bb->num_elide_call < DYNAMO_OPTION(max_elide_call) &&
(DYNAMO_OPTION(elide_back_calls) || bb->cur_pc <= callee)) {
if (check_new_page_jmp(dcontext, bb, callee)) {
bb->num_elide_call++;
STATS_INC(total_elided_calls);
STATS_TRACK_MAX(max_elided_calls, bb->num_elide_call);
bb->cur_pc = callee;
BBPRINT(bb, 4, " continuing in callee at "PFX"\n", bb->cur_pc);
return true; /* keep bb going in callee */
} else {
BBPRINT(bb, 3,
" NOT following direct (or converted) call from "PFX" to "PFX"\n",
bb->instr_start, callee);
}
}
else {
BBPRINT(bb, 3, " NOT attempting to follow call from "PFX" to "PFX"\n",
bb->instr_start, callee);
}
return false; /* stop bb */
}
#endif /* X86 */
static inline void
bb_stop_prior_to_instr(dcontext_t *dcontext, build_bb_t *bb, bool appended)
{
if (appended)
instrlist_remove(bb->ilist, bb->instr);
instr_destroy(dcontext, bb->instr);
bb->instr = NULL;
bb->cur_pc = bb->instr_start;
}
/* returns true to indicate "elide and continue" and false to indicate "end bb now" */
static inline bool
bb_process_call_direct(dcontext_t *dcontext, build_bb_t *bb)
{
byte *callee = (byte *)opnd_get_pc(instr_get_target(bb->instr));
# ifdef CUSTOM_TRACES_RET_REMOVAL
if (callee == bb->instr_start + 5) {
LOG(THREAD, LOG_INTERP, 4, "found call to next instruction\n");
} else
dcontext->num_calls++;
# endif
STATS_INC(num_all_calls);
BBPRINT(bb, 4, "interp: direct call at "PFX"\n", bb->instr_start);
if (must_not_be_inlined(callee)) {
BBPRINT(bb, 3, "interp: NOT inlining call to "PFX"\n", callee);
/* Case 8711: coarse-grain can't handle non-exit cti.
* If we allow this fragment to be coarse we must kill the freeze
* nudge thread!
*/
bb->flags &= ~FRAG_COARSE_GRAIN;
STATS_INC(coarse_prevent_cti);
bb_add_native_direct_xfer(dcontext, bb, true/*appended*/);
return true; /* keep bb going, w/o inlining call */
} else {
if (DYNAMO_OPTION(coarse_split_calls) && DYNAMO_OPTION(coarse_units) &&
TEST(FRAG_COARSE_GRAIN, bb->flags)) {
if (instrlist_first(bb->ilist) != bb->instr) {
/* have call be in its own bb */
bb_stop_prior_to_instr(dcontext, bb, true/*appended already*/);
return false; /* stop bb */
} else {
/* single-call fine-grained bb */
bb->flags &= ~FRAG_COARSE_GRAIN;
STATS_INC(coarse_prevent_cti);
}
}
/* FIXME: use follow_direct_call() */
if (bb->follow_direct &&
bb->num_elide_call < DYNAMO_OPTION(max_elide_call) &&
(DYNAMO_OPTION(elide_back_calls) || bb->cur_pc <= callee)) {
if (check_new_page_jmp(dcontext, bb, callee)) {
bb->num_elide_call++;
STATS_INC(total_elided_calls);
STATS_TRACK_MAX(max_elided_calls, bb->num_elide_call);
bb->cur_pc = callee;
BBPRINT(bb, 4, " continuing in callee at "PFX"\n", bb->cur_pc);
return true; /* keep bb going */
}
}
BBPRINT(bb, 3, " NOT following direct call from "PFX" to "PFX"\n",
bb->instr_start, callee);
/* End this bb now */
bb->exit_target = callee;
return false; /* end bb now */
}
return true; /* keep bb going */
}
#ifdef WINDOWS
/* We check if the instrs call, mov, and sysenter are
* "call (%xdx); mov %xsp -> %xdx" or "call %xdx; mov %xsp -> %xdx"
* and "sysenter".
*/
bool
instr_is_call_sysenter_pattern(instr_t *call, instr_t *mov, instr_t *sysenter)
{
instr_t *instr;
if (call == NULL || mov == NULL || sysenter == NULL)
return false;
if (instr_is_meta(call) || instr_is_meta(mov) ||
instr_is_meta(sysenter))
return false;
if (instr_get_next(call) != mov || instr_get_next(mov) != sysenter)
return false;
/* check sysenter */
if (instr_get_opcode(sysenter) != OP_sysenter)
return false;
/* FIXME Relax the pattern matching on the "mov; call" pair so that small
* changes in the register dataflow and call construct are tolerated. */
/* Did we find a "mov %xsp -> %xdx"? */
instr = mov;
if (!(instr != NULL && instr_get_opcode(instr) == OP_mov_ld &&
instr_num_srcs(instr) == 1 && instr_num_dsts(instr) == 1 &&
opnd_is_reg(instr_get_dst(instr, 0)) &&
opnd_get_reg(instr_get_dst(instr, 0)) == REG_XDX &&
opnd_is_reg(instr_get_src(instr, 0)) &&
opnd_get_reg(instr_get_src(instr, 0)) == REG_XSP)) {
return false;
}
/* Did we find a "call (%xdx) or "call %xdx" that's already marked
* for ind->direct call conversion? */
instr = call;
if (!(instr != NULL && TEST(INSTR_IND_CALL_DIRECT, instr->flags) &&
instr_is_call_indirect(instr) &&
/* The 2nd src operand should always be %xsp. */
opnd_is_reg(instr_get_src(instr, 1)) &&
opnd_get_reg(instr_get_src(instr, 1)) == REG_XSP &&
/* Match 'call (%xdx)' for post-SP2. */
((opnd_is_near_base_disp(instr_get_src(instr, 0)) &&
opnd_get_base(instr_get_src(instr, 0)) == REG_XDX &&
opnd_get_disp(instr_get_src(instr, 0)) == 0) ||
/* Match 'call %xdx' for pre-SP2. */
(opnd_is_reg(instr_get_src(instr, 0)) &&
opnd_get_reg(instr_get_src(instr, 0)) == REG_XDX)))) {
return false;
}
return true;
}
/* Walk up from the bb->instr and verify that the preceding instructions
* match the pattern that we expect to precede a sysenter. */
static instr_t *
bb_verify_sysenter_pattern(dcontext_t *dcontext, build_bb_t *bb)
{
/* Walk back up 2 instructions and verify that there's a
* "call (%xdx); mov %xsp -> %xdx" or "call %xdx; mov %xsp -> %xdx"
* just prior to the sysenter.
* We use "xsp" and "xdx" to be ready for x64 sysenter though we don't
* expect to see it.
*/
instr_t *mov, *call;
mov = instr_get_prev_expanded(dcontext, bb->ilist, bb->instr);
if (mov == NULL)
return NULL;
call = instr_get_prev_expanded(dcontext, bb->ilist, mov);
if (call == NULL)
return NULL;
if (!instr_is_call_sysenter_pattern(call, mov, bb->instr)) {
BBPRINT(bb, 3, "bb_verify_sysenter_pattern -- pattern didn't match\n");
return NULL;
}
return call;
}
/* Only used for the Borland SEH exemption. */
/* FIXME - we can't really tell a push from a pop since both are typically a
* mov to fs:[0], but double processing doesn't hurt. */
/* NOTE we don't see dynamic SEH frame pushes, we only see the first SEH push
* per mov -> fs:[0] instruction in the app. So we don't see modified in place
* handler addresses (see at_Borland_SEH_rct_exemption()) or handler addresses
* that are passed into a shared routine that sets up the frame (not yet seen,
* note that MS dlls that have a _SEH_prolog hardcode the handler address in
* the _SEH_prolog routine, only the data is passed in).
*/
static void
bb_process_SEH_push(dcontext_t *dcontext, build_bb_t *bb, void *value)
{
if (value == NULL || value == (void *)PTR_UINT_MINUS_1) {
/* could be popping off the last frame (leaving -1) of the SEH stack */
STATS_INC(num_endlist_SEH_write);
ASSERT_CURIOSITY(value != NULL);
return;
}
LOG(THREAD, LOG_INTERP, 3, "App moving "PFX" to fs:[0]\n", value);
# ifdef RETURN_AFTER_CALL
if (DYNAMO_OPTION(borland_SEH_rct)) {
/* xref case 5752, the Borland compiler SEH implementation uses a push
* imm ret motif for fall through to the finally of a try finally block
* (very similar to what the Microsoft NT at_SEH_rct_exception() is
* doing). The layout will always look like this :
* push e: (imm32) (e should be in the .E/.F table)
* a:
* ...
* b: ret
* c: jmp rel32 (c should be in the .E/.F table)
* d: jmp a: (rel8/32)
* ... (usually nothing)
* e:
* (where ret at b is targeting e, or a valid after call). The
* exception dispatcher calls c (the SEH frame has c as the handler)
* which jmps to the exception handler which, in turn, calls d to
* execute the finally block. Fall through is as shown above. So,
* we see a .E violation for the handler's call to d and a .C violation
* for the fall through case of the ret @ b targeting e. We may also
* see a .E violation for a call to a as sometimes the handler computes
* the target of the jmp @ d and passes that to a different exception
* handler.
*
* For try-except we see the following layout :
* I've only seen jmp ind in the case that led to needing
* at_Borland_SEH_rct_exemption() to be added, not that
* it makes any difference.
* [ jmp z: (rel8/32) || (rarely) ret || (very rarely) jmp ind]
* x: jmp rel32 (x should be in the .E/.F table)
* y:
* ...
* call rel32
* [z: ... || ret ]
* Though there may be other optimized layouts (the ret instead of the
* jmp z: is one such) so we may not want to rely on anything other
* than x y. The exception dispatcher calls x (the SEH frame has x as
* the handler) which jmps to the exception handler which, in turn,
* jmps to y to execute the except block. We see a .F violation from
* the handler's jmp to y. at_Borland_SEH_rct_exemption() covers a
* case where the address of x (and thus y) in an existing SEH frame
* is changed in place instead of popping and pushing a new frame.
*
* All addresses (rel and otherwise) should be in the same module. So
* we need to recognize the pattern and add d:/y: to the .E/.F table
* as well as a: (sometimes the handler calculates the target of d and
* passes that up to a higher level routine, though I don't see the
* point) and add e: to the .C table.
*
* It would be preferable to handle these exemptions reactively at
* the violation point, but unfortunately, by the time we get to the
* violation the SEH frame information has been popped off the stack
* and is lost, so we have to do it pre-emptively here (pattern
* matching at violation time has proven too difficult in the face of
* certain compiler optimizations). See at_Borland_SEH_rct_exemption()
* in callback.c, that could handle all ind branches to y and ind calls
* to d (see below) at an acceptable level of security if we desired.
* Handling the ret @ b to e reactively would require the ability to
* recreate the exact src cti (so we can use the addr of the ret to
* pattern match) at the violation point (something that can't always
* currently be done, reset flushing etc.). Handling the ind call to
* a (which I've never actually seen, though I've seen the address
* computed and it looks like it could likely be hit) reactively is
* more tricky. Prob. the only way to handle that is to allow .E/.F
* transitions to any address after a push imm32 of an address in the
* same module, but that might be too permissive. FIXME - should still
* revisit doing the exemptions reactively at some point, esp. once we
* can reliably get the src cti.
*/
extern bool seen_Borland_SEH; /* set for callback.c */
/* First read in the SEH frame, this is the observed structure and
* the first two fields (which are all that we use) are constrained by
* ntdll exception dispatcher (see EXCEPTION_REGISTRATION declaration
* in ntdll.h). */
/* FIXME - could just use EXCEPTION_REGISTRATION period since all we
* need is the handler address and it would allow simpler curiosity
* [see 8181] below. If, as is expected, other options make use of
* this routine we'll probably have one shared get of the SEH frame
* anyways. */
typedef struct _borland_seh_frame_t {
EXCEPTION_REGISTRATION reg;
reg_t xbp; /* not used by us */
} borland_seh_frame_t;
borland_seh_frame_t frame;
/* will hold [b,e] or [x-1,y] */
byte target_buf[RET_0_LENGTH + 2 * JMP_LONG_LENGTH];
app_pc handler_jmp_target = NULL;
if (!safe_read(value, sizeof(frame), &frame)) {
/* We already checked for NULL and -1 above so this should be
* a valid SEH frame. Xref 8181, borland_seh_frame_t struct is
* bigger than EXCEPTION_REGISTRATION (which is all that is
* required) so verify smaller size is readable. */
ASSERT_CURIOSITY(sizeof(EXCEPTION_REGISTRATION) < sizeof(frame) &&
safe_read(value, sizeof(EXCEPTION_REGISTRATION),
&frame));
goto post_borland;
}
/* frame.reg.handler is c or y, read extra prior bytes to look for b */
if (!safe_read((app_pc)frame.reg.handler - RET_0_LENGTH,
sizeof(target_buf), target_buf)) {
goto post_borland;
}
if (is_jmp_rel32(&target_buf[RET_0_LENGTH], (app_pc)frame.reg.handler,
&handler_jmp_target)) {
/* we have a possible match, now do the more expensive checking */
app_pc base;
LOG(THREAD, LOG_INTERP, 3,
"Read possible borland SEH frame @"PFX"\n\t"
"next="PFX" handler="PFX" xbp="PFX"\n\t",
value, frame.reg.prev, frame.reg.handler, frame.xbp);
DOLOG(3, LOG_INTERP, {
dump_buffer_as_bytes(THREAD, target_buf, sizeof(target_buf), 0);
});
/* optimize check if we've already processed this frame once */
if ((DYNAMO_OPTION(rct_ind_jump) != OPTION_DISABLED ||
DYNAMO_OPTION(rct_ind_call) != OPTION_DISABLED) &&
rct_ind_branch_target_lookup(dcontext,
(app_pc)frame.reg.handler +
JMP_LONG_LENGTH)) {
/* we already processed this SEH frame once, this is prob. a
* frame pop, no need to continue */
STATS_INC(num_borland_SEH_dup_frame);
LOG(THREAD, LOG_INTERP, 3,
"Processing duplicate Borland SEH frame\n");
goto post_borland;
}
base = get_module_base((app_pc)frame.reg.handler);
STATS_INC(num_borland_SEH_initial_match);
/* Perf opt, we use the cheaper get_allocation_base() below instead
* of get_module_base(). We are checking the result against a
* known module base (base) so no need to duplicate the is module
* check. FIXME - the checks prob. aren't even necessary given the
* later is_in_code_section checks. Xref case 8171. */
/* FIXME - (perf) we could cache the region from the first
* is_in_code_section() call and check against that before falling
* back on is_in_code_section in case of multiple code sections. */
if (base != NULL &&
get_allocation_base(handler_jmp_target) == base &&
get_allocation_base(bb->instr_start) == base &&
/* FIXME - with -rct_analyze_at_load we should be able to
* verify that frame->handler (x: c:) is on the .E/.F
* table already. We could also try to match known pre x:
* post y: patterns. */
is_in_code_section(base, bb->instr_start, NULL, NULL) &&
is_in_code_section(base, handler_jmp_target, NULL, NULL) &&
is_range_in_code_section(base, (app_pc)frame.reg.handler,
(app_pc)frame.reg.handler+JMP_LONG_LENGTH+1,
NULL, NULL)) {
app_pc finally_target;
byte push_imm_buf[PUSH_IMM32_LENGTH];
DEBUG_DECLARE(bool ok;)
/* we have a match, add handler+JMP_LONG_LENGTH (y: d:)
* to .E/.F table */
STATS_INC(num_borland_SEH_try_match);
LOG(THREAD, LOG_INTERP, 2,
"Found Borland SEH frame adding "PFX" to .E/.F table\n",
(app_pc)frame.reg.handler+JMP_LONG_LENGTH);
if ((DYNAMO_OPTION(rct_ind_jump) != OPTION_DISABLED ||
DYNAMO_OPTION(rct_ind_call) != OPTION_DISABLED)) {
mutex_lock(&rct_module_lock);
rct_add_valid_ind_branch_target(dcontext,
(app_pc)frame.reg.handler +
JMP_LONG_LENGTH);
mutex_unlock(&rct_module_lock);
}
/* we set this as an enabler for another exemption in
* callback .C, see notes there */
if (!seen_Borland_SEH) {
SELF_UNPROTECT_DATASEC(DATASEC_RARELY_PROT);
seen_Borland_SEH = true;
SELF_PROTECT_DATASEC(DATASEC_RARELY_PROT);
}
/* case 8648: used to decide which RCT entries to persist */
DEBUG_DECLARE(ok =) os_module_set_flag(base, MODULE_HAS_BORLAND_SEH);
ASSERT(ok);
/* look for .C addresses for try finally */
if (target_buf[0] == RAW_OPCODE_ret &&
(is_jmp_rel32(&target_buf[RET_0_LENGTH+JMP_LONG_LENGTH],
(app_pc)frame.reg.handler+JMP_LONG_LENGTH,
&finally_target) ||
is_jmp_rel8(&target_buf[RET_0_LENGTH+JMP_LONG_LENGTH],
(app_pc)frame.reg.handler+JMP_LONG_LENGTH,
&finally_target)) &&
safe_read(finally_target - sizeof(push_imm_buf),
sizeof(push_imm_buf), push_imm_buf) &&
push_imm_buf[0] == RAW_OPCODE_push_imm32) {
app_pc push_val = *(app_pc *)&push_imm_buf[1];
/* do a few more, expensive, sanity checks */
/* FIXME - (perf) see earlier note on get_allocation_base()
* and is_in_code_section() usage. */
if (get_allocation_base(finally_target) == base &&
is_in_code_section(base, finally_target, NULL, NULL) &&
get_allocation_base(push_val) == base &&
/* FIXME - could also check that push_val is in
* .E/.F table, at least for -rct_analyze_at_load */
is_in_code_section(base, push_val, NULL, NULL)) {
/* Full match, add push_val (e:) to the .C table
* and finally_target (a:) to the .E/.F table */
STATS_INC(num_borland_SEH_finally_match);
LOG(THREAD, LOG_INTERP, 2,
"Found Borland SEH finally frame adding "PFX" to"
" .C table and "PFX" to .E/.F table\n",
push_val, finally_target);
if ((DYNAMO_OPTION(rct_ind_jump) != OPTION_DISABLED ||
DYNAMO_OPTION(rct_ind_call) != OPTION_DISABLED)) {
mutex_lock(&rct_module_lock);
rct_add_valid_ind_branch_target(dcontext,
finally_target);
mutex_unlock(&rct_module_lock);
}
if (DYNAMO_OPTION(ret_after_call)) {
fragment_add_after_call(dcontext, push_val);
}
} else {
ASSERT_CURIOSITY(false &&
"partial borland seh finally match");
}
}
}
}
}
post_borland:
# endif /* RETURN_AFTER_CALL */
return;
}
/* helper routine for bb_process_fs_ref
* return true if bb should be continued, false if it shouldn't */
static bool
bb_process_fs_ref_opnd(dcontext_t *dcontext, build_bb_t *bb, opnd_t dst,
bool *is_to_fs0)
{
ASSERT(is_to_fs0 != NULL);
*is_to_fs0 = false;
if (opnd_is_far_base_disp(dst) && /* FIXME - check size? */
opnd_get_segment(dst) == SEG_FS) {
/* is a write to fs:[*] */
if (bb->instr_start != bb->start_pc) {
/* Not first instruction in the bb, end bb before this
* instruction, so we can see it as the first instruction of a
* new bb where we can use the register state. */
/* As is, always ending the bb here has a mixed effect on mem usage
* with default options. We do end up with slightly more bb's
* (and associated bookkeeping costs), but frequently with MS dlls
* we reduce code cache duplication from jmp/call elision
* (_SEH_[Pro,Epi]log otherwise ends up frequently duplicated for
* instance). */
/* FIXME - we must stop the bb here even if there's already
* a bb built for the next instruction, as we have to have
* reproducible bb building for recreate app state. We should
* only get here through code duplication (typically jmp/call
* inlining, though can also be through multiple entry points into
* the same block of non cti instructions). */
bb_stop_prior_to_instr(dcontext, bb, false/*not appended yet*/);
return false; /* stop bb */
}
/* Only process the push if building a new bb for cache, can't check
* this any earlier since have to preserve bb building/ending behavior
* even when not for cache (for recreation etc.). */
if (bb->app_interp) {
/* check is write to fs:[0] */
/* XXX: this won't identify all memory references (need to switch to
* instr_compute_address_ex_priv() in order to handle VSIB) but the
* current usage is just to identify the Borland pattern so that's ok.
*/
if (opnd_compute_address_priv(dst, get_mcontext(dcontext)) == NULL) {
/* we have new mov to fs:[0] */
*is_to_fs0 = true;
}
}
}
return true;
}
/* While currently only used for Borland SEH exemptions, this analysis could
* also be helpful for other SEH tasks (xref case 5824). */
static bool
bb_process_fs_ref(dcontext_t *dcontext, build_bb_t *bb)
{
ASSERT(DYNAMO_OPTION(process_SEH_push) &&
instr_get_prefix_flag(bb->instr, PREFIX_SEG_FS));
/* If this is the first instruction of a bb for the cache we
* want to fully decode it, check if it's pushing an SEH frame
* and, if so, pass it to the SEH checking routines (currently
* just used for the Borland SEH rct handling). If this is not
* the first instruction of the bb then we want to stop the bb
* just before this instruction so that when we do process this
* instruction it will be the first in the bb (allowing us to
* use the register state). */
if (!bb->full_decode) {
instr_decode(dcontext, bb->instr);
/* it's possible this is an invalid instr that made it through the fast
* decode, FIXME is there a better way to handle this? */
if (!instr_valid(bb->instr)) {
ASSERT_NOT_TESTED();
if (bb->cur_pc == NULL)
bb->cur_pc = bb->instr_start;
bb_process_invalid_instr(dcontext, bb);
return false; /* stop bb */
}
ASSERT(instr_get_prefix_flag(bb->instr, PREFIX_SEG_FS));
}
/* expect to see only simple mov's to fs:[0] for new SEH frames
* FIXME - might we see other types we'd want to intercept?
* do we want to process pop instructions (usually just for removing
* a frame)? */
if (instr_get_opcode(bb->instr) == OP_mov_st) {
bool is_to_fs0;
opnd_t dst = instr_get_dst(bb->instr, 0);
if (!bb_process_fs_ref_opnd(dcontext, bb, dst, &is_to_fs0))
return false; /* end bb */
/* Only process the push if building a new bb for cache, can't check
* this any earlier since have to preserve bb building/ending behavior
* even when not for cache (for recreation etc.). */
if (bb->app_interp) {
if (is_to_fs0) {
ptr_int_t value = 0;
opnd_t src = instr_get_src(bb->instr, 0);
if (opnd_is_immed_int(src)) {
value = opnd_get_immed_int(src);
} else if (opnd_is_reg(src)) {
value = reg_get_value_priv(opnd_get_reg(src), get_mcontext(dcontext));
} else {
ASSERT_NOT_REACHED();
}
STATS_INC(num_SEH_pushes_processed);
LOG(THREAD, LOG_INTERP, 3, "found mov to fs:[0] @ "PFX"\n",
bb->instr_start);
bb_process_SEH_push(dcontext, bb, (void *)value);
} else {
STATS_INC(num_fs_movs_not_SEH);
}
}
}
# if defined(DEBUG) && defined(INTERNAL)
else if (INTERNAL_OPTION(check_for_SEH_push)) {
/* Debug-build sanity check that we aren't missing SEH frame pushes */
int i;
int num_dsts = instr_num_dsts(bb->instr);
for (i = 0; i < num_dsts; i++) {
bool is_to_fs0;
opnd_t dst = instr_get_dst(bb->instr, i);
if (!bb_process_fs_ref_opnd(dcontext, bb, dst, &is_to_fs0)) {
STATS_INC(num_process_SEH_bb_early_terminate_debug);
return false; /* end bb */
}
/* common case is pop instructions to fs:[0] when popping an
* SEH frame stored on tos */
if (is_to_fs0) {
if (instr_get_opcode(bb->instr) == OP_pop) {
LOG(THREAD, LOG_INTERP, 4,
"found pop to fs:[0] @ "PFX"\n", bb->instr_start);
STATS_INC(num_process_SEH_pop_fs0);
} else {
/* an unexpected SEH frame push */
LOG(THREAD, LOG_INTERP, 1,
"found unexpected write to fs:[0] @"PFX"\n",
bb->instr_start);
DOLOG(1, LOG_INTERP, {
loginst(dcontext, 1, bb->instr, "");
});
ASSERT_CURIOSITY(!is_to_fs0);
}
}
}
}
# endif
return true; /* continue bb */
}
#endif /* win32 */
#if defined(UNIX) && !defined(DGC_DIAGNOSTICS) && defined(X86)
/* The basic strategy for mangling a mov_seg instruction is:
* For mov fs/gs => reg/[mem], simply mangle it to write
* the app's fs/gs selector value into dst.
* For mov reg/mem => fs/gs, we make it the first instruction
* of its bb, mark that bb as not-linked and as containing a mov_seg instr,
* and change the instruction into a nop.
* Then, before entering the code cache, we check whether the bb
* has a mov_seg. If so, we update the information we maintain
* about the app's fs/gs.
*/
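/* Illustrative examples only (not instructions this file expects verbatim):
 *   mov %gs, %eax   -> a read: mangled in place to produce the app's gs selector value
 *   mov %eax, %gs   -> a write: kept as the first (nop'd) instr of its own unlinked bb
 *                      so the app's new fs/gs value can be recorded before we enter
 *                      the code cache
 */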
/* check if the basic block building should continue on a mov_seg instr. */
static bool
bb_process_mov_seg(dcontext_t *dcontext, build_bb_t *bb)
{
reg_id_t seg;
if (!INTERNAL_OPTION(mangle_app_seg))
return true; /* continue bb */
/* if it is a read, we only need to mangle the instruction. */
ASSERT(instr_num_srcs(bb->instr) == 1);
if (opnd_is_reg(instr_get_src(bb->instr, 0)) &&
reg_is_segment(opnd_get_reg(instr_get_src(bb->instr, 0))))
return true; /* continue bb */
/* it is an update; we need it to be the first instr of the bb */
ASSERT(instr_num_dsts(bb->instr) == 1);
ASSERT(opnd_is_reg(instr_get_dst(bb->instr, 0)));
seg = opnd_get_reg(instr_get_dst(bb->instr, 0));
ASSERT(reg_is_segment(seg));
/* we only need to handle fs/gs */
if (seg != SEG_GS && seg != SEG_FS)
return true; /* continue bb */
/* if no private loader, we only need to mangle the non-tls seg */
if (seg == IF_X64_ELSE(SEG_FS, SEG_FS) &&
IF_CLIENT_INTERFACE_ELSE(!INTERNAL_OPTION(private_loader), true))
return true; /* continue bb */
if (bb->instr_start == bb->start_pc) {
/* the first instruction, we can continue build bb. */
/* this bb cannot be part of trace! */
bb->flags |= FRAG_CANNOT_BE_TRACE;
bb->flags |= FRAG_HAS_MOV_SEG;
return true; /* continue bb */
}
LOG(THREAD, LOG_INTERP, 3, "ending bb before mov_seg\n");
/* Set cur_pc back to the start of this instruction and delete this
* instruction from the bb ilist.
*/
bb->cur_pc = instr_get_raw_bits(bb->instr);
instrlist_remove(bb->ilist, bb->instr);
instr_destroy(dcontext, bb->instr);
/* Set instr to NULL in order to get translation of exit cti correct. */
bb->instr = NULL;
/* this block must be the last one in a trace.
* Breaking traces here shouldn't be a perf issue b/c this is so rare:
* it should happen only once per thread, on setting up tls.
*/
bb->flags |= FRAG_MUST_END_TRACE;
return false; /* stop bb here */
}
#endif /* UNIX && X86 */
/* Returns true to indicate that ignorable syscall processing is completed
* with *continue_bb indicating if the bb should be continued or not.
* When returning false, continue_bb isn't pertinent.
*/
static bool
bb_process_ignorable_syscall(dcontext_t *dcontext, build_bb_t *bb,
int sysnum, bool *continue_bb)
{
STATS_INC(ignorable_syscalls);
BBPRINT(bb, 3, "found ignorable system call 0x%04x\n", sysnum);
#ifdef WINDOWS
if (get_syscall_method() != SYSCALL_METHOD_SYSENTER) {
DOCHECK(1, {
if (get_syscall_method() == SYSCALL_METHOD_WOW64)
ASSERT_NOT_TESTED();
});
if (continue_bb != NULL)
*continue_bb = true;
return true;
}
else {
/* Can we continue interp after the sysenter at the instruction
* after the call to sysenter? */
instr_t *call = bb_verify_sysenter_pattern(dcontext, bb);
if (call != NULL) {
/* If we're continuing code discovery at the after-call address,
* change the cur_pc to continue at the after-call addr. This is
* safe since the preceding call is in the fragment and
* %xsp/(%xsp) hasn't changed since the call. Obviously, we assume
* that the sysenter breaks control flow in such a fashion that any
* instruction that follows it isn't reached by DR.
*/
if (DYNAMO_OPTION(ignore_syscalls_follow_sysenter)) {
bb->cur_pc =
instr_get_raw_bits(call) + instr_length(dcontext, call);
if (continue_bb != NULL)
*continue_bb = true;
return true;
}
else {
/* End this bb now. We set the exit target so that control
* skips the vsyscall 'ret' that's executed natively after the
* syscall and ends up at the correct place.
*/
/* FIXME Assigning exit_target causes the fragment to end
* with a direct exit stub to the after-call address, which
* is fine. If bb->exit_target < bb->start_pc, the future
* fragment for exit_target is marked as a trace head which
* isn't intended. A potentially undesirable side effect
* is that exit_target's fragment can't be included in
* trace for start_pc.
*/
bb->exit_target =
instr_get_raw_bits(call) + instr_length(dcontext, call);
if (continue_bb != NULL)
*continue_bb = false;
return true;
}
}
STATS_INC(ignorable_syscalls_failed_sysenter_pattern);
/* Pattern match failed but the syscall is ignorable so maybe we
* can try shared syscall? */
/* Decrement the stat to prevent double counting. We rarely expect to hit
* this case. */
STATS_DEC(ignorable_syscalls);
return false;
}
#elif defined (MACOS)
if (instr_get_opcode(bb->instr) == OP_sysenter) {
/* To continue after the sysenter we need to go to the ret ibl, as user-mode
* sysenter wrappers put the retaddr into edx as the post-kernel continuation.
*/
bb->exit_type |= LINK_INDIRECT|LINK_RETURN;
bb->ibl_branch_type = IBL_RETURN;
bb->exit_target = get_ibl_routine(dcontext, get_ibl_entry_type(bb->exit_type),
DEFAULT_IBL_BB(), bb->ibl_branch_type);
LOG(THREAD, LOG_INTERP, 4, "sysenter exit target = "PFX"\n", bb->exit_target);
if (continue_bb != NULL)
*continue_bb = false;
} else if (continue_bb != NULL)
*continue_bb = true;
return true;
#else
if (continue_bb != NULL)
*continue_bb = true;
return true;
#endif
}
#ifdef WINDOWS
/* Process a syscall that is executed via shared syscall. */
static void
bb_process_shared_syscall(dcontext_t *dcontext, build_bb_t *bb, int sysnum)
{
ASSERT(DYNAMO_OPTION(shared_syscalls));
DODEBUG({
if (ignorable_system_call(sysnum, bb->instr, NULL))
STATS_INC(ignorable_syscalls);
else
STATS_INC(optimizable_syscalls);
});
BBPRINT(bb, 3, "found %soptimizable system call 0x%04x\n",
INTERNAL_OPTION(shared_eq_ignore) ? "ignorable-" : "",
sysnum);
LOG(THREAD, LOG_INTERP, 3,
"ending bb at syscall & NOT removing the interrupt itself\n");
/* Mark the instruction as pointing to shared syscall */
bb->instr->flags |= INSTR_SHARED_SYSCALL;
/* this block must be the last one in a trace */
bb->flags |= FRAG_MUST_END_TRACE;
/* we redirect all optimizable syscalls to a single shared piece of code.
* Once a fragment reaches the shared syscall code, it can be safely
* deleted, for example, if the thread is interrupted for a callback and
* DR needs to delete fragments for cache management.
*
* Note that w/shared syscall, syscalls can be executed from TWO
* places -- shared_syscall and do_syscall.
*/
bb->exit_target = shared_syscall_routine(dcontext);
/* make sure translation for ending jmp ends up right, mangle will
* remove this instruction, so set to NULL so translation does the
* right thing */
bb->instr = NULL;
}
#endif
static bool
bb_process_non_ignorable_syscall(dcontext_t *dcontext, build_bb_t *bb,
int sysnum)
{
BBPRINT(bb, 3, "found non-ignorable system call 0x%04x\n", sysnum);
STATS_INC(non_ignorable_syscalls);
bb->exit_type |= LINK_NI_SYSCALL;
/* destroy the interrupt instruction */
LOG(THREAD, LOG_INTERP, 3,
"ending bb at syscall & removing the interrupt itself\n");
/* Indicate that this is a non-ignorable syscall so mangle will remove */
/* FIXME i#1551: maybe we should union int80 and svc as both are inline syscall? */
#ifdef UNIX
if (instr_get_opcode(bb->instr) == IF_X86_ELSE(OP_int, OP_svc)) {
# if defined(MACOS) && defined(X86)
int num = instr_get_interrupt_number(bb->instr);
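/* On MacOS, int 0x81 and 0x82 are the Mach and machdep syscall gateways,
* which we mark for a special exit; int 0x80 is the regular BSD syscall
* gateway.
*/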
if (num == 0x81 || num == 0x82) {
bb->exit_type |= LINK_SPECIAL_EXIT;
bb->instr->flags |= INSTR_BRANCH_SPECIAL_EXIT;
} else {
ASSERT(num == 0x80);
# endif /* MACOS && X86 */
bb->exit_type |= LINK_NI_SYSCALL_INT;
bb->instr->flags |= INSTR_NI_SYSCALL_INT;
# ifdef MACOS
}
# endif
} else
#endif
bb->instr->flags |= INSTR_NI_SYSCALL;
/* Set instr to NULL in order to get translation of exit cti correct. */
bb->instr = NULL;
/* this block must be the last one in a trace */
bb->flags |= FRAG_MUST_END_TRACE;
return false; /* end bb now */
}
/* returns true to indicate "continue bb" and false to indicate "end bb now" */
static inline bool
bb_process_syscall(dcontext_t *dcontext, build_bb_t *bb)
{
int sysnum;
#ifdef CLIENT_INTERFACE
/* PR 307284: for simplicity do syscall/int processing post-client.
* We give up on inlining but we can still use ignorable/shared syscalls
* and trace continuation.
*/
if (bb->pass_to_client && !bb->post_client)
return false;
#endif
#ifdef DGC_DIAGNOSTICS
if (TEST(FRAG_DYNGEN, bb->flags) && !is_dyngen_vsyscall(bb->instr_start)) {
LOG(THREAD, LOG_INTERP, 1, "WARNING: syscall @ "PFX" in dyngen code!\n",
bb->instr_start);
}
#endif
BBPRINT(bb, 4, "interp: syscall @ "PFX"\n", bb->instr_start);
check_syscall_method(dcontext, bb->instr);
bb->flags |= FRAG_HAS_SYSCALL;
/* if we can identify syscall number and it is an ignorable syscall,
* we let bb keep going, else we end bb and flag it
*/
sysnum = find_syscall_num(dcontext, bb->ilist, bb->instr);
#ifdef VMX86_SERVER
DOSTATS({
if (instr_get_opcode(bb->instr) == OP_int &&
instr_get_interrupt_number(bb->instr) == VMKUW_SYSCALL_GATEWAY) {
STATS_INC(vmkuw_syscall_sites);
LOG(THREAD, LOG_SYSCALLS, 2, "vmkuw system call site: #=%d\n", sysnum);
}
});
#endif
BBPRINT(bb, 3, "syscall # is %d\n", sysnum);
#ifdef CLIENT_INTERFACE
if (sysnum != -1 && instrument_filter_syscall(dcontext, sysnum)) {
BBPRINT(bb, 3, "client asking to intercept => pretending syscall # %d is -1\n",
sysnum);
sysnum = -1;
}
#endif
if (sysnum != -1 &&
DYNAMO_OPTION(ignore_syscalls) &&
ignorable_system_call(sysnum, bb->instr, NULL)
#ifdef X86
/* PR 288101: On Linux we do not yet support inlined sysenter instrs as we
* do not have in-cache support for the post-sysenter continuation: we rely
* for now on very simple sysenter handling where dispatch uses asynch_target
* to know where to go next.
*/
IF_LINUX(&& instr_get_opcode(bb->instr) != OP_sysenter)
#endif /* X86 */
) {
bool continue_bb;
if (bb_process_ignorable_syscall(dcontext, bb, sysnum, &continue_bb)) {
if (!DYNAMO_OPTION(inline_ignored_syscalls))
continue_bb = false;
return continue_bb;
}
}
#ifdef WINDOWS
if (sysnum != -1 && DYNAMO_OPTION(shared_syscalls) &&
optimizable_system_call(sysnum)) {
bb_process_shared_syscall(dcontext, bb, sysnum);
return false;
}
#endif
/* Fall thru and handle as a non-ignorable syscall. */
return bb_process_non_ignorable_syscall(dcontext, bb, sysnum);
}
/* Case 3922: for wow64 we treat "call *fs:0xc0" as a system call.
* Only sets continue_bb if it returns true.
*/
static bool
bb_process_indcall_syscall(dcontext_t *dcontext, build_bb_t *bb,
bool *continue_bb)
{
ASSERT(continue_bb != NULL);
#ifdef WINDOWS
if (instr_is_wow64_syscall(bb->instr)) {
/* we could check the preceding instrs but we don't bother */
*continue_bb = bb_process_syscall(dcontext, bb);
return true;
}
#endif
return false;
}
/* returns true to indicate "continue bb" and false to indicate "end bb now" */
static inline bool
bb_process_interrupt(dcontext_t *dcontext, build_bb_t *bb)
{
#if defined(DEBUG) || defined(INTERNAL) || defined(WINDOWS)
int num = instr_get_interrupt_number(bb->instr);
#endif
#ifdef CLIENT_INTERFACE
/* PR 307284: for simplicity do syscall/int processing post-client.
* We give up on inlining but we can still use ignorable/shared syscalls
* and trace continuation.
* PR 550752: we cannot end at int 0x2d: we live w/ client consequences
*/
if (bb->pass_to_client && !bb->post_client IF_WINDOWS(&& num != 0x2d))
return false;
#endif
BBPRINT(bb, 3, "int 0x%x @ "PFX"\n", num, bb->instr_start);
#ifdef WINDOWS
if (num == 0x2b) {
/* interrupt 0x2B signals return from callback */
/* end block here and come back to dynamo to perform interrupt */
bb->exit_type |= LINK_CALLBACK_RETURN;
BBPRINT(bb, 3, "ending bb at cb ret & removing the interrupt itself\n");
/* Set instr to NULL in order to get translation of exit cti
* correct. mangle will destroy the instruction */
bb->instr = NULL;
bb->flags |= FRAG_MUST_END_TRACE;
STATS_INC(num_int2b);
return false;
} else {
SYSLOG_INTERNAL_INFO_ONCE("non-syscall, non-int2b 0x%x @ "PFX" from "PFX,
num, bb->instr_start, bb->start_pc);
}
#endif /* WINDOWS */
return true;
}
/* If the current instr in the BB is an indirect call that can be converted into a
* direct call, process it and return true, else, return false.
* FIXME PR 288327: put in linux call* to vsyscall page
*/
static bool
bb_process_convertible_indcall(dcontext_t *dcontext, build_bb_t *bb)
{
#ifdef X86
/* We perform several levels of checking, each increasingly more stringent
* and expensive, with a false return should any fail.
*/
instr_t *instr;
opnd_t src0;
instr_t *call_instr;
int call_src_reg;
app_pc callee;
bool vsyscall = false;
/* Check if this BB can be extended and the instr is a (near) indirect call */
if (instr_get_opcode(bb->instr) != OP_call_ind)
return false;
/* Check if we have a "mov <imm> -> %reg; call %reg" or a
* "mov <imm> -> %reg; call (%reg)" pair. First check for the call.
*/
/* The 'if' conditions are broken up to make the code more readable
* while #ifdef-ing the WINDOWS case. It's still ugly though.
*/
instr = bb->instr;
if (!(
# ifdef WINDOWS
/* Match 'call (%xdx)' for a post-SP2 indirect call to sysenter. */
(opnd_is_near_base_disp(instr_get_src(instr, 0)) &&
opnd_get_base(instr_get_src(instr, 0)) == REG_XDX &&
opnd_get_disp(instr_get_src(instr, 0)) == 0) ||
# endif
/* Match 'call %reg'. */
opnd_is_reg(instr_get_src(instr, 0))))
return false;
/* If there's no CTI in the BB, we can check if there are 5+ preceding
* bytes and if they could hold a "mov" instruction.
*/
if (!TEST(FRAG_HAS_DIRECT_CTI, bb->flags) &&
bb->instr_start - 5 >= bb->start_pc) {
byte opcode = *((byte *) bb->instr_start - 5);
/* Check the opcode. Do we see a "mov ... -> %reg"? Valid opcodes are in
* the 0xb8-0xbf range (Intel IA-32 ISA ref, v.2) and specify the
* destination register, i.e., 0xb8 means that %xax is the destination.
*/
if (opcode < 0xb8 || opcode > 0xbf)
return false;
}
/* Check the previous instruction -- is it really a "mov"? */
src0 = instr_get_src(instr, 0);
call_instr = instr;
instr = instr_get_prev_expanded(dcontext, bb->ilist, bb->instr);
call_src_reg = opnd_is_near_base_disp(src0) ? opnd_get_base(src0) :
opnd_get_reg(src0);
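/* The preceding instr must be a mov-immediate whose destination is the
* call's source register (or the base register for the memory-indirect form).
*/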
if (instr == NULL || instr_get_opcode(instr) != OP_mov_imm ||
opnd_get_reg(instr_get_dst(instr, 0)) != call_src_reg)
return false;
/* For the general case, we don't try to optimize a call
* thru memory -- just check that the call uses a register.
*/
callee = NULL;
if (opnd_is_reg(src0)) {
/* Extract the target address. */
callee = (app_pc) opnd_get_immed_int(instr_get_src(instr, 0));
# ifdef WINDOWS
# ifdef PROGRAM_SHEPHERDING
/* FIXME - is checking for being on the vsyscall page better, or is checking
* == VSYSCALL_BOOTSTRAP_ADDR? Both are hacky. */
if (is_dyngen_vsyscall((app_pc)opnd_get_immed_int(instr_get_src(instr, 0)))) {
LOG(THREAD, LOG_INTERP, 4, "Pre-SP2 style indirect call "
"to sysenter found at "PFX"\n", bb->instr_start);
STATS_INC(num_sysenter_indcalls);
vsyscall = true;
ASSERT(opnd_get_immed_int(instr_get_src(instr, 0)) ==
(ptr_int_t)VSYSCALL_BOOTSTRAP_ADDR);
ASSERT(!use_ki_syscall_routines()); /* double check our determination */
}
else
# endif
# endif
STATS_INC(num_convertible_indcalls);
}
# ifdef WINDOWS
/* Match the "call (%xdx)" to sysenter case for SP2-patched os's. Memory at
* address VSYSCALL_BOOTSTRAP_ADDR (0x7ffe0300) holds the address of
* KiFastSystemCall or (FIXME - not handled) on older platforms KiIntSystemCall.
* FIXME It's unsavory to hard-code 0x7ffe0300, but the constant has little
* context in an SP2 os. It's a hold-over from pre-SP2.
*/
else if (get_syscall_method() == SYSCALL_METHOD_SYSENTER
&& call_src_reg == REG_XDX
&& opnd_get_immed_int(instr_get_src(instr, 0)) ==
(ptr_int_t)VSYSCALL_BOOTSTRAP_ADDR) {
/* Extract the target address. We expect that the memory read using the
* value in the immediate field is ok as it's the vsyscall page
* which 1) cannot be made unreadable and 2) cannot be made writable so
* the stored value will not change. Of course, it's possible that the
* os could change the page contents.
*/
callee = (app_pc)
*((ptr_uint_t *) opnd_get_immed_int(instr_get_src(instr, 0)));
if (get_app_sysenter_addr() == NULL) {
/* At the first call* we have yet to decode an app syscall, yet we
* cannot have later recreations behave differently, so we must
* handle that case (even though it doesn't matter performance-wise,
* as the first call* is usually in runtime init code that's
* executed once). So we do a raw byte compare to:
* ntdll!KiFastSystemCall:
* 7c82ed50 8bd4 mov xdx,xsp
* 7c82ed52 0f34 sysenter
*/
uint raw;
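/* 0x340fd48b is the little-endian dword for the byte sequence
* 8b d4 0f 34 shown above.
*/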
if (!safe_read(callee, sizeof(raw), &raw) || raw != 0x340fd48b)
callee = NULL;
} else {
/* The callee should be a 2 byte "mov %xsp -> %xdx" followed by the
* sysenter -- check the sysenter's address as 2 bytes past the callee.
*/
if (callee + 2 != get_app_sysenter_addr())
callee = NULL;
}
vsyscall = (callee != NULL);
ASSERT(use_ki_syscall_routines()); /* double check our determination */
DODEBUG({
if (callee == NULL)
ASSERT_CURIOSITY(false && "call* to vsyscall unexpected mismatch");
else {
LOG(THREAD, LOG_INTERP, 4, "Post-SP2 style indirect call "
"to sysenter found at "PFX"\n", bb->instr_start);
STATS_INC(num_sysenter_indcalls);
}
});
}
# endif
/* Check if register dataflow matched and we were able to extract
* the callee address.
*/
if (callee == NULL)
return false;
if (vsyscall) {
/* Case 8917: abandon coarse-grainness in favor of performance */
bb->flags &= ~FRAG_COARSE_GRAIN;
STATS_INC(coarse_prevent_indcall);
}
LOG(THREAD, LOG_INTERP, 4, "interp: possible convertible"
" indirect call from "PFX" to "PFX"\n",
bb->instr_start, callee);
if (must_not_be_inlined(callee)) {
BBPRINT(bb, 3, " NOT inlining indirect call to "PFX"\n", callee);
/* Case 8711: coarse-grain can't handle non-exit cti */
bb->flags &= ~FRAG_COARSE_GRAIN;
STATS_INC(coarse_prevent_cti);
ASSERT_CURIOSITY_ONCE(!vsyscall && "leaving call* to vsyscall");
/* no need for bb_add_native_direct_xfer() b/c it's already indirect */
return true; /* keep bb going, w/o inlining call */
}
if (bb->follow_direct &&
bb->num_elide_call < DYNAMO_OPTION(max_elide_call) &&
(DYNAMO_OPTION(elide_back_calls) || bb->cur_pc <= callee)) {
/* FIXME This is identical to the code for evaluating a
* direct call's callee. If such code appears in another
* (3rd) place, we should outline it.
* FIXME: use follow_direct_call()
*/
if (vsyscall) {
/* As a flag to allow our xfer from now-non-coarse to coarse
* (for vsyscall-in-ntdll) we pre-emptively mark as has-syscall.
*/
ASSERT(!TEST(FRAG_HAS_SYSCALL, bb->flags));
bb->flags |= FRAG_HAS_SYSCALL;
}
if (check_new_page_jmp(dcontext, bb, callee)) {
if (vsyscall) /* Restore */
bb->flags &= ~FRAG_HAS_SYSCALL;
bb->num_elide_call++;
STATS_INC(total_elided_calls);
STATS_TRACK_MAX(max_elided_calls, bb->num_elide_call);
bb->cur_pc = callee;
/* FIXME: when using follow_direct_call don't forget to set this */
call_instr->flags |= INSTR_IND_CALL_DIRECT;
BBPRINT(bb, 4, " continuing in callee at "PFX"\n", bb->cur_pc);
return true; /* keep bb going */
}
if (vsyscall) {
/* Case 8917: Restore, just in case, though we certainly expect to have
* this flag set as soon as we decode a few more instrs and hit the
* syscall itself -- but for pre-sp2 we currently could be elsewhere on
* the same page, so let's be safe here.
*/
bb->flags &= ~FRAG_HAS_SYSCALL;
}
}
/* FIXME: we're also not converting to a direct call - was this intended? */
BBPRINT(bb, 3, " NOT following indirect call from "PFX" to "PFX"\n",
bb->instr_start, callee);
DODEBUG({
if (vsyscall) {
DO_ONCE({
/* Case 9095: don't complain so loudly if user asked for no elision */
if (DYNAMO_OPTION(max_elide_call) <= 2)
SYSLOG_INTERNAL_WARNING("leaving call* to vsyscall");
else
ASSERT_CURIOSITY(false && "leaving call* to vsyscall");
});
}
});
#elif defined(ARM)
/* FIXME i#1551: NYI on ARM */
ASSERT_NOT_IMPLEMENTED(false);
#endif /* X86 */
return false; /* stop bb */
}
/* if we make the IAT sections unreadable we will need to map to proper location */
static inline app_pc
read_from_IAT(app_pc iat_reference)
{
/* FIXME: we should have looked up where the real IAT should be at
* the time of checking whether is_in_IAT
*/
return *(app_pc*) iat_reference;
}
#ifdef X86
/* returns whether the target is in the IAT of a module that we convert. Note
* that callers still have to check the referred-to value to verify whether it
* targets a native module.
*/
static bool
is_targeting_convertible_IAT(dcontext_t *dcontext, instr_t *instr,
app_pc *iat_reference /* OUT */)
{
/* FIXME: we could give up on optimizing a particular module,
* if too many writes to its IAT are found,
* even 1 may be too much to handle!
*/
/* We only allow a constant address:
* any registers used for effective-address calculation
* cannot be guaranteed to be constant dynamically.
*/
/* FIXME: yet a 'call %reg' if that value is an export would be a
* good sign that we should go backwards and look for a possible
* mov IAT[func] -> %reg and then optimize that as well - case 1948
*/
app_pc memory_reference = NULL;
opnd_t opnd = instr_get_target(instr);
LOG(THREAD, LOG_INTERP, 4, "is_targeting_convertible_IAT: ");
/* A typical example of a proper call
* ff 15 8810807c call dword ptr [kernel32+0x1088 (7c801088)]
* where
* [7c801088] = 7c90f04c ntdll!RtlAnsiStringToUnicodeString
*
* The ModR/M byte for a displacement only with no SIB should be
* 15 for CALL, 25 for JMP, (no far versions for IAT)
*/
if (opnd_is_near_base_disp(opnd)) {
/* FIXME PR 253930: pattern-match x64 IAT calls */
IF_X64(ASSERT_NOT_IMPLEMENTED(false));
memory_reference = (app_pc)(ptr_uint_t)opnd_get_disp(opnd);
/* now should check all other fields */
if (opnd_get_base(opnd) != REG_NULL ||
opnd_get_index(opnd) != REG_NULL) {
/* this is not a pure memory reference, can't be IAT */
return false;
}
ASSERT(opnd_get_scale(opnd) == 0);
} else {
return false;
}
LOG(THREAD, LOG_INTERP, 3, "is_targeting_convertible_IAT: memory_reference "PFX"\n",
memory_reference);
/* FIXME: if we need additional structures they can
* be looked up in a separate hashtable based on the IAT base, or
* we'd have to extend the vmareas with custom fields
*/
ASSERT(DYNAMO_OPTION(IAT_convert));
if (vmvector_overlap(IAT_areas, memory_reference, memory_reference+1)) {
/* The IAT has to be in the same module as the current instruction,
* but even in the unlikely case of a reference by address from another
* module there is really no problem, so it's not worth checking
*/
ASSERT_CURIOSITY(get_module_base(instr->bytes) == get_module_base(memory_reference));
/* FIXME: now that we know it is in IAT/GOT,
* we have to READ the contents and return that
* safely to the caller so they can convert accordingly
*/
/* FIXME: we would want to add the IAT section to the vmareas
* of a region that has a converted block. Then on a write to
* IAT we can flush efficiently only blocks affected by a
* particular module, for a first hack though flushing
* everything on a hooker will do.
*/
*iat_reference = memory_reference;
return true;
} else {
/* plain global function
* e.g. ntdll!RtlUnicodeStringToAnsiString+0x4c:
* ff15c009917c call dword ptr [ntdll!RtlAllocateStringRoutine (7c9109c0)]
*/
return false;
}
}
#endif /* X86 */
/* If the current instr in the BB is an indirect jump through the IAT that
* can be converted into a direct jump, process it and return true,
* else return false.
*/
static bool
bb_process_IAT_convertible_indjmp(dcontext_t *dcontext, build_bb_t *bb,
bool *elide_continue)
{
#ifdef X86
app_pc iat_reference;
app_pc target;
ASSERT(DYNAMO_OPTION(IAT_convert));
/* Check if the instr is a (near) indirect jump */
if (instr_get_opcode(bb->instr) != OP_jmp_ind) {
ASSERT_CURIOSITY(false && "far ind jump");
return false; /* not matching, stop bb */
}
if (!is_targeting_convertible_IAT(dcontext, bb->instr,
&iat_reference)) {
DOSTATS({
if (EXIT_IS_IND_JMP_PLT(bb->exit_type)) {
/* see how often we mark as likely a PLT a JMP which in
* fact is not going through IAT
*/
STATS_INC(num_indirect_jumps_PLT_not_IAT);
LOG(THREAD, LOG_INTERP, 3,
"bb_process_IAT_convertible_indjmp: indirect jmp not PLT instr="
PFX"\n", bb->instr->bytes);
}
});
return false; /* not matching, stop bb */
}
target = read_from_IAT(iat_reference);
DOLOG(4, LOG_INTERP, {
char name[MAXIMUM_SYMBOL_LENGTH];
print_symbolic_address(target, name, sizeof(name), false);
LOG(THREAD, LOG_INTERP, 4,
"bb_process_IAT_convertible_indjmp: target="PFX" %s\n", target, name);
});
STATS_INC(num_indirect_jumps_IAT);
DOSTATS({
if (!EXIT_IS_IND_JMP_PLT(bb->exit_type)) {
/* count any other known uses for an indirect jump to go
* through the IAT other than PLT uses, although a block
* reaching max_elide_call would prevent the above
* match */
STATS_INC(num_indirect_jumps_IAT_not_PLT);
/* FIXME: case 6459 for further inquiry */
LOG(THREAD, LOG_INTERP, 4,
"bb_process_IAT_convertible_indjmp: indirect jmp not PLT target="PFX"\n",
target);
}
});
if (must_not_be_elided(target)) {
ASSERT_NOT_TESTED();
BBPRINT(bb, 3,
" NOT inlining indirect jmp to must_not_be_elided "PFX"\n", target);
return false; /* do not convert indirect jump, will stop bb */
}
/* Verify not targeting native exec DLLs, note that the IATs of
* any module may have imported a native DLL. Note it may be
* possible to optimize with a range check on IAT subregions, but
* this check isn't much slower.
*/
/* IAT_elide should definitely not touch native_exec modules.
*
* FIXME: we also prevent IAT_convert from optimizing imports in
* native_exec_list DLLs, although we could let that convert to a
* direct jump and require native_exec_dircalls to be always on to
* intercept those jmps.
*/
if (DYNAMO_OPTION(native_exec) &&
is_native_pc(target)) {
BBPRINT(bb, 3,
" NOT inlining indirect jump to native exec module "PFX"\n", target);
STATS_INC(num_indirect_jumps_IAT_native);
return false; /* do not convert indirect jump, stop bb */
}
/* mangle mostly as direct jumps would be mangled in
* bb_process_ubr(dcontext, bb), but note bb->instr has already
* been appended so we have to reverse some of its actions
*/
/* pretend we never saw an indirect JMP: we'll either add a new
* direct JMP or we'll just continue in the target */
instrlist_remove(bb->ilist, bb->instr); /* bb->instr has been appended already */
instr_destroy(dcontext, bb->instr);
bb->instr = NULL;
if (DYNAMO_OPTION(IAT_elide)) {
/* try to elide just as a direct jmp would have been elided */
/* We could have used follow_direct_call instead since
* commonly this really is a disguised CALL*. Yet for PLT use
* of the form of CALL PLT[foo]; JMP* IAT[foo] we would have
* already counted the CALL. If we have tail call elimination
* that converts a CALL* into a JMP* it is also OK to treat as
* a JMP instead of a CALL just as if sharing tails.
*/
if (follow_direct_jump(dcontext, bb, target)) {
LOG(THREAD, LOG_INTERP, 4,
"bb_process_IAT_convertible_indjmp: eliding jmp* target="PFX"\n",
target);
STATS_INC(num_indirect_jumps_IAT_elided);
*elide_continue = true; /* do not stop bb */
return true; /* converted indirect to direct */
}
}
/* otherwise convert to direct jump without eliding */
/* we set bb->instr to NULL so unlike bb_process_ubr
* we get the final exit_target added by build_bb_ilist
* FIXME: case 85: which will work only when we're using bb->mangle_ilist
* FIXME: what are callers supposed to see when we do NOT mangle?
*/
LOG(THREAD, LOG_INTERP, 4,
"bb_process_IAT_convertible_indjmp: converting jmp* target="PFX"\n",
target);
STATS_INC(num_indirect_jumps_IAT_converted);
/* end basic block with a direct JMP to target */
bb->exit_target = target;
*elide_continue = false; /* matching, but should stop bb */
return true; /* matching */
#elif defined(ARM)
/* FIXME i#1551: NYI on ARM */
ASSERT_NOT_IMPLEMENTED(false);
return false;
#endif /* X86/ARM */
}
/* If the current instr in the BB is an indirect call through the IAT
* that can be converted into a direct call, processes it, sets
* elide_continue, and returns true. Otherwise returns false.
* OUT elide_continue is set when bb building should continue in target,
* and not set when bb building should be stopped.
*/
bool
bb_process_IAT_convertible_indcall(dcontext_t *dcontext, build_bb_t *bb,
bool *elide_continue)
{
#ifdef X86
app_pc iat_reference;
app_pc target;
ASSERT(DYNAMO_OPTION(IAT_convert));
/* FIXME: the code structure is the same as
* bb_process_IAT_convertible_indjmp, could fuse the two
*/
/* We perform several levels of checking, each increasingly more stringent
* and expensive, with a false return should any fail.
*/
/* Check if the instr is a (near) indirect call */
if (instr_get_opcode(bb->instr) != OP_call_ind) {
ASSERT_CURIOSITY(false && "far call");
return false; /* not matching, stop bb */
}
if (!is_targeting_convertible_IAT(dcontext, bb->instr,
&iat_reference)) {
return false; /* not matching, stop bb */
}
target = read_from_IAT(iat_reference);
DOLOG(4, LOG_INTERP, {
char name[MAXIMUM_SYMBOL_LENGTH];
print_symbolic_address(target, name, sizeof(name), false);
LOG(THREAD, LOG_INTERP, 4,
"bb_process_IAT_convertible_indcall: target="PFX" %s\n", target, name);
});
STATS_INC(num_indirect_calls_IAT);
/* mangle mostly as direct calls are mangled with
* bb_process_call_direct(dcontext, bb)
*/
if (must_not_be_inlined(target)) {
ASSERT_NOT_TESTED();
BBPRINT(bb, 3,
" NOT inlining indirect call to must_not_be_inlined "PFX"\n", target);
return false; /* do not convert indirect call, stop bb */
}
/* Verify not targeting native exec DLLs, note that the IATs of
* any module may have imported a native DLL. Note it may be
* possible to optimize with a range check on IAT subregions, but
* this check isn't much slower.
*/
if (DYNAMO_OPTION(native_exec) &&
is_native_pc(target)) {
BBPRINT(bb, 3,
" NOT inlining indirect call to native exec module "PFX"\n", target);
STATS_INC(num_indirect_calls_IAT_native);
return false; /* do not convert indirect call, stop bb */
}
/* mangle_indirect_call will calculate the return address as of
* bb->instr and will remove bb->instr.
* FIXME: it would have been better to replace it in the instrlist
* with a direct call and have mangle_{in,}direct_call use something
* other than the raw bytes, but for now this does the job.
*/
bb->instr->flags |= INSTR_IND_CALL_DIRECT;
if (DYNAMO_OPTION(IAT_elide)) {
/* try to elide just as a direct call would have been elided */
if (follow_direct_call(dcontext, bb, target)) {
LOG(THREAD, LOG_INTERP, 4,
"bb_process_IAT_convertible_indcall: eliding call* flags=0x%08x target="
PFX"\n", bb->instr->flags, target);
STATS_INC(num_indirect_calls_IAT_elided);
*elide_continue = true; /* do not stop bb */
return true; /* converted indirect to direct */
}
}
/* otherwise convert to direct call without eliding */
LOG(THREAD, LOG_INTERP, 4,
"bb_process_IAT_convertible_indcall: converting call* flags=0x%08x target="PFX
"\n", bb->instr->flags, target);
STATS_INC(num_indirect_calls_IAT_converted);
/* bb->instr has been appended already, and will get removed by
* mangle_indirect_call. We don't need to set to NULL, since this
* instr is a CTI and the final jump's translation target should
* still be the original indirect call.
*/
bb->exit_target = target;
/* end basic block with a direct CALL to target. With default
* options it should get mangled to a PUSH; JMP
*/
*elide_continue = false; /* matching, but should stop bb */
return true; /* converted indirect to direct */
#elif defined(ARM)
/* FIXME i#1551: NYI on ARM */
ASSERT_NOT_IMPLEMENTED(false);
return false;
#endif /* X86/ARM */
}
/* Called on instructions that save the FPU state */
static void
bb_process_float_pc(dcontext_t *dcontext, build_bb_t *bb)
{
/* i#698: for instructions that save the floating-point state
* (e.g., fxsave), we go back to dispatch to translate the fp pc.
* We rule out being in a trace (and thus a potential alternative
* would be to use a FRAG_ flag). These are rare instructions so that
* shouldn't have a significant perf impact: except we've been hitting
* libm code that uses fnstenv and is not rare, so we have non-inlined
* translation under an option for now.
*/
if (DYNAMO_OPTION(translate_fpu_pc)) {
bb->exit_type |= LINK_SPECIAL_EXIT;
bb->flags |= FRAG_CANNOT_BE_TRACE;
}
/* If we inline the pc update, we can't persist. Simplest to keep fine-grained. */
bb->flags &= ~FRAG_COARSE_GRAIN;
}
static bool
instr_will_be_exit_cti(instr_t *inst)
{
/* can't use instr_is_exit_cti() on pre-mangled instrs */
return (instr_is_app(inst) &&
instr_is_cti(inst) &&
(!instr_is_near_call_direct(inst) ||
!must_not_be_inlined(instr_get_branch_target_pc(inst)))
/* PR 239470: ignore wow64 syscall, which is an ind call */
IF_WINDOWS(&& !instr_is_wow64_syscall(inst)));
}
#ifdef CLIENT_INTERFACE
/* PR 215217: check syscall restrictions */
static bool
client_check_syscall(instrlist_t *ilist, instr_t *inst,
bool *found_syscall, bool *found_int)
{
int op_int = IF_X86_ELSE(OP_int, OP_svc);
/* We do consider the wow64 call* a syscall here (it is both
* a syscall and a call*: PR 240258).
*/
if (instr_is_syscall(inst) || instr_get_opcode(inst) == op_int) {
if (instr_is_syscall(inst) && found_syscall != NULL)
*found_syscall = true;
/* Xref PR 313869 - we should be ignoring int 3 here. */
if (instr_get_opcode(inst) == op_int && found_int != NULL)
*found_int = true;
/* For linux an ignorable syscall is not a problem. Our
* pre-syscall-exit jmp is added post client mangling so should
* be robust.
* FIXME: now that we have -no_inline_ignored_syscalls should
* we assert on ignorable also? Probably we'd have to have
* an exception for the middle of a trace?
*/
if (IF_UNIX(TEST(INSTR_NI_SYSCALL, inst->flags))
/* PR 243391: only block-ending interrupt 2b matters */
IF_WINDOWS(instr_is_syscall(inst) ||
((instr_get_opcode(inst) == OP_int &&
instr_get_interrupt_number(inst) == 0x2b)))) {
/* This check means we shouldn't hit the exit_type flags
* check below but we leave it in place in case we add
* other flags in future
*/
if (inst != instrlist_last(ilist)) {
CLIENT_ASSERT(false, "a syscall or interrupt must terminate the block");
return false;
}
/* should we forcibly delete the subsequent instrs?
* or the client has to deal w/ bad behavior in release build?
*/
}
}
return true;
}
/* Pass bb to client, and afterward check for criteria we require and rescan for
* eflags and other flags that might have changed.
* Returns true normally; returns false to indicate "go native".
*/
static bool
client_process_bb(dcontext_t *dcontext, build_bb_t *bb)
{
dr_emit_flags_t emitflags = DR_EMIT_DEFAULT;
instr_t *inst;
bool found_exit_cti = false;
bool found_syscall = false;
bool found_int = false;
#ifdef ANNOTATIONS
app_pc trailing_annotation_pc = NULL;
#endif
instr_t *last_app_instr = NULL;
/* This routine is called by more than just bb builder, also used
* for recreating state, so only call if caller requested it
* (usually that coincides w/ bb->app_interp being set, but not
* when recreating state on a fault (PR 214962)).
* FIXME: hot patches shouldn't be injected during state recreations;
* does predicating on bb->app_interp take care of this issue?
*/
if (!bb->pass_to_client)
return true;
/* i#995: DR may build a bb with one invalid instruction, which won't be
* passed to the client.
* FIXME: i#1000, we should present the bb to the client.
* i#1000-c#1: the bb->ilist could be empty.
*/
if (instrlist_first(bb->ilist) == NULL)
return true;
if (!instr_opcode_valid(instrlist_first(bb->ilist)) &&
/* For -fast_client_decode we can have level 0 instrs so check
* to ensure this is a single-instr bb that was built just to
* raise the fault for us.
* XXX i#1000: shouldn't we pass this to the client? It might not handle an
* invalid instr properly though.
*/
instrlist_first(bb->ilist) == instrlist_last(bb->ilist)) {
return true;
}
/* Call the bb creation callback(s) */
if (!instrument_basic_block(dcontext, (app_pc) bb->start_pc, bb->ilist,
bb->for_trace, !bb->app_interp, &emitflags)) {
/* although no callback was called we must process syscalls/ints (PR 307284) */
}
if (bb->for_cache && TEST(DR_EMIT_GO_NATIVE, emitflags)) {
LOG(THREAD, LOG_INTERP, 2, "client requested that we go native\n");
SYSLOG_INTERNAL_INFO("thread "TIDFMT" is going native at client request",
get_thread_id());
/* we leverage the existing native_exec mechanism */
dcontext->native_exec_postsyscall = bb->start_pc;
dcontext->next_tag = BACK_TO_NATIVE_AFTER_SYSCALL;
dynamo_thread_not_under_dynamo(dcontext);
return false;
}
bb->post_client = true;
/* FIXME: instrumentor may totally mess us up -- our flags
* or syscall info might be wrong. xref PR 215217
*/
/* PR 215217, PR 240265:
* We need to check for client changes that require a new exit
* target. We can't practically analyze the instrlist to decipher
* the exit, so we'll search backwards and require that the last
* cti is the exit cti. Typically, the last instruction in the
* block should be the exit. Post-mbr and post-syscall positions
* are particularly fragile, as our mangling code sets state up for
* the exit that could be messed up by instrs inserted after the
* mbr/syscall. We thus disallow such instrs (except for
* dr_insert_mbr_instrumentation()). xref cases 10503, 10782, 10784
*
* Here's what we support:
* - more than one exit cti; all but the last must be a ubr
* - an exit cbr or call must be the final instr in the block
* - only one mbr; must be the final instr in the block and the exit target
* - clients can't change the exit of blocks ending in a syscall
* (or int), and the syscall must be the final instr in the block;
* client can, however, remove the syscall and then add a different exit
* - client can't add a translation target that's outside of the original
* source code bounds, or else our cache consistency breaks down
* (the one exception to this is that a jump can translate to its target)
*/
/* we set to NULL to have a default of fall-through */
bb->exit_target = NULL;
bb->exit_type = 0;
/* N.B.: we're walking backward */
for (inst = instrlist_last(bb->ilist); inst != NULL; inst = instr_get_prev(inst)) {
if (!instr_opcode_valid(inst))
continue;
if (instr_is_cti(inst) && inst != instrlist_last(bb->ilist)) {
/* PR 213005: coarse_units can't handle added ctis (meta or not)
* since decode_fragment(), used for state recreation, can't
* distinguish from exit cti.
* i#665: we now support intra-fragment meta ctis
* to make persistence usable for clients
*/
if (!opnd_is_instr(instr_get_target(inst)) || instr_is_app(inst)) {
bb->flags &= ~FRAG_COARSE_GRAIN;
STATS_INC(coarse_prevent_client);
}
}
if (instr_is_meta(inst)) {
#ifdef ANNOTATIONS
/* Save the trailing_annotation_pc in case a client truncates the bb there. */
if (is_annotation_label(inst) && last_app_instr == NULL) {
dr_instr_label_data_t *label_data = instr_get_label_data_area(inst);
trailing_annotation_pc = GET_ANNOTATION_APP_PC(label_data);
}
#endif
continue;
}
/* in case bb was truncated, find last non-meta fall-through */
if (last_app_instr == NULL)
last_app_instr = inst;
/* PR 215217: client should not add new source code regions, else our
* cache consistency (both page prot and selfmod) will fail
*/
ASSERT(!bb->for_cache || bb->vmlist != NULL);
/* For selfmod recreation we don't check vmareas so we don't have vmlist.
* We live w/o the checks there.
*/
CLIENT_ASSERT(!bb->for_cache ||
vm_list_overlaps(dcontext, bb->vmlist,
instr_get_translation(inst),
instr_get_translation(inst)+1) ||
(instr_is_ubr(inst) && opnd_is_pc(instr_get_target(inst)) &&
instr_get_translation(inst) == opnd_get_pc(instr_get_target(inst)))
/* the displaced code and jmp return from intercept buffer
* has translation fields set to hooked app routine */
IF_WINDOWS(|| dr_fragment_app_pc(bb->start_pc) != bb->start_pc),
"block's app sources (instr_set_translation() targets) "
"must remain within original bounds");
/* PR 307284: we didn't process syscalls and ints pre-client
* so do so now to get bb->flags and bb->exit_type set
*/
if (instr_is_syscall(inst) ||
instr_get_opcode(inst) == IF_X86_ELSE(OP_int, OP_svc)) {
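/* Temporarily point bb->instr at this syscall/int so the bb_process_*
* routines operate on it; we restore it below unless this is the final
* instr in the block.
*/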
instr_t *tmp = bb->instr;
bb->instr = inst;
if (instr_is_syscall(bb->instr))
bb_process_syscall(dcontext, bb);
else if (instr_get_opcode(bb->instr) == IF_X86_ELSE(OP_int, OP_svc)) {
/* non-syscall int */
bb_process_interrupt(dcontext, bb);
}
if (inst != instrlist_last(bb->ilist))
bb->instr = tmp;
}
/* ensure syscall/int2b terminates block */
client_check_syscall(bb->ilist, inst, &found_syscall, &found_int);
if (instr_will_be_exit_cti(inst)) {
if (!found_exit_cti) {
/* We're about to clobber the exit_type and could lose any
* special flags set above, even if the client doesn't change
* the exit target. We undo such flags after this ilist walk
* to support client removal of syscalls/ints.
* EXIT_IS_IND_JMP_PLT() is used for -IAT_{convert,elide}, which
* is off by default for CI; it's also used for native_exec,
* but we're not sure if we want to support that with CI.
* xref case 10846 and i#198
*/
CLIENT_ASSERT(!TEST(~(LINK_DIRECT | LINK_INDIRECT | LINK_CALL |
LINK_RETURN | LINK_JMP |
LINK_NI_SYSCALL_ALL | LINK_SPECIAL_EXIT
IF_WINDOWS(| LINK_CALLBACK_RETURN)),
bb->exit_type) &&
!EXIT_IS_IND_JMP_PLT(bb->exit_type),
"client unsupported block exit type internal error");
found_exit_cti = true;
bb->instr = inst;
if (instr_is_near_ubr(inst) || instr_is_near_call_direct(inst)) {
CLIENT_ASSERT(instr_is_near_ubr(inst) ||
inst == instrlist_last(bb->ilist) ||
/* for elision we assume calls are followed
* by their callee target code
*/
DYNAMO_OPTION(max_elide_call) > 0,
"an exit call must terminate the block");
/* a ubr need not be the final instr */
if (inst == last_app_instr) {
bb->exit_target = instr_get_branch_target_pc(inst);
bb->exit_type = instr_branch_type(inst);
}
}
else if (instr_is_cbr(inst)) {
CLIENT_ASSERT(inst == instrlist_last(bb->ilist),
"an exit cbr must terminate the block");
/* A null exit target specifies a cbr (see below). */
bb->exit_target = NULL;
bb->exit_type = 0;
instr_exit_branch_set_type(bb->instr,
instr_branch_type(inst));
}
else {
ASSERT(instr_is_mbr(inst) || instr_is_far_cti(inst));
CLIENT_ASSERT(inst == instrlist_last(bb->ilist),
"an exit mbr or far cti must terminate the block");
bb->exit_type = instr_branch_type(inst);
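/* For an mbr or far cti exit, the exit target is the indirect-branch
* lookup routine selected by the branch type.
*/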
bb->exit_target = get_ibl_routine(dcontext,
get_ibl_entry_type(bb->exit_type),
DEFAULT_IBL_BB(),
get_ibl_branch_type(inst));
}
/* since we're walking backward, at the first exit cti
* we can check for post-cti code
*/
if (inst != instrlist_last(bb->ilist)) {
if (TEST(FRAG_COARSE_GRAIN, bb->flags)) {
/* PR 213005: coarse can't handle code beyond ctis */
bb->flags &= ~FRAG_COARSE_GRAIN;
STATS_INC(coarse_prevent_client);
}
/* decode_fragment can't handle code beyond ctis */
if (!instr_is_near_call_direct(inst) ||
DYNAMO_OPTION(max_elide_call) == 0)
bb->flags |= FRAG_CANNOT_BE_TRACE;
}
}
/* Case 10784: Clients can confound trace building when they
* introduce more than one exit cti; we'll just disable traces
* for these fragments.
* PR 215179: we're currently later marking them no-trace for pad_jmps
* reasons as well.
*/
else {
CLIENT_ASSERT(instr_is_near_ubr(inst) ||
(instr_is_near_call_direct(inst) &&
/* for elision we assume calls are followed
* by their callee target code
*/
DYNAMO_OPTION(max_elide_call) > 0),
"a second exit cti must be a ubr");
if (!instr_is_near_call_direct(inst) ||
DYNAMO_OPTION(max_elide_call) == 0)
bb->flags |= FRAG_CANNOT_BE_TRACE;
/* our cti check above should have already turned off coarse */
ASSERT(!TEST(FRAG_COARSE_GRAIN, bb->flags));
}
}
}
/* To handle the client modifying syscall numbers we cannot inline
* syscalls in the middle of a bb.
*/
ASSERT(!DYNAMO_OPTION(inline_ignored_syscalls));
ASSERT((TEST(FRAG_HAS_SYSCALL, bb->flags) && found_syscall) ||
(!TEST(FRAG_HAS_SYSCALL, bb->flags) && !found_syscall));
IF_WINDOWS(ASSERT(!TEST(LINK_CALLBACK_RETURN, bb->exit_type) || found_int));
/* Note that we do NOT remove, or set, FRAG_HAS_DIRECT_CTI based on
* client modifications: setting it for a selfmod fragment could
* result in an infinite loop, and it is mainly used for elision, which we
* are not doing for client ctis. Clients are not supposed to add new
* app source regions (PR 215217).
*/
/* Client might have truncated: re-set fall-through. */
if (last_app_instr != NULL) {
#ifdef ANNOTATIONS
if (trailing_annotation_pc != NULL) {
/* If the client truncated at an annotation, include the annotation. */
bb->cur_pc = trailing_annotation_pc;
} else {
#endif
/* We do not take instr_length of what the client put in, but rather
* the length of the translation target
*/
app_pc last_app_pc = instr_get_translation(last_app_instr);
bb->cur_pc = decode_next_pc(dcontext, last_app_pc);
#ifdef ANNOTATIONS
}
#endif
LOG(THREAD, LOG_INTERP, 3,
"setting cur_pc (for fall-through) to" PFX"\n", bb->cur_pc);
/* don't set bb->instr if last instr is still syscall/int.
* FIXME: I'm not 100% convinced the logic here covers everything
* build_bb_ilist does.
* FIXME: what about if last instr was invalid, or if client adds
* some invalid instrs: xref bb_process_invalid_instr()
*/
if (bb->instr != NULL || (!found_int && !found_syscall))
bb->instr = last_app_instr;
} else
bb->instr = NULL; /* no app instrs left */
/* PR 215217: re-scan for accurate eflags.
* FIXME: should we not do eflags tracking while decoding, then, and always
* do it afterward?
*/
/* for -fast_client_decode, we don't support the client changing the app code */
if (!INTERNAL_OPTION(fast_client_decode)) {
bb->eflags = forward_eflags_analysis(dcontext, bb->ilist,
instrlist_first(bb->ilist));
}
if (TEST(DR_EMIT_STORE_TRANSLATIONS, emitflags)) {
/* PR 214962: let client request storage instead of recreation */
bb->flags |= FRAG_HAS_TRANSLATION_INFO;
/* if we didn't have record on from start, can't store translation info */
CLIENT_ASSERT(!INTERNAL_OPTION(fast_client_decode),
"-fast_client_decode not compatible with "
"DR_EMIT_STORE_TRANSLATIONS");
ASSERT(bb->record_translation && bb->full_decode);
}
if (DYNAMO_OPTION(coarse_enable_freeze)) {
/* If we're not persisting, ignore the presence or absence of the flag
* so we avoid undoing savings from -opt_memory with a tool that
* doesn't support persistence.
*/
if (!TEST(DR_EMIT_PERSISTABLE, emitflags)) {
bb->flags &= ~FRAG_COARSE_GRAIN;
STATS_INC(coarse_prevent_client);
}
}
if (TEST(DR_EMIT_MUST_END_TRACE, emitflags)) {
/* i#848: let client terminate traces */
bb->flags |= FRAG_MUST_END_TRACE;
}
return true;
}
#endif /* CLIENT_INTERFACE */
#ifdef DR_APP_EXPORTS
static void
mangle_pre_client(dcontext_t *dcontext, build_bb_t *bb)
{
if (bb->start_pc == (app_pc) dr_app_running_under_dynamorio) {
/* i#1237: set return value to be true in dr_app_running_under_dynamorio */
instr_t *ret = instrlist_last(bb->ilist);
instr_t *mov = instr_get_prev(ret);
ASSERT(ret != NULL && instr_is_return(ret) &&
mov != NULL &&
IF_X86(instr_get_opcode(mov) == OP_mov_imm &&)
IF_ARM(instr_get_opcode(mov) == OP_mov &&
OPND_IS_IMMED_INT(instr_get_src(mov, 0)) &&)
(bb->start_pc == instr_get_raw_bits(mov) ||
/* the translation field might be NULL */
bb->start_pc == instr_get_translation(mov)));
instr_set_src(mov, 0, OPND_CREATE_INT32(1));
}
}
#endif /* DR_APP_EXPORTS */
/* Interprets the application's instructions until the end of a basic
* block is found, and prepares the resulting instrlist for creation of
* a fragment, but does not create the fragment, just returns the instrlist.
* Caller is responsible for freeing the list and its instrs!
*
* Input parameters in bb control aspects of creation:
* If app_interp is true, this is considered real app code.
* If pass_to_client is true,
* calls instrument routine on bb->ilist before mangling
* If mangle_ilist is true, mangles the ilist, else leaves it in app form
* If record_vmlist is true, updates the vmareas data structures
* If for_cache is true, bb building lock is assumed to be held.
* record_vmlist should also be true.
* Caller must set and later clear dcontext->bb_build_info.
* For !for_cache, build_bb_ilist() sets and clears it, making the
* assumption that the caller is doing no other reading from the region.
* If record_translation is true, records translation for inserted instrs
* If outf != NULL, does full disassembly with comments to outf
* If overlap_info != NULL, records overlap information for the block in
* the overlap_info (caller must fill in region_start and region_end).
*
* FIXME: now that we have better control over following direct ctis,
* should we have an adaptive mechanism to decide whether to follow direct
* ctis, since some bmarks are better doing so (gap, vortex, wupwise)
* and others are worse (apsi, perlbmk)?
*/
static void
build_bb_ilist(dcontext_t *dcontext, build_bb_t *bb)
{
/* Design decision: we will not try to identify branches that target
* instructions in this basic block, when we take those branches we will
* just make a new basic block and duplicate part of this one
*/
int total_branches = 0;
uint total_instrs = 0;
uint total_writes = 0; /* only used for selfmod */
instr_t *non_cti; /* used if !full_decode */
byte *non_cti_start_pc; /* used if !full_decode */
uint eflags_6 = 0; /* holds arith eflags written so far (in read slots) */
#ifdef HOT_PATCHING_INTERFACE
bool hotp_should_inject = false, hotp_injected = false;
#endif
app_pc page_start_pc = (app_pc) NULL;
bool bb_build_nested = false;
/* Caller will free objects allocated here so we must use the passed-in
* dcontext for allocation; we need separate var for non-global dcontext.
*/
dcontext_t *my_dcontext = get_thread_private_dcontext();
DEBUG_DECLARE(bool regenerated = false;)
bool stop_bb_on_fallthrough = false;
ASSERT(bb->initialized);
/* note that it's ok for bb->start_pc to be NULL as our check_new_page_start
* will catch it
*/
/* vmlist must start out empty (or N/A) */
ASSERT(bb->vmlist == NULL || !bb->record_vmlist);
ASSERT(!bb->for_cache || bb->record_vmlist); /* for_cache assumes record_vmlist */
#ifdef CUSTOM_TRACES_RET_REMOVAL
my_dcontext->num_calls = 0;
my_dcontext->num_rets = 0;
#endif
/* Support bb abort on decode fault */
if (my_dcontext != NULL) {
if (bb->for_cache) {
/* Caller should have set! */
ASSERT(bb == (build_bb_t *) my_dcontext->bb_build_info);
} else if (my_dcontext->bb_build_info == NULL) {
my_dcontext->bb_build_info = (void *) bb;
} else {
/* For nested we leave the original, which should be the only vmlist,
* and we give up on freeing dangling instr_t and instrlist_t from this decode.
* We need the original's for_cache so we know to free the bb_building_lock.
* FIXME: use TRY to handle decode exceptions locally? Shouldn't have
* violation remediations on a !for_cache build.
*/
ASSERT(bb->vmlist == NULL && !bb->for_cache &&
((build_bb_t *)my_dcontext->bb_build_info)->for_cache);
/* FIXME: add nested as a field so we can have stat on nested faults */
bb_build_nested = true;
}
} else
ASSERT(dynamo_exited);
if ((bb->record_translation
IF_CLIENT_INTERFACE(&& !INTERNAL_OPTION(fast_client_decode))) ||
!bb->for_cache
/* to split riprel, need to decode every instr */
/* in x86_to_x64, need to translate every x86 instr */
IF_X64(|| DYNAMO_OPTION(coarse_split_riprel) || DYNAMO_OPTION(x86_to_x64))
IF_CLIENT_INTERFACE(|| INTERNAL_OPTION(full_decode)))
bb->full_decode = true;
else {
#if defined(STEAL_REGISTER) || defined(CHECK_RETURNS_SSE2)
bb->full_decode = true;
#endif
}
LOG(THREAD, LOG_INTERP, 3, "\ninterp%s: ",
IF_X64_ELSE(X64_MODE_DC(dcontext) ? "" : " (x86 mode)", ""));
BBPRINT(bb, 3, "start_pc = "PFX"\n", bb->start_pc);