| /* ****************************************************************************** |
| * Copyright (c) 2010-2014 Google, Inc. All rights reserved. |
| * Copyright (c) 2010 Massachusetts Institute of Technology All rights reserved. |
| * Copyright (c) 2000-2010 VMware, Inc. All rights reserved. |
| * ******************************************************************************/ |
| |
| /* |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * * Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimer. |
| * |
| * * Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * * Neither the name of VMware, Inc. nor the names of its contributors may be |
| * used to endorse or promote products derived from this software without |
| * specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE |
| * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH |
| * DAMAGE. |
| */ |
| |
| /* Copyright (c) 2003-2007 Determina Corp. */ |
| /* Copyright (c) 2001-2003 Massachusetts Institute of Technology */ |
| /* Copyright (c) 2000-2001 Hewlett-Packard Company */ |
| |
| /* file "cleancallopt.c" */ |
| |
| #include "../globals.h" |
| #include "arch.h" |
| #include "instrument.h" |
| #include "../hashtable.h" |
| #include "disassemble.h" |
| #include "instr_create.h" |
| |
| /* Shorthand aliases that keep long instruction-insertion lines readable. |
| * Both expand to the meta-instr insertion routines: everything we add is |
| * marked as a meta-instr to avoid hitting client asserts on setting |
| * translation fields. |
| */ |
| #define POST instrlist_meta_postinsert |
| #define PRE instrlist_meta_preinsert |
| |
| |
| /**************************************************************************** |
| * clean call callee info table for i#42 and i#43 |
| */ |
| |
| #ifdef CLIENT_INTERFACE |
| /* Hashtable storing analyzed callee info, keyed by the callee's start pc. */ |
| static generic_table_t *callee_info_table; |
| /* Callee info is only freed at exit: callee_info_table_destroy() sets this |
| * flag to true and callee_info_free() asserts that it is set. |
| */ |
| static bool callee_info_table_exit = false; |
| #define INIT_HTABLE_SIZE_CALLEE 6 /* should remain small */ |
| |
| static void |
| callee_info_init(callee_info_t *ci) |
| { |
| uint i; |
| memset(ci, 0, sizeof(*ci)); |
| ci->bailout = true; |
| /* to be conservative */ |
| ci->has_locals = true; |
| ci->write_aflags = true; |
| ci->read_aflags = true; |
| ci->tls_used = true; |
| /* We use loop here and memset in analyze_callee_regs_usage later. |
| * We could reverse the logic and use memset to set the value below, |
| * but then later in analyze_callee_regs_usage, we have to use the loop. |
| */ |
| /* assuming all xmm registers are used */ |
| ci->num_xmms_used = NUM_XMM_REGS; |
| for (i = 0; i < NUM_XMM_REGS; i++) |
| ci->xmm_used[i] = true; |
| for (i = 0; i < NUM_GP_REGS; i++) |
| ci->reg_used[i] = true; |
| ci->spill_reg = DR_REG_INVALID; |
| } |
| |
| static void |
| callee_info_free(callee_info_t *ci) |
| { |
| ASSERT(callee_info_table_exit); |
| if (ci->ilist != NULL) { |
| ASSERT(ci->opt_inline); |
| instrlist_clear_and_destroy(GLOBAL_DCONTEXT, ci->ilist); |
| } |
| HEAP_TYPE_FREE(GLOBAL_DCONTEXT, ci, callee_info_t, |
| ACCT_CLEANCALL, PROTECTED); |
| } |
| |
| static callee_info_t * |
| callee_info_create(app_pc start, uint num_args) |
| { |
| callee_info_t *info; |
| |
| info = HEAP_TYPE_ALLOC(GLOBAL_DCONTEXT, callee_info_t, |
| ACCT_CLEANCALL, PROTECTED); |
| callee_info_init(info); |
| info->start = start; |
| info->num_args = num_args; |
| return info; |
| } |
| |
| static void |
| callee_info_reserve_slot(callee_info_t *ci, slot_kind_t kind, reg_id_t value) |
| { |
| if (ci->slots_used < BUFFER_SIZE_ELEMENTS(ci->scratch_slots)) { |
| if (kind == SLOT_REG) |
| value = dr_reg_fixer[value]; |
| ci->scratch_slots[ci->slots_used].kind = kind; |
| ci->scratch_slots[ci->slots_used].value = value; |
| } else { |
| LOG(THREAD_GET, LOG_CLEANCALL, 2, |
| "CLEANCALL: unable to fulfill callee_info_reserve_slot for " |
| "kind %d value %d\n", kind, value); |
| } |
| /* We check if slots_used > CLEANCALL_NUM_INLINE_SLOTS to detect failure. */ |
| ci->slots_used++; |
| } |
| |
| static opnd_t |
| callee_info_slot_opnd(callee_info_t *ci, slot_kind_t kind, reg_id_t value) |
| { |
| uint i; |
| if (kind == SLOT_REG) |
| value = dr_reg_fixer[value]; |
| for (i = 0; i < BUFFER_SIZE_ELEMENTS(ci->scratch_slots); i++) { |
| if (ci->scratch_slots[i].kind == kind && |
| ci->scratch_slots[i].value == value) { |
| int disp = (int)offsetof(unprotected_context_t, |
| inline_spill_slots[i]); |
| return opnd_create_base_disp(ci->spill_reg, DR_REG_NULL, 0, disp, |
| OPSZ_PTR); |
| } |
| } |
| ASSERT_MESSAGE(CHKLVL_ASSERTS, "Tried to find scratch slot for value " |
| "without calling callee_info_reserve_slot for it", false); |
| return opnd_create_null(); |
| } |
| |
| static void |
| callee_info_table_init(void) |
| { |
| callee_info_table = |
| generic_hash_create(GLOBAL_DCONTEXT, |
| INIT_HTABLE_SIZE_CALLEE, |
| 80 /* load factor: not perf-critical */, |
| HASHTABLE_SHARED | HASHTABLE_PERSISTENT, |
| (void(*)(void*)) callee_info_free |
| _IF_DEBUG("callee-info table")); |
| } |
| |
| static void |
| callee_info_table_destroy(void) |
| { |
| callee_info_table_exit = true; |
| generic_hash_destroy(GLOBAL_DCONTEXT, callee_info_table); |
| } |
| |
| static callee_info_t * |
| callee_info_table_lookup(void *callee) |
| { |
| callee_info_t *ci; |
| TABLE_RWLOCK(callee_info_table, read, lock); |
| ci = generic_hash_lookup(GLOBAL_DCONTEXT, callee_info_table, |
| (ptr_uint_t)callee); |
| TABLE_RWLOCK(callee_info_table, read, unlock); |
| /* We only delete the callee_info from the callee_info_table |
| * when destroy the table on exit, so we can keep the ci |
| * without holding the lock. |
| */ |
| return ci; |
| } |
| |
| static callee_info_t * |
| callee_info_table_add(callee_info_t *ci) |
| { |
| callee_info_t *info; |
| TABLE_RWLOCK(callee_info_table, write, lock); |
| info = generic_hash_lookup(GLOBAL_DCONTEXT, callee_info_table, |
| (ptr_uint_t)ci->start); |
| if (info == NULL) { |
| info = ci; |
| generic_hash_add(GLOBAL_DCONTEXT, callee_info_table, |
| (ptr_uint_t)ci->start, (void *)ci); |
| } else { |
| /* Have one in the table, free the new one and use existing one. |
| * We cannot free the existing one in the table as it might be used by |
| * other thread without holding the lock. |
| * Since we assume callee should never be changed, they should have |
| * the same content of ci. |
| */ |
| callee_info_free(ci); |
| } |
| TABLE_RWLOCK(callee_info_table, write, unlock); |
| return info; |
| } |
| |
| /****************************************************************************/ |
| /* clean call optimization code */ |
| |
| /* The max number of instructions to try to decode from a function. */ |
| #define MAX_NUM_FUNC_INSTRS 4096 |
| /* The max number of instructions the callee can have to be inlined. */ |
| #define MAX_NUM_INLINE_INSTRS 20 |
| |
| /* Decode instruction from callee and return the next_pc to be decoded. */ |
| static app_pc |
| decode_callee_instr(dcontext_t *dcontext, callee_info_t *ci, app_pc instr_pc) |
| { |
| instrlist_t *ilist = ci->ilist; |
| instr_t *instr; |
| app_pc next_pc = NULL; |
| |
| instr = instr_create(GLOBAL_DCONTEXT); |
| instrlist_append(ilist, instr); |
| ci->num_instrs++; |
| TRY_EXCEPT(dcontext, { |
| next_pc = decode(GLOBAL_DCONTEXT, instr_pc, instr); |
| }, { /* EXCEPT */ |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: crash on decoding callee instruction at: "PFX"\n", |
| instr_pc); |
| ASSERT_CURIOSITY(false && "crashed while decoding clean call"); |
| ci->bailout = true; |
| return NULL; |
| }); |
| if (!instr_valid(instr)) { |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: decoding invalid instruction at: "PFX"\n", instr_pc); |
| ci->bailout = true; |
| return NULL; |
| } |
| instr_set_translation(instr, instr_pc); |
| DOLOG(3, LOG_CLEANCALL, { |
| disassemble_with_bytes(dcontext, instr_pc, THREAD); |
| }); |
| return next_pc; |
| } |
| |
| /* check newly decoded instruction from callee */ |
| static app_pc |
| check_callee_instr(dcontext_t *dcontext, callee_info_t *ci, app_pc next_pc) |
| { |
| instrlist_t *ilist = ci->ilist; |
| instr_t *instr; |
| app_pc cur_pc, tgt_pc; |
| |
| if (next_pc == NULL) |
| return NULL; |
| instr = instrlist_last(ilist); |
| cur_pc = instr_get_app_pc(instr); |
| ASSERT(next_pc == cur_pc + instr_length(dcontext, instr)); |
| if (!instr_is_cti(instr)) { |
| /* special instructions, bail out. */ |
| if (instr_is_syscall(instr) || instr_is_interrupt(instr)) { |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: bail out on syscall or interrupt at: "PFX"\n", |
| cur_pc); |
| ci->bailout = true; |
| return NULL; |
| } |
| return next_pc; |
| } else { /* cti instruc */ |
| if (instr_is_mbr(instr)) { |
| /* check if instr is return, and if return is the last instr. */ |
| if (!instr_is_return(instr) || ci->fwd_tgt > cur_pc) { |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: bail out on indirect branch at: "PFX"\n", |
| cur_pc); |
| ci->bailout = true; |
| } |
| return NULL; |
| } else if (instr_is_call(instr)) { |
| tgt_pc = opnd_get_pc(instr_get_target(instr)); |
| /* remove and destroy the call instruction */ |
| ci->bailout = true; |
| instrlist_remove(ilist, instr); |
| instr_destroy(GLOBAL_DCONTEXT, instr); |
| instr = NULL; |
| ci->num_instrs--; |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: callee calls out at: "PFX" to "PFX"\n", |
| cur_pc, tgt_pc); |
| /* check special PIC code: |
| * 1. call next_pc; pop r1; |
| * or |
| * 2. call pic_func; |
| * and in pic_func: mov [%xsp] %r1; ret; |
| */ |
| if (INTERNAL_OPTION(opt_cleancall) >= 1) { |
| instr_t ins; |
| app_pc tmp_pc; |
| opnd_t src = OPND_CREATE_INTPTR(next_pc); |
| instr_init(dcontext, &ins); |
| TRY_EXCEPT(dcontext, { |
| tmp_pc = decode(dcontext, tgt_pc, &ins); |
| }, { |
| ASSERT_CURIOSITY(false && |
| "crashed while decoding clean call"); |
| instr_free(dcontext, &ins); |
| return NULL; |
| }); |
| DOLOG(3, LOG_CLEANCALL, { |
| disassemble_with_bytes(dcontext, tgt_pc, THREAD); |
| }); |
| /* "pop %r1" or "mov [%rsp] %r1" */ |
| if (!(((instr_get_opcode(&ins) == OP_pop) || |
| (instr_get_opcode(&ins) == OP_mov_ld && |
| opnd_same(instr_get_src(&ins, 0), |
| OPND_CREATE_MEMPTR(REG_XSP, 0)))) && |
| opnd_is_reg(instr_get_dst(&ins, 0)))) { |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: callee calls out is not PIC code, " |
| "bailout\n"); |
| instr_free(dcontext, &ins); |
| return NULL; |
| } |
| /* replace with "mov next_pc r1" */ |
| /* XXX: the memory on top of stack will not be next_pc. */ |
| instr = INSTR_CREATE_mov_imm |
| (GLOBAL_DCONTEXT, instr_get_dst(&ins, 0), src); |
| instr_set_translation(instr, cur_pc); |
| instrlist_append(ilist, instr); |
| ci->num_instrs++; |
| instr_reset(dcontext, &ins); |
| if (tgt_pc != next_pc) { /* a callout */ |
| TRY_EXCEPT(dcontext, { |
| tmp_pc = decode(dcontext, tmp_pc, &ins); |
| }, { |
| ASSERT_CURIOSITY(false && |
| "crashed while decoding clean call"); |
| instr_free(dcontext, &ins); |
| return NULL; |
| }); |
| if (!instr_is_return(&ins)) { |
| instr_free(dcontext, &ins); |
| return NULL; |
| } |
| instr_free(dcontext, &ins); |
| } |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: special PIC code at: "PFX"\n", |
| cur_pc); |
| ci->bailout = false; |
| instr_free(dcontext, &ins); |
| if (tgt_pc == next_pc) |
| return tmp_pc; |
| else |
| return next_pc; |
| } |
| } else { /* ubr or cbr */ |
| tgt_pc = opnd_get_pc(instr_get_target(instr)); |
| if (tgt_pc < cur_pc) { /* backward branch */ |
| if (tgt_pc < ci->start) { |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: bail out on out-of-range branch at: "PFX |
| "to "PFX"\n", cur_pc, tgt_pc); |
| ci->bailout = true; |
| return NULL; |
| } else if (ci->bwd_tgt == NULL || tgt_pc < ci->bwd_tgt) { |
| ci->bwd_tgt = tgt_pc; |
| } |
| } else { /* forward branch */ |
| if (ci->fwd_tgt == NULL || tgt_pc > ci->fwd_tgt) { |
| ci->fwd_tgt = tgt_pc; |
| } |
| } |
| } |
| } |
| return next_pc; |
| } |
| |
| static void |
| check_callee_ilist(dcontext_t *dcontext, callee_info_t *ci) |
| { |
| instrlist_t *ilist = ci->ilist; |
| instr_t *cti, *tgt, *ret; |
| app_pc tgt_pc; |
| if (!ci->bailout) { |
| /* no target pc of any branch is in a middle of an instruction, |
| * replace target pc with target instr |
| */ |
| ret = instrlist_last(ilist); |
| /* must be RETURN, otherwise, bugs in decode_callee_ilist */ |
| ASSERT(instr_is_return(ret)); |
| for (cti = instrlist_first(ilist); |
| cti != ret; |
| cti = instr_get_next(cti)) { |
| if (!instr_is_cti(cti)) |
| continue; |
| ASSERT(!instr_is_mbr(cti)); |
| tgt_pc = opnd_get_pc(instr_get_target(cti)); |
| for (tgt = instrlist_first(ilist); |
| tgt != NULL; |
| tgt = instr_get_next(tgt)) { |
| if (tgt_pc == instr_get_app_pc(tgt)) |
| break; |
| } |
| if (tgt == NULL) { |
| /* cannot find a target instruction, bail out */ |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: bail out on strange internal branch at: "PFX |
| "to "PFX"\n", instr_get_app_pc(cti), tgt_pc); |
| ci->bailout = true; |
| break; |
| } |
| } |
| /* remove RETURN as we do not need it any more */ |
| instrlist_remove(ilist, ret); |
| instr_destroy(GLOBAL_DCONTEXT, ret); |
| } |
| if (ci->bailout) { |
| instrlist_clear_and_destroy(GLOBAL_DCONTEXT, ilist); |
| ci->ilist = NULL; |
| } |
| } |
| |
| static void |
| decode_callee_ilist(dcontext_t *dcontext, callee_info_t *ci) |
| { |
| app_pc cur_pc; |
| |
| ci->ilist = instrlist_create(GLOBAL_DCONTEXT); |
| cur_pc = ci->start; |
| |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: decoding callee starting at: "PFX"\n", ci->start); |
| ci->bailout = false; |
| while (cur_pc != NULL) { |
| cur_pc = decode_callee_instr(dcontext, ci, cur_pc); |
| cur_pc = check_callee_instr(dcontext, ci, cur_pc); |
| } |
| check_callee_ilist(dcontext, ci); |
| } |
| |
| static void |
| analyze_callee_regs_usage(dcontext_t *dcontext, callee_info_t *ci) |
| { |
| instrlist_t *ilist = ci->ilist; |
| instr_t *instr; |
| uint i, num_regparm; |
| |
| ci->num_xmms_used = 0; |
| memset(ci->xmm_used, 0, sizeof(bool) * NUM_XMM_REGS); |
| memset(ci->reg_used, 0, sizeof(bool) * NUM_GP_REGS); |
| ci->write_aflags = false; |
| for (instr = instrlist_first(ilist); |
| instr != NULL; |
| instr = instr_get_next(instr)) { |
| /* XXX: this is not efficient as instr_uses_reg will iterate over |
| * every operands, and the total would be (NUM_REGS * NUM_OPNDS) |
| * for each instruction. However, since this will be only called |
| * once for each clean call callee, it will have little performance |
| * impact unless there are a lot of different clean call callees. |
| */ |
| /* XMM registers usage */ |
| for (i = 0; i < NUM_XMM_REGS; i++) { |
| if (!ci->xmm_used[i] && |
| instr_uses_reg(instr, (DR_REG_XMM0 + (reg_id_t)i))) { |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: callee "PFX" uses XMM%d at "PFX"\n", |
| ci->start, i, instr_get_app_pc(instr)); |
| ci->xmm_used[i] = true; |
| ci->num_xmms_used++; |
| } |
| } |
| /* General purpose registers */ |
| for (i = 0; i < NUM_GP_REGS; i++) { |
| reg_id_t reg = DR_REG_XAX + (reg_id_t)i; |
| if (!ci->reg_used[i] && |
| /* Later we'll rewrite stack accesses to not use XSP or XBP. */ |
| reg != DR_REG_XSP && |
| (reg != DR_REG_XBP || !ci->xbp_is_fp) && |
| instr_uses_reg(instr, reg)) { |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: callee "PFX" uses REG %s at "PFX"\n", |
| ci->start, reg_names[reg], |
| instr_get_app_pc(instr)); |
| ci->reg_used[i] = true; |
| callee_info_reserve_slot(ci, SLOT_REG, reg); |
| } |
| } |
| /* callee update aflags */ |
| if (!ci->write_aflags) { |
| if (TESTANY(EFLAGS_WRITE_6, |
| instr_get_arith_flags(instr, DR_QUERY_INCLUDE_ALL))) { |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: callee "PFX" updates aflags\n", ci->start); |
| ci->write_aflags = true; |
| } |
| } |
| } |
| |
| /* check if callee read aflags from caller */ |
| /* set it false for the case of empty callee. */ |
| ci->read_aflags = false; |
| for (instr = instrlist_first(ilist); |
| instr != NULL; |
| instr = instr_get_next(instr)) { |
| uint flags = instr_get_arith_flags(instr, DR_QUERY_DEFAULT); |
| if (TESTANY(EFLAGS_READ_6, flags)) { |
| ci->read_aflags = true; |
| break; |
| } |
| if (TESTALL(EFLAGS_WRITE_6, flags)) |
| break; |
| if (instr_is_return(instr)) |
| break; |
| if (instr_is_cti(instr)) { |
| ci->read_aflags = true; |
| break; |
| } |
| } |
| if (ci->read_aflags) { |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: callee "PFX" reads aflags from caller\n", ci->start); |
| } |
| |
| /* If we read or write aflags, we need to reserve a slot to save them. |
| * We may or may not use the slot at the call site, but it needs to be |
| * reserved just in case. |
| */ |
| if (ci->read_aflags || ci->write_aflags) { |
| /* XXX: We can optimize away the flags spill to memory if the callee |
| * does not use xax. |
| */ |
| callee_info_reserve_slot(ci, SLOT_FLAGS, 0); |
| /* Spilling flags clobbers xax, so we need to spill the app xax first. |
| * If the callee used xax, then the slot will already be reserved. |
| */ |
| if (!ci->reg_used[DR_REG_XAX - DR_REG_XAX]) { |
| callee_info_reserve_slot(ci, SLOT_REG, DR_REG_XAX); |
| } |
| } |
| |
| /* i#987, i#988: reg might be used for arg passing but not used in callee */ |
| num_regparm = MIN(ci->num_args, NUM_REGPARM); |
| for (i = 0; i < num_regparm; i++) { |
| reg_id_t reg = regparms[i]; |
| if (!ci->reg_used[reg - DR_REG_XAX]) { |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: callee "PFX" uses REG %s for arg passing\n", |
| ci->start, reg_names[reg]); |
| ci->reg_used[reg - DR_REG_XAX] = true; |
| callee_info_reserve_slot(ci, SLOT_REG, reg); |
| } |
| } |
| } |
| |
| /* We use push/pop pattern to detect callee saved registers, |
| * and assume that the code later won't change those saved value |
| * on the stack. |
| */ |
| static void |
| analyze_callee_save_reg(dcontext_t *dcontext, callee_info_t *ci) |
| { |
| instrlist_t *ilist = ci->ilist; |
| instr_t *top, *bot, *push_xbp, *pop_xbp, *instr, *enter, *leave; |
| |
| ASSERT(ilist != NULL); |
| ci->num_callee_save_regs = 0; |
| top = instrlist_first(ilist); |
| bot = instrlist_last(ilist); |
| if (top == bot) { |
| /* zero or one instruction only, no callee save */ |
| return; |
| } |
| /* 1. frame pointer usage analysis. */ |
| /* i#392-c#4: frame pointer code might be in the middle |
| * 0xf771f390 <compiler_inscount>: call 0xf7723a19 <get_pc_thunk> |
| * 0xf771f395 <compiler_inscount+5>: add $0x6c5f,%ecx |
| * 0xf771f39b <compiler_inscount+11>: push %ebp |
| * 0xf771f39c <compiler_inscount+12>: mov %esp,%ebp |
| * 0xf771f39e <compiler_inscount+14>: mov 0x8(%ebp),%eax |
| * 0xf771f3a1 <compiler_inscount+17>: pop %ebp |
| * 0xf771f3a2 <compiler_inscount+18>: add %eax,0x494(%ecx) |
| * 0xf771f3a8 <compiler_inscount+24>: ret |
| */ |
| /* for easy of comparison, create push xbp, pop xbp */ |
| push_xbp = INSTR_CREATE_push(dcontext, opnd_create_reg(DR_REG_XBP)); |
| pop_xbp = INSTR_CREATE_pop(dcontext, opnd_create_reg(DR_REG_XBP)); |
| /* i#392-c#4: search for frame enter/leave pair */ |
| enter = NULL; |
| leave = NULL; |
| for (instr = top; instr != bot; instr = instr_get_next(instr)) { |
| if (instr_get_opcode(instr) == OP_enter || |
| instr_same(push_xbp, instr)) { |
| enter = instr; |
| break; |
| } |
| } |
| if (enter != NULL) { |
| for (instr = bot; instr != enter; instr = instr_get_prev(instr)) { |
| if (instr_get_opcode(instr) == OP_leave || |
| instr_same(pop_xbp, instr)) { |
| leave = instr; |
| break; |
| } |
| } |
| } |
| /* Check enter/leave pair */ |
| if (enter != NULL && leave != NULL && |
| (ci->bwd_tgt == NULL || instr_get_app_pc(enter) < ci->bwd_tgt) && |
| (ci->fwd_tgt == NULL || instr_get_app_pc(leave) >= ci->fwd_tgt)) { |
| /* check if xbp is fp */ |
| if (instr_get_opcode(enter) == OP_enter) { |
| ci->xbp_is_fp = true; |
| } else { |
| /* i#392-c#2: mov xsp => xbp might not be right after push_xbp */ |
| for (instr = instr_get_next(enter); |
| instr != leave; |
| instr = instr_get_next(instr)) { |
| if (instr != NULL && |
| /* we want to use instr_same to find "mov xsp => xbp", |
| * but it could be OP_mov_ld or OP_mov_st, so use opnds |
| * for comparison instead. |
| */ |
| instr_num_srcs(instr) == 1 && |
| instr_num_dsts(instr) == 1 && |
| opnd_is_reg(instr_get_src(instr, 0)) && |
| opnd_get_reg(instr_get_src(instr, 0)) == DR_REG_XSP && |
| opnd_is_reg(instr_get_dst(instr, 0)) && |
| opnd_get_reg(instr_get_dst(instr, 0)) == DR_REG_XBP) { |
| /* found mov xsp => xbp */ |
| ci->xbp_is_fp = true; |
| /* remove it */ |
| instrlist_remove(ilist, instr); |
| instr_destroy(GLOBAL_DCONTEXT, instr); |
| break; |
| } |
| } |
| } |
| if (ci->xbp_is_fp) { |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: callee "PFX" use XBP as frame pointer\n", ci->start); |
| } else { |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: callee "PFX" callee-saves reg xbp at "PFX" and "PFX"\n", |
| ci->start, instr_get_app_pc(enter), instr_get_app_pc(leave)); |
| ci->callee_save_regs |
| [DR_REG_XBP - DR_REG_XAX] = true; |
| ci->num_callee_save_regs++; |
| } |
| /* remove enter/leave or push/pop xbp pair */ |
| instrlist_remove(ilist, enter); |
| instrlist_remove(ilist, leave); |
| instr_destroy(GLOBAL_DCONTEXT, enter); |
| instr_destroy(GLOBAL_DCONTEXT, leave); |
| top = instrlist_first(ilist); |
| bot = instrlist_last(ilist); |
| } |
| instr_destroy(dcontext, push_xbp); |
| instr_destroy(dcontext, pop_xbp); |
| |
| /* get the rest callee save regs */ |
| /* XXX: the callee save may be corrupted by memory update on the stack. */ |
| /* XXX: the callee save may use mov instead of push/pop */ |
| while (top != NULL && bot != NULL) { |
| /* if not in the first/last bb, break */ |
| if ((ci->bwd_tgt != NULL && instr_get_app_pc(top) >= ci->bwd_tgt) || |
| (ci->fwd_tgt != NULL && instr_get_app_pc(bot) < ci->fwd_tgt) || |
| instr_is_cti(top) || instr_is_cti(bot)) |
| break; |
| /* XXX: I saw some compiler inserts nop, need to handle. */ |
| /* push/pop pair check */ |
| if (instr_get_opcode(top) != OP_push || |
| instr_get_opcode(bot) != OP_pop || |
| !opnd_same(instr_get_src(top, 0), instr_get_dst(bot, 0)) || |
| !opnd_is_reg(instr_get_src(top, 0)) || |
| opnd_get_reg(instr_get_src(top, 0)) == REG_XSP) |
| break; |
| /* It is a callee saved reg, we will do our own save for it. */ |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: callee "PFX" callee-saves reg %s at "PFX" and "PFX"\n", |
| ci->start, reg_names[opnd_get_reg(instr_get_src(top, 0))], |
| instr_get_app_pc(top), instr_get_app_pc(bot)); |
| ci->callee_save_regs |
| [opnd_get_reg(instr_get_src(top, 0)) - DR_REG_XAX] = true; |
| ci->num_callee_save_regs++; |
| /* remove & destroy the push/pop pairs */ |
| instrlist_remove(ilist, top); |
| instr_destroy(GLOBAL_DCONTEXT, top); |
| instrlist_remove(ilist, bot); |
| instr_destroy(GLOBAL_DCONTEXT, bot); |
| /* get next pair */ |
| top = instrlist_first(ilist); |
| bot = instrlist_last(ilist); |
| } |
| } |
| |
| static void |
| analyze_callee_tls(dcontext_t *dcontext, callee_info_t *ci) |
| { |
| /* access to TLS means we do need to swap/preserve TEB/PEB fields |
| * for library isolation (errno, etc.) |
| */ |
| instr_t *instr; |
| int i; |
| ci->tls_used = false; |
| for (instr = instrlist_first(ci->ilist); |
| instr != NULL; |
| instr = instr_get_next(instr)) { |
| /* we assume any access via app's tls is to app errno. */ |
| for (i = 0; i < instr_num_srcs(instr); i++) { |
| opnd_t opnd = instr_get_src(instr, i); |
| if (opnd_is_far_base_disp(opnd) && |
| opnd_get_segment(opnd) == LIB_SEG_TLS) |
| ci->tls_used = true; |
| } |
| for (i = 0; i < instr_num_dsts(instr); i++) { |
| opnd_t opnd = instr_get_dst(instr, i); |
| if (opnd_is_far_base_disp(opnd) && |
| opnd_get_segment(opnd) == LIB_SEG_TLS) |
| ci->tls_used = true; |
| } |
| } |
| if (ci->tls_used) { |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: callee "PFX" accesses far memory\n", ci->start); |
| } |
| } |
| |
| /* Pick a register to use as a base register pointing to our spill slots. |
| * We can't use a register that is: |
| * - DR_XSP (need a valid stack in case of fault) |
| * - DR_XAX (could be used for args or aflags) |
| * - REGPARM_0 on X64 (RDI on Lin and RCX on Win; for N>1 args, avoid REGPARM_<=N) |
| * - used by the callee |
| */ |
| static void |
| analyze_callee_pick_spill_reg(dcontext_t *dcontext, callee_info_t *ci) |
| { |
| uint i; |
| for (i = 0; i < NUM_GP_REGS; i++) { |
| reg_id_t reg = DR_REG_XAX + (reg_id_t)i; |
| if (reg == DR_REG_XSP || |
| reg == DR_REG_XAX IF_X64(|| reg == REGPARM_0)) |
| continue; |
| if (!ci->reg_used[i]) { |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: picking spill reg %s for callee "PFX"\n", |
| reg_names[reg], ci->start); |
| ci->spill_reg = reg; |
| return; |
| } |
| } |
| |
| /* This won't happen unless someone increases CLEANCALL_NUM_INLINE_SLOTS or |
| * handles calls with more arguments. There are at least 8 GPRs, 4 spills, |
| * and 3 other regs we can't touch, so one will be available. |
| */ |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: failed to pick spill reg for callee "PFX"\n", ci->start); |
| /* Fail to inline by setting ci->spill_reg == DR_REG_INVALID. */ |
| ci->spill_reg = DR_REG_INVALID; |
| } |
| |
| static void |
| analyze_callee_inline(dcontext_t *dcontext, callee_info_t *ci) |
| { |
| instr_t *instr, *next_instr; |
| opnd_t opnd, mem_ref, slot; |
| bool opt_inline = true; |
| int i; |
| |
| mem_ref = opnd_create_null(); |
| /* a set of condition checks */ |
| if (INTERNAL_OPTION(opt_cleancall) < 2) { |
| LOG(THREAD, LOG_CLEANCALL, 1, |
| "CLEANCALL: callee "PFX" cannot be inlined: opt_cleancall: %d.\n", |
| ci->start, INTERNAL_OPTION(opt_cleancall)); |
| opt_inline = false; |
| } |
| if (ci->num_instrs > MAX_NUM_INLINE_INSTRS) { |
| LOG(THREAD, LOG_CLEANCALL, 1, |
| "CLEANCALL: callee "PFX" cannot be inlined: num of instrs: %d.\n", |
| ci->start, ci->num_instrs); |
| opt_inline = false; |
| } |
| if (ci->bwd_tgt != NULL || ci->fwd_tgt != NULL) { |
| LOG(THREAD, LOG_CLEANCALL, 1, |
| "CLEANCALL: callee "PFX" cannot be inlined: has control flow.\n", |
| ci->start); |
| opt_inline = false; |
| } |
| if (ci->num_xmms_used != 0) { |
| LOG(THREAD, LOG_CLEANCALL, 1, |
| "CLEANCALL: callee "PFX" cannot be inlined: uses XMM.\n", |
| ci->start); |
| opt_inline = false; |
| } |
| if (ci->tls_used) { |
| LOG(THREAD, LOG_CLEANCALL, 1, |
| "CLEANCALL: callee "PFX" cannot be inlined: accesses TLS.\n", |
| ci->start); |
| opt_inline = false; |
| } |
| if (ci->spill_reg == DR_REG_INVALID) { |
| LOG(THREAD, LOG_CLEANCALL, 1, |
| "CLEANCALL: callee "PFX" cannot be inlined:" |
| " unable to pick spill reg.\n", ci->start); |
| opt_inline = false; |
| } |
| if (!SCRATCH_ALWAYS_TLS() || ci->slots_used > CLEANCALL_NUM_INLINE_SLOTS) { |
| LOG(THREAD, LOG_CLEANCALL, 1, |
| "CLEANCALL: callee "PFX" cannot be inlined:" |
| " not enough scratch slots.\n", ci->start); |
| opt_inline = false; |
| } |
| if (!opt_inline) { |
| instrlist_clear_and_destroy(GLOBAL_DCONTEXT, ci->ilist); |
| ci->ilist = NULL; |
| return; |
| } |
| |
| /* Now we need scan instructions in the list, |
| * check if possible for inline, and convert memory reference |
| */ |
| ci->has_locals = false; |
| for (instr = instrlist_first(ci->ilist); |
| instr != NULL; |
| instr = next_instr) { |
| uint opc = instr_get_opcode(instr); |
| next_instr = instr_get_next(instr); |
| /* sanity checks on stack usage */ |
| if (instr_writes_to_reg(instr, DR_REG_XBP, DR_QUERY_INCLUDE_ALL) && |
| ci->xbp_is_fp) { |
| /* xbp must not be changed if xbp is used for frame pointer */ |
| LOG(THREAD, LOG_CLEANCALL, 1, |
| "CLEANCALL: callee "PFX" cannot be inlined: XBP is updated.\n", |
| ci->start); |
| opt_inline = false; |
| break; |
| } else if (instr_writes_to_reg(instr, DR_REG_XSP, DR_QUERY_INCLUDE_ALL)) { |
| /* stack pointer update, we only allow: |
| * lea [xsp, disp] => xsp |
| * xsp + imm_int => xsp |
| * xsp - imm_int => xsp |
| */ |
| if (ci->has_locals) { |
| /* we do not allow stack adjustment after accessing the stack */ |
| opt_inline = false; |
| } |
| if (opc == OP_lea) { |
| /* lea [xsp, disp] => xsp */ |
| opnd = instr_get_src(instr, 0); |
| if (!opnd_is_base_disp(opnd) || |
| opnd_get_base(opnd) != DR_REG_XSP || |
| opnd_get_index(opnd) != DR_REG_NULL) |
| opt_inline = false; |
| } else if (opc == OP_sub || opc == OP_add) { |
| /* xsp +/- int => xsp */ |
| if (!opnd_is_immed_int(instr_get_src(instr, 0))) |
| opt_inline = false; |
| } else { |
| /* other cases like push/pop are not allowed */ |
| opt_inline = false; |
| } |
| if (opt_inline) { |
| LOG(THREAD, LOG_CLEANCALL, 3, |
| "CLEANCALL: removing frame adjustment at "PFX".\n", |
| instr_get_app_pc(instr)); |
| instrlist_remove(ci->ilist, instr); |
| instr_destroy(GLOBAL_DCONTEXT, instr); |
| continue; |
| } else { |
| LOG(THREAD, LOG_CLEANCALL, 1, |
| "CLEANCALL: callee "PFX" cannot be inlined: " |
| "complicated stack pointer update at "PFX".\n", |
| ci->start, instr_get_app_pc(instr)); |
| break; |
| } |
| } else if (instr_reg_in_src(instr, DR_REG_XSP) || |
| (instr_reg_in_src(instr, DR_REG_XBP) && ci->xbp_is_fp)) { |
| /* Detect stack address leakage */ |
| /* lea [xsp/xbp] */ |
| if (opc == OP_lea) |
| opt_inline = false; |
| /* any direct use reg xsp or xbp */ |
| for (i = 0; i < instr_num_srcs(instr); i++) { |
| opnd_t src = instr_get_src(instr, i); |
| if (opnd_is_reg(src) && |
| (reg_overlap(REG_XSP, opnd_get_reg(src)) || |
| (reg_overlap(REG_XBP, opnd_get_reg(src)) && ci->xbp_is_fp))) |
| break; |
| } |
| if (i != instr_num_srcs(instr)) |
| opt_inline = false; |
| if (!opt_inline) { |
| LOG(THREAD, LOG_CLEANCALL, 1, |
| "CLEANCALL: callee "PFX" cannot be inlined: " |
| "stack pointer leaked "PFX".\n", |
| ci->start, instr_get_app_pc(instr)); |
| break; |
| } |
| } |
| /* Check how many stack variables the callee has. |
| * We will not inline the callee if it has more than one stack variable. |
| */ |
| if (instr_reads_memory(instr)) { |
| for (i = 0; i < instr_num_srcs(instr); i++) { |
| opnd = instr_get_src(instr, i); |
| if (!opnd_is_base_disp(opnd)) |
| continue; |
| if (opnd_get_base(opnd) != DR_REG_XSP && |
| (opnd_get_base(opnd) != DR_REG_XBP || !ci->xbp_is_fp)) |
| continue; |
| if (!ci->has_locals) { |
| /* We see the first one, remember it. */ |
| mem_ref = opnd; |
| callee_info_reserve_slot(ci, SLOT_LOCAL, 0); |
| if (ci->slots_used > CLEANCALL_NUM_INLINE_SLOTS) { |
| LOG(THREAD, LOG_CLEANCALL, 1, |
| "CLEANCALL: callee "PFX" cannot be inlined: " |
| "not enough slots for local.\n", |
| ci->start); |
| break; |
| } |
| ci->has_locals = true; |
| } else if (!opnd_same(opnd, mem_ref)) { |
| /* Check if it is the same stack var as the one we saw. |
| * If different, no inline. |
| */ |
| LOG(THREAD, LOG_CLEANCALL, 1, |
| "CLEANCALL: callee "PFX" cannot be inlined: " |
| "more than one stack location is accessed "PFX".\n", |
| ci->start, instr_get_app_pc(instr)); |
| break; |
| } |
| /* replace the stack location with the scratch slot. */ |
| slot = callee_info_slot_opnd(ci, SLOT_LOCAL, 0); |
| opnd_set_size(&slot, opnd_get_size(mem_ref)); |
| instr_set_src(instr, i, slot); |
| } |
| if (i != instr_num_srcs(instr)) { |
| opt_inline = false; |
| break; |
| } |
| } |
| if (instr_writes_memory(instr)) { |
| for (i = 0; i < instr_num_dsts(instr); i++) { |
| opnd = instr_get_dst(instr, i); |
| if (!opnd_is_base_disp(opnd)) |
| continue; |
| if (opnd_get_base(opnd) != DR_REG_XSP && |
| (opnd_get_base(opnd) != DR_REG_XBP || !ci->xbp_is_fp)) |
| continue; |
| if (!ci->has_locals) { |
| mem_ref = opnd; |
| callee_info_reserve_slot(ci, SLOT_LOCAL, 0); |
| if (ci->slots_used > CLEANCALL_NUM_INLINE_SLOTS) { |
| LOG(THREAD, LOG_CLEANCALL, 1, |
| "CLEANCALL: callee "PFX" cannot be inlined: " |
| "not enough slots for local.\n", |
| ci->start); |
| break; |
| } |
| ci->has_locals = true; |
| } else if (!opnd_same(opnd, mem_ref)) { |
| /* currently we only allows one stack refs */ |
| LOG(THREAD, LOG_CLEANCALL, 1, |
| "CLEANCALL: callee "PFX" cannot be inlined: " |
| "more than one stack location is accessed "PFX".\n", |
| ci->start, instr_get_app_pc(instr)); |
| break; |
| } |
| /* replace the stack location with the scratch slot. */ |
| slot = callee_info_slot_opnd(ci, SLOT_LOCAL, 0); |
| opnd_set_size(&slot, opnd_get_size(mem_ref)); |
| instr_set_dst(instr, i, slot); |
| } |
| if (i != instr_num_dsts(instr)) { |
| opt_inline = false; |
| break; |
| } |
| } |
| } |
| if (instr == NULL && opt_inline) { |
| ci->opt_inline = true; |
| LOG(THREAD, LOG_CLEANCALL, 1, |
| "CLEANCALL: callee "PFX" can be inlined.\n", ci->start); |
| } else { |
| /* not inline callee, so ilist is not needed. */ |
| LOG(THREAD, LOG_CLEANCALL, 1, |
| "CLEANCALL: callee "PFX" cannot be inlined.\n", ci->start); |
| instrlist_clear_and_destroy(GLOBAL_DCONTEXT, ci->ilist); |
| ci->ilist = NULL; |
| } |
| } |
| |
| static void |
| analyze_callee_ilist(dcontext_t *dcontext, callee_info_t *ci) |
| { |
| ASSERT(!ci->bailout && ci->ilist != NULL); |
| /* Remove frame setup and reg pushes before analyzing reg usage. */ |
| if (INTERNAL_OPTION(opt_cleancall) >= 1) { |
| analyze_callee_save_reg(dcontext, ci); |
| } |
| analyze_callee_regs_usage(dcontext, ci); |
| if (INTERNAL_OPTION(opt_cleancall) < 1) { |
| instrlist_clear_and_destroy(GLOBAL_DCONTEXT, ci->ilist); |
| ci->ilist = NULL; |
| } else { |
| analyze_callee_tls(dcontext, ci); |
| analyze_callee_pick_spill_reg(dcontext, ci); |
| analyze_callee_inline(dcontext, ci); |
| } |
| } |
| |
| static void |
| analyze_clean_call_aflags(dcontext_t *dcontext, |
| clean_call_info_t *cci, instr_t *where) |
| { |
| callee_info_t *ci = cci->callee_info; |
| instr_t *instr; |
| |
| /* If there's a flags read, we clear the flags. If there's a write or read, |
| * we save them, because a read creates a clear which is a write. */ |
| cci->skip_clear_eflags = !ci->read_aflags; |
| cci->skip_save_aflags = !(ci->write_aflags || ci->read_aflags); |
| /* XXX: this is a more aggressive optimization by analyzing the ilist |
| * to be instrumented. The client may change the ilist, which violate |
| * the analysis result. For example, |
| * I do not need save the aflags now if an instruction |
| * after "where" updating all aflags, but later the client can |
| * insert an instruction reads the aflags before that instruction. |
| */ |
| if (INTERNAL_OPTION(opt_cleancall) > 1 && !cci->skip_save_aflags) { |
| for (instr = where; instr != NULL; instr = instr_get_next(instr)) { |
| uint flags = instr_get_arith_flags(instr, DR_QUERY_DEFAULT); |
| if (TESTANY(EFLAGS_READ_6, flags) || instr_is_cti(instr)) |
| break; |
| if (TESTALL(EFLAGS_WRITE_6, flags)) { |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: inserting clean call "PFX |
| ", skip saving aflags.\n", ci->start); |
| cci->skip_save_aflags = true; |
| break; |
| } |
| } |
| } |
| } |
| |
| static void |
| analyze_clean_call_regs(dcontext_t *dcontext, clean_call_info_t *cci) |
| { |
| uint i, num_regparm; |
| callee_info_t *info = cci->callee_info; |
| |
| /* 1. xmm registers */ |
| for (i = 0; i < NUM_XMM_REGS; i++) { |
| if (info->xmm_used[i]) { |
| cci->xmm_skip[i] = false; |
| } else { |
| LOG(THREAD, LOG_CLEANCALL, 3, |
| "CLEANCALL: if inserting clean call "PFX |
| ", skip saving XMM%d.\n", info->start, i); |
| cci->xmm_skip[i] = true; |
| cci->num_xmms_skip++; |
| } |
| } |
| if (INTERNAL_OPTION(opt_cleancall) > 2 && cci->num_xmms_skip != NUM_XMM_REGS) |
| cci->should_align = false; |
| /* 2. general purpose registers */ |
| /* set regs not to be saved for clean call */ |
| for (i = 0; i < NUM_GP_REGS; i++) { |
| if (info->reg_used[i]) { |
| cci->reg_skip[i] = false; |
| } else { |
| LOG(THREAD, LOG_CLEANCALL, 3, |
| "CLEANCALL: if inserting clean call "PFX |
| ", skip saving reg %s.\n", |
| info->start, reg_names[DR_REG_XAX + (reg_id_t)i]); |
| cci->reg_skip[i] = true; |
| cci->num_regs_skip++; |
| } |
| } |
| /* we need save/restore rax if save aflags because rax is used */ |
| if (!cci->skip_save_aflags && cci->reg_skip[0]) { |
| LOG(THREAD, LOG_CLEANCALL, 3, |
| "CLEANCALL: if inserting clean call "PFX |
| ", cannot skip saving reg xax.\n", info->start); |
| cci->reg_skip[0] = false; |
| cci->num_regs_skip++; |
| } |
| /* i#987: args are passed via regs in 64-bit, which will clober those regs, |
| * so we should not skip any regs that are used for arg passing. |
| * XXX: we do not support args passing via XMMs, |
| * see docs for dr_insert_clean_call |
| * XXX: we can elminate the arg passing instead since it is not used |
| * if marked for skip. However, we have to handle cases like some args |
| * are used and some are not. |
| */ |
| num_regparm = cci->num_args < NUM_REGPARM ? cci->num_args : NUM_REGPARM; |
| for (i = 0; i < num_regparm; i++) { |
| if (cci->reg_skip[regparms[i] - DR_REG_XAX]) { |
| LOG(THREAD, LOG_CLEANCALL, 3, |
| "CLEANCALL: if inserting clean call "PFX |
| ", cannot skip saving reg %s due to param passing.\n", |
| info->start, reg_names[regparms[i]]); |
| cci->reg_skip[regparms[i] - DR_REG_XAX] = false; |
| cci->num_regs_skip--; |
| /* We cannot call callee_info_reserve_slot for reserving slot |
| * on inlining the callee here, because we are in clean call |
| * analysis not callee anaysis. |
| * Also the reg for arg passing should be first handled in |
| * analyze_callee_regs_usage on callee_info creation. |
| * If we still reach here, it means the number args changes |
| * for the same clean call, so we will not inline it and do not |
| * need call callee_info_reserve_slot either. |
| */ |
| } |
| } |
| } |
| |
| static void |
| analyze_clean_call_args(dcontext_t *dcontext, |
| clean_call_info_t *cci, |
| opnd_t *args) |
| { |
| uint i, j, num_regparm; |
| |
| num_regparm = cci->num_args < NUM_REGPARM ? cci->num_args : NUM_REGPARM; |
| /* If a param uses a reg, DR need restore register value, which assumes |
| * the full context switch with priv_mcontext_t layout, |
| * in which case we need keep priv_mcontext_t layout. |
| */ |
| cci->save_all_regs = false; |
| for (i = 0; i < cci->num_args; i++) { |
| if (opnd_is_reg(args[i])) |
| cci->save_all_regs = true; |
| for (j = 0; j < num_regparm; j++) { |
| if (opnd_uses_reg(args[i], regparms[j])) |
| cci->save_all_regs = true; |
| } |
| } |
| /* We only set cci->reg_skip all to false later if we fail to inline. We |
| * only need to preserve the layout if we're not inlining. |
| */ |
| } |
| |
| static bool |
| analyze_clean_call_inline(dcontext_t *dcontext, clean_call_info_t *cci) |
| { |
| callee_info_t *info = cci->callee_info; |
| bool opt_inline = true; |
| |
| if (INTERNAL_OPTION(opt_cleancall) <= 1) { |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: fail inlining clean call "PFX", opt_cleancall %d.\n", |
| info->start, INTERNAL_OPTION(opt_cleancall)); |
| opt_inline = false; |
| } |
| if (cci->num_args > 1) { |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: fail inlining clean call "PFX", number of args %d > 1.\n", |
| info->start, cci->num_args); |
| opt_inline = false; |
| } |
| if (cci->num_args > info->num_args) { |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: fail inlining clean call "PFX |
| ", number of args increases.\n", |
| info->start); |
| opt_inline = false; |
| } |
| if (cci->save_fpstate) { |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: fail inlining clean call "PFX", saving fpstate.\n", |
| info->start); |
| opt_inline = false; |
| } |
| if (!info->opt_inline) { |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: fail inlining clean call "PFX", complex callee.\n", |
| info->start); |
| opt_inline = false; |
| } |
| if (info->slots_used > CLEANCALL_NUM_INLINE_SLOTS) { |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: fail inlining clean call "PFX", used %d slots, " |
| "> %d available slots.\n", |
| info->start, info->slots_used, CLEANCALL_NUM_INLINE_SLOTS); |
| opt_inline = false; |
| } |
| if (!opt_inline) { |
| if (cci->save_all_regs) { |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: inserting clean call "PFX |
| ", save all regs in priv_mcontext_t layout.\n", |
| info->start); |
| cci->num_regs_skip = 0; |
| memset(cci->reg_skip, 0, sizeof(bool) * NUM_GP_REGS); |
| cci->should_align = true; |
| } else { |
| uint i; |
| for (i = 0; i < NUM_GP_REGS; i++) { |
| if (!cci->reg_skip[i] && info->callee_save_regs[i]) { |
| cci->reg_skip[i] = true; |
| cci->num_regs_skip++; |
| } |
| } |
| } |
| if (cci->num_xmms_skip == NUM_XMM_REGS) { |
| STATS_INC(cleancall_xmm_skipped); |
| } |
| if (cci->skip_save_aflags) { |
| STATS_INC(cleancall_aflags_save_skipped); |
| } |
| if (cci->skip_clear_eflags) { |
| STATS_INC(cleancall_aflags_clear_skipped); |
| } |
| } else { |
| cci->ilist = instrlist_clone(dcontext, info->ilist); |
| } |
| return opt_inline; |
| } |
| |
| bool |
| analyze_clean_call(dcontext_t *dcontext, clean_call_info_t *cci, instr_t *where, |
| void *callee, bool save_fpstate, uint num_args, opnd_t *args) |
| { |
| callee_info_t *ci; |
| /* by default, no inline optimization */ |
| bool should_inline = false; |
| |
| CLIENT_ASSERT(callee != NULL, "Clean call target is NULL"); |
| /* 1. init clean_call_info */ |
| clean_call_info_init(cci, callee, save_fpstate, num_args); |
| /* 2. check runtime optimization options */ |
| if (INTERNAL_OPTION(opt_cleancall) > 0) { |
| /* 3. search if callee was analyzed before */ |
| ci = callee_info_table_lookup(callee); |
| /* 4. this callee is not seen before */ |
| if (ci == NULL) { |
| STATS_INC(cleancall_analyzed); |
| LOG(THREAD, LOG_CLEANCALL, 2, "CLEANCALL: analyze callee "PFX"\n", callee); |
| /* 4.1. create func_info */ |
| ci = callee_info_create((app_pc)callee, num_args); |
| /* 4.2. decode the callee */ |
| decode_callee_ilist(dcontext, ci); |
| /* 4.3. analyze the instrlist */ |
| if (!ci->bailout) |
| analyze_callee_ilist(dcontext, ci); |
| /* 4.4. add info into callee list */ |
| ci = callee_info_table_add(ci); |
| } |
| cci->callee_info = ci; |
| if (ci->bailout) { |
| callee_info_init(ci); |
| ci->start = (app_pc)callee; |
| LOG(THREAD, LOG_CLEANCALL, 2, "CLEANCALL: bailout "PFX"\n", callee); |
| } else { |
| /* 5. aflags optimization analysis */ |
| analyze_clean_call_aflags(dcontext, cci, where); |
| /* 6. register optimization analysis */ |
| analyze_clean_call_regs(dcontext, cci); |
| /* 7. check arguments */ |
| analyze_clean_call_args(dcontext, cci, args); |
| /* 8. inline optimization analysis */ |
| should_inline = analyze_clean_call_inline(dcontext, cci); |
| } |
| } |
| /* 9. derived fields */ |
| if (cci->num_xmms_skip == 0 /* save all xmms */ && |
| cci->num_regs_skip == 0 /* save all regs */ && |
| !cci->skip_save_aflags) |
| cci->out_of_line_swap = true; |
| |
| return should_inline; |
| } |
| |
| static void |
| insert_inline_reg_save(dcontext_t *dcontext, clean_call_info_t *cci, |
| instrlist_t *ilist, instr_t *where, opnd_t *args) |
| { |
| callee_info_t *ci = cci->callee_info; |
| int i; |
| |
| /* Don't spill anything if we don't have to. */ |
| if (cci->num_regs_skip == NUM_GP_REGS && cci->skip_save_aflags && |
| !ci->has_locals) { |
| return; |
| } |
| |
| /* Spill a register to TLS and point it at our unprotected_context_t. */ |
| PRE(ilist, where, instr_create_save_to_tls |
| (dcontext, ci->spill_reg, TLS_XAX_SLOT)); |
| insert_get_mcontext_base(dcontext, ilist, where, ci->spill_reg); |
| |
| /* Save used registers. */ |
| ASSERT(cci->num_xmms_skip == NUM_XMM_REGS); |
| for (i = 0; i < NUM_GP_REGS; i++) { |
| if (!cci->reg_skip[i]) { |
| reg_id_t reg_id = DR_REG_XAX + (reg_id_t)i; |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: inlining clean call "PFX", saving reg %s.\n", |
| ci->start, reg_names[reg_id]); |
| PRE(ilist, where, INSTR_CREATE_mov_st |
| (dcontext, callee_info_slot_opnd(ci, SLOT_REG, reg_id), |
| opnd_create_reg(reg_id))); |
| } |
| } |
| |
| /* Save aflags if necessary via XAX, which was just saved if needed. */ |
| if (!cci->skip_save_aflags) { |
| ASSERT(!cci->reg_skip[DR_REG_XAX - DR_REG_XAX]); |
| dr_save_arith_flags_to_xax(dcontext, ilist, where); |
| PRE(ilist, where, INSTR_CREATE_mov_st |
| (dcontext, callee_info_slot_opnd(ci, SLOT_FLAGS, 0), |
| opnd_create_reg(DR_REG_XAX))); |
| /* Restore app XAX here if it's needed to materialize the argument. */ |
| if (cci->num_args > 0 && opnd_uses_reg(args[0], DR_REG_XAX)) { |
| PRE(ilist, where, INSTR_CREATE_mov_ld |
| (dcontext, opnd_create_reg(DR_REG_XAX), |
| callee_info_slot_opnd(ci, SLOT_REG, DR_REG_XAX))); |
| } |
| } |
| } |
| |
| static void |
| insert_inline_reg_restore(dcontext_t *dcontext, clean_call_info_t *cci, |
| instrlist_t *ilist, instr_t *where) |
| { |
| int i; |
| callee_info_t *ci = cci->callee_info; |
| |
| /* Don't restore regs if we don't have to. */ |
| if (cci->num_regs_skip == NUM_GP_REGS && cci->skip_save_aflags && |
| !ci->has_locals) { |
| return; |
| } |
| |
| /* Restore aflags before regs because it uses xax. */ |
| if (!cci->skip_save_aflags) { |
| PRE(ilist, where, INSTR_CREATE_mov_ld |
| (dcontext, opnd_create_reg(DR_REG_XAX), |
| callee_info_slot_opnd(ci, SLOT_FLAGS, 0))); |
| dr_restore_arith_flags_from_xax(dcontext, ilist, where); |
| } |
| |
| /* Now restore all registers. */ |
| for (i = NUM_GP_REGS - 1; i >= 0; i--) { |
| if (!cci->reg_skip[i]) { |
| reg_id_t reg_id = DR_REG_XAX + (reg_id_t)i; |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: inlining clean call "PFX", restoring reg %s.\n", |
| ci->start, reg_names[reg_id]); |
| PRE(ilist, where, INSTR_CREATE_mov_ld |
| (dcontext, opnd_create_reg(reg_id), |
| callee_info_slot_opnd(ci, SLOT_REG, reg_id))); |
| } |
| } |
| |
| /* Restore reg used for unprotected_context_t pointer. */ |
| PRE(ilist, where, instr_create_restore_from_tls |
| (dcontext, ci->spill_reg, TLS_XAX_SLOT)); |
| } |
| |
| static void |
| insert_inline_arg_setup(dcontext_t *dcontext, clean_call_info_t *cci, |
| instrlist_t *ilist, instr_t *where, opnd_t *args) |
| { |
| reg_id_t regparm; |
| callee_info_t *ci = cci->callee_info; |
| opnd_t arg; |
| bool restored_spill_reg = false; |
| |
| if (cci->num_args == 0) |
| return; |
| |
| /* If the arg is un-referenced, don't set it up. This is actually necessary |
| * for correctness because we will not have spilled regparm[0] on x64 or |
| * reserved SLOT_LOCAL for x86_32. |
| */ |
| if (IF_X64_ELSE(!ci->reg_used[regparms[0] - DR_REG_XAX], |
| !ci->has_locals)) { |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: callee "PFX" doesn't read arg, skipping arg setup.\n", |
| ci->start); |
| return; |
| } |
| |
| ASSERT(cci->num_args == 1); |
| arg = args[0]; |
| regparm = shrink_reg_for_param(IF_X64_ELSE(regparms[0], DR_REG_XAX), arg); |
| |
| if (opnd_uses_reg(arg, ci->spill_reg)) { |
| if (opnd_is_reg(arg)) { |
| /* Trying to pass the spill reg (or a subreg) as the arg. */ |
| reg_id_t arg_reg = opnd_get_reg(arg); |
| arg = opnd_create_tls_slot(os_tls_offset(TLS_XAX_SLOT)); |
| opnd_set_size(&arg, reg_get_size(arg_reg)); |
| if (arg_reg == DR_REG_AH || /* Don't rely on ordering. */ |
| arg_reg == DR_REG_BH || |
| arg_reg == DR_REG_CH || |
| arg_reg == DR_REG_DH) { |
| /* If it's one of the high sub-registers, add 1 to offset. */ |
| opnd_set_disp(&arg, opnd_get_disp(arg) + 1); |
| } |
| } else { |
| /* Too complicated to rewrite if it's embedded in the operand. Just |
| * restore spill_reg during the arg materialization. Hopefully this |
| * doesn't happen very often. |
| */ |
| PRE(ilist, where, instr_create_restore_from_tls |
| (dcontext, ci->spill_reg, TLS_XAX_SLOT)); |
| DOLOG(2, LOG_CLEANCALL, { |
| char disas_arg[MAX_OPND_DIS_SZ]; |
| opnd_disassemble_to_buffer(dcontext, arg, disas_arg, |
| BUFFER_SIZE_ELEMENTS(disas_arg)); |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: passing arg %s using spill reg %s to callee "PFX" " |
| "requires extra spills, consider using a different register.\n", |
| disas_arg, reg_names[ci->spill_reg], ci->start); |
| }); |
| restored_spill_reg = true; |
| } |
| } |
| |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: inlining clean call "PFX", passing arg via reg %s.\n", |
| ci->start, reg_names[regparm]); |
| if (opnd_is_immed_int(arg)) { |
| PRE(ilist, where, INSTR_CREATE_mov_imm |
| (dcontext, opnd_create_reg(regparm), arg)); |
| } else { |
| PRE(ilist, where, INSTR_CREATE_mov_ld |
| (dcontext, opnd_create_reg(regparm), arg)); |
| } |
| |
| /* Put the unprotected_context_t pointer back in spill_reg if we needed to |
| * restore the app value. |
| */ |
| if (restored_spill_reg) { |
| insert_get_mcontext_base(dcontext, ilist, where, ci->spill_reg); |
| } |
| |
| #ifndef X64 |
| ASSERT(!cci->reg_skip[0]); |
| /* Move xax to the scratch slot of the local. We only allow at most one |
| * local stack access, so the callee either does not use the argument, or |
| * the local stack access is the arg. |
| */ |
| LOG(THREAD, LOG_CLEANCALL, 2, |
| "CLEANCALL: inlining clean call "PFX", passing arg via slot.\n", |
| ci->start); |
| PRE(ilist, where, INSTR_CREATE_mov_st |
| (dcontext, callee_info_slot_opnd(ci, SLOT_LOCAL, 0), |
| opnd_create_reg(DR_REG_XAX))); |
| #endif |
| } |
| |
| void |
| insert_inline_clean_call(dcontext_t *dcontext, clean_call_info_t *cci, |
| instrlist_t *ilist, instr_t *where, opnd_t *args) |
| { |
| instrlist_t *callee = cci->ilist; |
| instr_t *instr; |
| |
| ASSERT(cci->ilist != NULL); |
| ASSERT(SCRATCH_ALWAYS_TLS()); |
| /* 0. update stats */ |
| STATS_INC(cleancall_inlined); |
| /* 1. save registers */ |
| insert_inline_reg_save(dcontext, cci, ilist, where, args); |
| /* 2. setup parameters */ |
| insert_inline_arg_setup(dcontext, cci, ilist, where, args); |
| /* 3. inline clean call ilist */ |
| instr = instrlist_first(callee); |
| while (instr != NULL) { |
| instrlist_remove(callee, instr); |
| /* XXX: if client has a xl8 handler we assume it will handle any faults |
| * in the callee (which should already have a translation set to the |
| * callee): and if not we assume there will be no such faults. |
| * We can't have a translation with no handler. |
| */ |
| if (IF_CLIENT_INTERFACE_ELSE(!dr_xl8_hook_exists(), true)) |
| instr_set_translation(instr, NULL); |
| instrlist_meta_preinsert(ilist, where, instr); |
| instr = instrlist_first(callee); |
| } |
| instrlist_destroy(dcontext, callee); |
| cci->ilist = NULL; |
| /* 4. restore registers */ |
| insert_inline_reg_restore(dcontext, cci, ilist, where); |
| /* XXX: the inlined code looks like this |
| * mov %rax -> %gs:0x00 |
| * mov %rdi -> %gs:0x01 |
| * mov $0x00000003 -> %edi |
| * mov <rel> 0x0000000072200c00 -> %rax |
| * movsxd %edi -> %rdi |
| * add %rdi (%rax) -> (%rax) |
| * mov %gs:0x00 -> %rax |
| * mov %gs:0x01 -> %rdi |
| * ... |
| * we can do some constant propagation optimization here, |
| * leave it for higher optimization level. |
| */ |
| } |
| |
| void |
| clean_call_opt_init(void) |
| { |
| callee_info_init(&default_callee_info); |
| callee_info_table_init(); |
| } |
| |
/* Tears down the clean call optimization state by destroying the callee
 * info table.
 */
void
clean_call_opt_exit(void)
{
    callee_info_table_destroy();
}
| |
| #else /* CLIENT_INTERFACE */ |
| |
| /* Stub implementation ifndef CLIENT_INTERFACE. Initializes cci and returns |
| * false for no inlining. We use dr_insert_clean_call internally, but we don't |
| * need it to do inlining. |
| */ |
| bool |
| analyze_clean_call(dcontext_t *dcontext, clean_call_info_t *cci, instr_t *where, |
| void *callee, bool save_fpstate, uint num_args, opnd_t *args) |
| { |
| CLIENT_ASSERT(callee != NULL, "Clean call target is NULL"); |
| /* 1. init clean_call_info */ |
| clean_call_info_init(cci, callee, save_fpstate, num_args); |
| return false; |
| } |
| |
| #endif /* CLIENT_INTERFACE */ |
| |
| /***************************************************************************/ |