| /* ********************************************************** |
| * Copyright (c) 2013-2014 Google, Inc. All rights reserved. |
| * Copyright (c) 2002-2010 VMware, Inc. All rights reserved. |
| * **********************************************************/ |
| |
| /* |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * * Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimer. |
| * |
| * * Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * * Neither the name of VMware, Inc. nor the names of its contributors may be |
| * used to endorse or promote products derived from this software without |
| * specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE |
| * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH |
| * DAMAGE. |
| */ |
| |
| /* Copyright (c) 2003-2007 Determina Corp. */ |
| /* Copyright (c) 2002-2003 Massachusetts Institute of Technology */ |
| |
| /* optimize.c |
| * |
| * routines to support optimization of traces |
| * (old offline optimization stuff is in mangle.c) |
| */ |
| |
| #include "../globals.h" /* just to disable warning C4206 about an empty file */ |
| |
| #ifdef INTERNAL /* around whole file */ |
| |
| #include "../globals.h" |
| #include "arch.h" |
| #include "instr.h" |
| #include "instr_create.h" |
| #include "instrlist.h" |
| #include "decode.h" |
| #include "decode_fast.h" |
| /* XXX i#1551: eliminate PREFIX_{DATA,ADDR} refs and then remove this include */ |
| #include "x86/decode_private.h" |
| #include "../fragment.h" |
| #include "disassemble.h" |
| #include "proc.h" |
| #include <string.h> /* for memset */ |
| |
| /* IMPORTANT INSTRUCTIONS FOR WRITING OPTIMIZATIONS: |
| * |
| * 1) can assume that all instructions are fully decoded -- that is, |
| * instr_operands_valid(instr) will return true |
| * 2) optimizations MUST BE DETERMINISTIC! they are re-executed to |
| * reconstruct the Pc (and in the future the rest of the machine state, |
| * hopefully) on exceptions/signals |
| */ |
| |
| /****************************************************************************/ |
| /* forward declarations */ |
| /* if these are useful elsewhere, un-static them */ |
| |
| /* optimizations */ |
| static void instr_counts(dcontext_t *dcontext, app_pc tag, instrlist_t *trace, bool pre); |
| static void constant_propagation(dcontext_t *dcontext, app_pc tag, instrlist_t *trace); |
| static void call_return_matching(dcontext_t *dcontext, app_pc tag, instrlist_t *trace); |
| static void remove_unnecessary_zeroing(dcontext_t *dcontext, app_pc tag, |
| instrlist_t *trace); |
| static void stack_adjust_combiner(dcontext_t *dcontext, app_pc tag, instrlist_t *trace); |
| |
| static void prefetch_optimize_trace(dcontext_t *dcontext, |
| app_pc tag, instrlist_t *trace); |
| void remove_redundant_loads(dcontext_t *dcontext, app_pc tag, |
| instrlist_t *trace); |
| |
| static void peephole_optimize(dcontext_t *dcontext, app_pc tag, instrlist_t *trace); |
| |
| static void identify_for_loop(dcontext_t *dcontext, |
| app_pc tag, instrlist_t *trace); |
| static void unroll_loops(dcontext_t *dcontext, app_pc tag, instrlist_t *trace); |
| #ifdef IA32_ON_IA64 |
| static void test_i64(dcontext_t *dcontext, app_pc tag, instrlist_t *trace); |
| #endif |
| |
| /* utility routines */ |
| bool is_store_to_ecxoff(dcontext_t *dcontext, instr_t *inst); |
| bool is_load_from_ecxoff(dcontext_t *dcontext, instr_t *inst); |
| bool opnd_is_constant_address(opnd_t address); |
| static bool is_zeroing_instr(instr_t *inst); |
| static bool replace_inc_with_add(dcontext_t *dcontext, instr_t *inst, |
| instrlist_t *trace); |
| static bool safe_write(instr_t *mem_writer); |
| static bool instruction_affects_mem_access(instr_t *instr,opnd_t mem_access); |
| static bool is_dead_register(reg_id_t reg,instr_t *where); |
| static reg_id_t find_dead_register_across_instrs(instr_t *start,instr_t *end); |
| static bool is_nop(instr_t *inst); |
| void remove_inst(dcontext_t *dcontext, instrlist_t *ilist, instr_t *inst); |
| |
| /* for using a 24 entry bool array to represent some property about |
| * about normal registers and sub register (eax -> dl) */ |
| /* propagates the value into all sub registers, doesn't propagate up |
| * into enclosing registers, index value is checked for bounds */ |
| static void propagate_down(bool *reg_rep, int index, bool value); |
| /* checks the 24 entry array and returns true if it and all sub |
| * registers of the index are true and 0 <= index < 24 */ |
| static bool check_down(bool *reg_rep, int index); |
| |
| /* given a cbr, finds the previous instr that writes the flag the cbr reads */ |
| static instr_t *get_decision_instr(instr_t *jmp); |
| |
| /* exported utility routines */ |
| void loginst(dcontext_t *dcontext, uint level, instr_t *instr, const char *string); |
| void logopnd(dcontext_t *dcontext, uint level, opnd_t opnd, const char *string); |
| void logtrace(dcontext_t *dcontext, uint level, instrlist_t *trace, const char *string); |
| |
| |
| |
| |
| /****************************************************************************/ |
| /* master routine */ |
| |
/* Master trace-optimization entry point: runs each optimization pass that is
 * enabled in dynamo_options, in a fixed order, over the freshly-built trace
 * for fragment `tag`.
 * NOTE: per the file header, every pass must be deterministic, since the
 * passes are re-executed to reconstruct the PC on exceptions/signals.
 */
void
optimize_trace(dcontext_t *dcontext, app_pc tag, instrlist_t *trace)
{
    /* we have un-truncation-check 32-bit casts for opnd_get_immed_int(), for
     * one thing, here and in loadtoconst.c */
    IF_X64(ASSERT_NOT_IMPLEMENTED(false));

    /* FIXME: this routine is of course not in its final form
     * we are still playing with different optimizations
     */

    /* all opts want to expand all bundles and many want cti info including instr_t
     * targets, so we go ahead and do that up front
     */
    instrlist_decode_cti(dcontext, trace);

#ifdef DEBUG
    LOG(THREAD, LOG_OPTS, 3, "\noptimize_trace ******************\n");
    LOG(THREAD, LOG_OPTS, 3, "\nbefore optimization:\n");

    if (stats->loglevel >= 3 && (stats->logmask & LOG_OPTS) != 0)
        instrlist_disassemble(dcontext, tag, trace, THREAD);

#endif

    /* pre-optimization instruction counts (pre == true) */
    if (dynamo_options.instr_counts) {
        instr_counts(dcontext, tag, trace, true);
    }

    if (dynamo_options.call_return_matching) {
        call_return_matching(dcontext, tag, trace);
    }

    if (dynamo_options.unroll_loops) {
        unroll_loops(dcontext, tag, trace);
    }

    if (dynamo_options.vectorize) {
        identify_for_loop(dcontext, tag, trace);
    }

    if (dynamo_options.prefetch) {
        prefetch_optimize_trace(dcontext, tag, trace);
    }

    /* rlr == remove redundant loads */
    if (dynamo_options.rlr) {
        remove_redundant_loads(dcontext, tag, trace);
    }

    if (dynamo_options.remove_unnecessary_zeroing) {
        remove_unnecessary_zeroing(dcontext, tag, trace);
    }

    if (dynamo_options.constant_prop) {
        constant_propagation(dcontext, tag, trace);
    }

    if (dynamo_options.remove_dead_code) {
        remove_dead_code(dcontext, tag, trace);
    }

    if (dynamo_options.stack_adjust) {
        stack_adjust_combiner(dcontext, tag, trace);
    }

    if (dynamo_options.peephole) {
        peephole_optimize(dcontext, tag, trace);
    }

#ifdef IA32_ON_IA64
    if (dynamo_options.test_i64) {
        test_i64(dcontext, tag, trace);
    }
#endif

    /* post-optimization instruction counts (pre == false) */
    if (dynamo_options.instr_counts) {
        instr_counts(dcontext, tag, trace, false);
    }

#ifdef DEBUG
    LOG(THREAD, LOG_OPTS, 3, "\nafter optimization:\n");
    if (stats->loglevel >= 3 && (stats->logmask & LOG_OPTS) != 0)
        instrlist_disassemble(dcontext, tag, trace, THREAD);
#endif
}
| |
| #ifdef DEBUG |
/* Aggregate optimization statistics, accumulated across all optimized traces
 * and printed at exit by print_optimization_stats().
 * NOTE(review): despite the "_t" suffix this is a VARIABLE of anonymous
 * struct type, not a typedef -- fields are accessed as opt_stats_t.field.
 */
static struct {
    /* rlr (remove redundant loads) */
    uint loads_removed_from_loads;
    uint loads_removed_from_stores;
    uint ctis_in_load_removal;
    int reg_overwritten;
    int val_saved_in_dead_reg;
    uint loads_examined;
    /* inc->add */
    int incs_examined;
    int incs_replaced;
    /* unrolling */
    int loops_unrolled;
    // spill_xmm
    int vals_spilled_to_xmm;
    int loads_replaced_by_xmm;
    int xmm_saves_to_mem;
    int stores_replaced_by_xmm;
    int xmm_restored_from_memory;
    int xmm_mov_to_dead_reg;
    int loadstore_combos_replaced_by_xmm;
    int xmm_traces;
    int mmx_traces;
    /* constant propagation */
    int num_instrs_simplified;
    int num_fail_simplified;
    int num_opnds_simplified;
    int num_const_add_const_mem;
    int num_jmps_simplified;
    int num_jecxz_instrs_removed;
    /* remove dead loads */
    int dead_loads_removed;
    /* remove unnecessary XOR zeroing */
    int xors_removed;
    /* stack adjustment combining */
    int num_stack_adjust_removed;
    /* instr_counts: pre- and post-optimization totals */
    int pre_num_instrs_seen;
    int pre_num_jmps_seen;
    int post_num_instrs_seen;
    int post_num_jmps_seen;
    /* call return matching */
    int num_returns_removed;
    int num_return_instrs_removed;
#ifdef IA32_ON_IA64
    bool i64_test;
    int ia64_num_entries;
#endif
} opt_stats_t;
| |
/* This function is called when dynamo exits.  It prints the statistics for
 * any optimization that keeps them in opt_stats_t; add corresponding code
 * below for any new optimization's stats. */
| |
| void |
| print_optimization_stats() |
| { |
| if (dynamo_options.rlr) { |
| uint top, bottom; |
| LOG(GLOBAL, LOG_OPTS, 1, |
| "%u loads examined for rlr\n", opt_stats_t.loads_examined); |
| divide_uint64_print(opt_stats_t.loads_removed_from_stores + |
| opt_stats_t.loads_removed_from_loads, |
| opt_stats_t.loads_examined, |
| true, 2, &top, &bottom); |
| LOG(GLOBAL, LOG_OPTS,1,"%u.%.2u%% of examined loads removed\n", |
| top, bottom); |
| divide_uint64_print(opt_stats_t.ctis_in_load_removal, |
| opt_stats_t.loads_removed_from_loads + |
| opt_stats_t.loads_removed_from_stores, |
| false, 4, &top, &bottom); |
| LOG(GLOBAL, LOG_OPTS, 1,"%u loads removed from loads\n" |
| "%u loads removed from stores\n" |
| "%u ctis traversed. %u.%.4u ctis / load\n", |
| opt_stats_t.loads_removed_from_loads, |
| opt_stats_t.loads_removed_from_stores, |
| opt_stats_t.ctis_in_load_removal, top, bottom); |
| LOG(GLOBAL, LOG_OPTS, 1,"%d rlr's had problems because a reg. was overwritten\n", |
| opt_stats_t.reg_overwritten); |
| LOG(GLOBAL, LOG_OPTS, 1,"%d rlr's were saved by using a dead register to save value\n", |
| opt_stats_t.val_saved_in_dead_reg); |
| } |
| if (dynamo_options.peephole && proc_get_family() == FAMILY_PENTIUM_4) { |
| LOG(GLOBAL, LOG_OPTS, 1, "%d inc/dec examined, %d replaced with add/sub\n", |
| opt_stats_t.incs_examined, opt_stats_t.incs_replaced); |
| } |
| if (dynamo_options.unroll_loops) { |
| LOG(GLOBAL, LOG_OPTS, 1, "%d loops unrolled\n", opt_stats_t.loops_unrolled); |
| } |
| |
| if (dynamo_options.call_return_matching) { |
| LOG(GLOBAL, LOG_OPTS, 1, "Call Return Matching - stats\n"); |
| LOG(GLOBAL, LOG_OPTS, 1, " %d returns removed\n", opt_stats_t.num_returns_removed); |
| LOG(GLOBAL, LOG_OPTS, 1, " %d return instrs removed\n", opt_stats_t.num_return_instrs_removed); |
| } |
| |
| if (dynamo_options.constant_prop) { |
| LOG(GLOBAL, LOG_OPTS, 1, "Constant Prop - stats\n"); |
| LOG(GLOBAL, LOG_OPTS, 1, " %d operands simplified\n", opt_stats_t.num_opnds_simplified); |
| LOG(GLOBAL, LOG_OPTS, 1, " %d constant loads from immutable memory discoverd (included in operands simplified)\n", opt_stats_t.num_const_add_const_mem); |
| LOG(GLOBAL, LOG_OPTS, 1, " %d instructions simplified\n", opt_stats_t.num_instrs_simplified); |
| LOG(GLOBAL, LOG_OPTS, 1, " %d instructions failed simplification\n", opt_stats_t.num_fail_simplified); |
| LOG(GLOBAL, LOG_OPTS, 1, " %d jmps removed\n", opt_stats_t.num_jmps_simplified); |
| LOG(GLOBAL, LOG_OPTS, 1, " %d jecxz related instrs removed, (6 per jecxz instr)\n", opt_stats_t.num_jecxz_instrs_removed); |
| } |
| |
| if (dynamo_options.remove_unnecessary_zeroing) { |
| LOG(GLOBAL, LOG_OPTS, 1, "%d unnecessary zeroing instances removed\n", opt_stats_t.xors_removed); |
| |
| } |
| |
| if (dynamo_options.stack_adjust) { |
| LOG(GLOBAL, LOG_OPTS, 1, "Stack Adjustment Combiner - stats\n"); |
| LOG(GLOBAL, LOG_OPTS, 1, " %d stack adjustments removed\n", opt_stats_t.num_stack_adjust_removed); |
| } |
| |
| if (dynamo_options.remove_dead_code) { |
| LOG(GLOBAL, LOG_OPTS, 1, "Dead Code Elimination - stats\n"); |
| LOG(GLOBAL, LOG_OPTS, 1, " %d dead instructions removed\n", opt_stats_t.dead_loads_removed); |
| } |
| |
| if (dynamo_options.instr_counts) { |
| LOG(GLOBAL, LOG_OPTS, 1, "Prior to optimizations\n"); |
| LOG(GLOBAL, LOG_OPTS, 1, " %d instrs in traces\n", opt_stats_t.pre_num_instrs_seen); |
| LOG(GLOBAL, LOG_OPTS, 1, " %d jmps (cbr) in traces\n", opt_stats_t.pre_num_jmps_seen); |
| LOG(GLOBAL, LOG_OPTS, 1, "After optimizations\n"); |
| LOG(GLOBAL, LOG_OPTS, 1, " %d instrs in traces\n", opt_stats_t.post_num_instrs_seen); |
| LOG(GLOBAL, LOG_OPTS, 1, " %d jmps (cbr) in traces\n", opt_stats_t.post_num_jmps_seen); |
| } |
| |
| #ifdef IA32_ON_IA64 |
| if (dynamo_options.test_i64) { |
| if (opt_stats_t.i64_test) |
| LOG(GLOBAL, LOG_OPTS, 1, "IA64 test succeeded!\n"); |
| else |
| LOG(GLOBAL, LOG_OPTS, 1, "IA64 test failed!\n"); |
| LOG(GLOBAL, LOG_OPTS, 1, "%d entries into Itanium code\n", opt_stats_t.ia64_num_entries); |
| } |
| #endif |
| |
| } |
| #endif |
| |
| /****************************************************************************/ |
| |
| /* op1 and op2 are both memory references */ |
| static bool |
| generate_antialias_check(dcontext_t *dcontext, instrlist_t *pre_loop, |
| opnd_t op1, opnd_t op2) |
| { |
| /* basic idea: "lea op1 !overlap lea op2" */ |
| opnd_t lea1, lea2; |
| if (opnd_same(op1, op2)) |
| return false; /* guaranteed alias */ |
| if (!opnd_defines_use(op1, op2)) |
| return true; /* guaranteed non-alias */ |
| /* FIXME: get pre-loop values of registers */ |
| /* FIXME: get unroll factor -- pass to opnd_defines_use too */ |
| /* assumption: ebx and ecx are saved at start of pre_loop, restored at end |
| * FIXME: op1/op2 may use ebx/ecx! |
| */ |
| lea1 = op1; |
| opnd_set_size(&lea1, OPSZ_lea); |
| lea2 = op2; |
| opnd_set_size(&lea2, OPSZ_lea); |
| instrlist_append(pre_loop, INSTR_CREATE_lea(dcontext, opnd_create_reg(REG_EBX), lea1)); |
| instrlist_append(pre_loop, INSTR_CREATE_lea(dcontext, opnd_create_reg(REG_ECX), lea2)); |
| instrlist_append(pre_loop, INSTR_CREATE_cmp(dcontext, opnd_create_reg(REG_EBX), |
| opnd_create_reg(REG_ECX))); |
| return true; |
| } |
| |
| #define MAX_INDUCTION_VARS 8 |
| #define MAX_LCDS 32 |
| #define MAX_INVARIANTS 32 |
| |
| static void |
| identify_for_loop(dcontext_t *dcontext, app_pc tag, instrlist_t *trace) |
| { |
| instr_t *inst, *branch, *check; |
| opnd_t opnd; |
| int i, j, k; |
| instr_t *induction_var[MAX_INDUCTION_VARS]; |
| int num_induction_vars = 0; |
| opnd_t lcd[MAX_LCDS]; |
| int num_lcds = 0; |
| opnd_t invariant[MAX_INVARIANTS]; |
| int num_invariants = 0; |
| instrlist_t pre_loop; |
| instrlist_init(&pre_loop); |
| |
| /* FIXME: what about loops with cbr at top and ubr at bottom? */ |
| |
| /* FIXME: for now, we only look for single-basic-block traces */ |
| |
| LOG(THREAD, LOG_OPTS, 3, "identify_for_loop: examining trace with tag "PFX"\n", tag); |
| /* first, make sure we end with a conditional branch (followed by uncond. |
| * for fall-through) |
| */ |
| inst = instrlist_last(trace); |
| if (!instr_is_ubr(inst)) |
| return; |
| branch = instr_get_prev(inst); |
| if (!instr_is_cbr(branch)) |
| return; |
| /* now look for self-loop */ |
| if (opnd_get_pc(instr_get_target(branch)) != tag) |
| return; |
| |
| #ifdef DEBUG |
| LOG(THREAD, LOG_OPTS, 1, "\nidentify_for_loop: found whole-trace self-loop: tag "PFX"\n", tag); |
| if ((stats->logmask & LOG_OPTS) != 0) |
| instrlist_disassemble(dcontext, tag, trace, THREAD); |
| #endif |
| |
| /* FIXME: for each pair of read/write and write/write: insert pre-loop check to |
| * ensure no aliases |
| */ |
| |
| /* make a pass looking for lcds and induction variables |
| * only look at scalars -- ignore memory references, we deal with them |
| * separately later |
| * also make sure there's only one exit |
| */ |
| for (inst = instrlist_first(trace); inst != branch; inst = instr_get_next(inst)) { |
| /* for now do not allow exits in middle */ |
| if (instr_is_exit_cti(inst)) { |
| LOG(THREAD, LOG_OPTS, 1, "internal exit found, giving up\n"); |
| return; |
| } |
| /* loop-carried dependence: a read with no writes prior in loop but |
| * with a write following in loop |
| * loop invariant: a read with no writes anywhere in loop |
| * FIXME: better to store dependence info somehow, or to make passes |
| * through instrlist whenever need info? |
| */ |
| loginst(dcontext, 1, inst, "considering"); |
| for (i=0; i<instr_num_srcs(inst); i++) { |
| opnd = instr_get_src(inst, i); |
| /* ignore immeds and memory references */ |
| if (opnd_is_immed(opnd) || opnd_is_memory_reference(opnd)) |
| continue; |
| for (check = instrlist_first(trace); check != inst; |
| check = instr_get_next(check)) { |
| for (j=0; j<instr_num_dsts(check); j++) { |
| if (opnd_defines_use(instr_get_dst(check, j), opnd)) { |
| /* write prior to read: no lcd */ |
| loginst(dcontext, 1, check, |
| "\twrite prior to read -> no lcd"); |
| goto no_lcd; |
| } |
| } |
| } |
| for (check = inst; check != branch; check = instr_get_next(check)) { |
| for (j=0; j<instr_num_dsts(check); j++) { |
| if (opnd_defines_use(instr_get_dst(check, j), opnd)) { |
| /* write following read: lcd */ |
| goto has_lcd; |
| } |
| } |
| } |
| /* no writes: loop invariant! */ |
| logopnd(dcontext, 1, opnd, "\tloop invariant"); |
| invariant[num_invariants] = opnd; |
| num_invariants++; |
| if (num_invariants >= MAX_INVARIANTS) { |
| LOG(THREAD, LOG_OPTS, 1, "too many invariants, giving up\n"); |
| return; |
| } |
| } |
| loginst(dcontext, 1, inst, "\tfell off end -> no lcd"); |
| no_lcd: |
| continue; |
| has_lcd: |
| loginst(dcontext, 1, inst, "\tfound a loop-carried dependence"); |
| /* find basic induction variables: i = i + constant |
| * FIXME: consider loop invariants as well as immeds as constants |
| * FIXME: only consider inc,dec,add,sub? |
| * FIXME: look for dependent induction vars too: j = i + constant |
| */ |
| /* assumption: immediate operands are always 1st source */ |
| if (instr_get_opcode(inst) == OP_inc || instr_get_opcode(inst) == OP_dec || |
| (instr_num_srcs(inst) == 2 && instr_num_dsts(inst) == 1 && |
| opnd_is_immed_int(instr_get_src(inst, 0)) && |
| opnd_same(instr_get_src(inst, 1), instr_get_dst(inst, 0)))) { |
| loginst(dcontext, 1, inst, "\t\tfound induction variable"); |
| induction_var[num_induction_vars] = inst; |
| num_induction_vars++; |
| if (num_induction_vars >= MAX_INDUCTION_VARS) { |
| LOG(THREAD, LOG_OPTS, 1, "too many induction vars, giving up\n"); |
| return; |
| } |
| } else { |
| /* not an induction variable, but may be ok if lcd operand |
| * is based on induction var values |
| */ |
| lcd[num_lcds] = opnd; |
| num_lcds++; |
| if (num_lcds >= MAX_LCDS) { |
| LOG(THREAD, LOG_OPTS, 1, "too many lcds, giving up\n"); |
| return; |
| } |
| } |
| } |
| |
| LOG(THREAD, LOG_OPTS, 1, "now looking through lcds for ones we can't handle\n"); |
| /* it's ok for an lcd to read a value kept in an induction var |
| * or a loop invariant |
| */ |
| for (i=0; i<num_lcds; i++) { |
| if (opnd_is_reg(lcd[i])) { |
| for (j=0; j<num_induction_vars; j++) { |
| if (opnd_same(lcd[i], instr_get_dst(induction_var[j], 0))) |
| goto next_lcd; |
| } |
| for (j=0; j<num_invariants; j++) { |
| if (opnd_same(lcd[i], invariant[j])) |
| goto next_lcd; |
| } |
| } else if (opnd_is_memory_reference(lcd[i])) { |
| for (j=0; j<opnd_num_regs_used(lcd[i]); j++) { |
| opnd = opnd_create_reg(opnd_get_reg_used(lcd[i], j)); |
| for (k=0; k<num_induction_vars; k++) { |
| if (opnd_same(opnd, instr_get_dst(induction_var[k], 0))) |
| break; |
| } |
| if (k==num_induction_vars) { |
| for (k=0; k<num_invariants; k++) { |
| if (opnd_same(opnd, invariant[k])) |
| break; |
| } |
| if (k==num_invariants) |
| goto lcd_bad; |
| } |
| } |
| } else |
| ASSERT_NOT_REACHED(); |
| next_lcd: |
| logopnd(dcontext, 1, lcd[i], |
| "\tlcd read is induction var value, so it's ok"); |
| continue; |
| lcd_bad: |
| logopnd(dcontext, 1, lcd[i], |
| "\tlcd read is not induction var value, giving up"); |
| return; |
| } |
| |
| /* now look at loop termination test */ |
| inst = get_decision_instr(branch); |
| loginst(dcontext, 1, inst, "looking at decision instr"); |
| /* test must involve only induction vars and constants */ |
| for (i=0; i<instr_num_srcs(inst); i++) { |
| opnd = instr_get_src(inst, i); |
| if (!opnd_is_immed(opnd)) { |
| for (j=0; j<num_induction_vars; j++) { |
| if (opnd_same(opnd, instr_get_dst(induction_var[j], 0))) |
| break; |
| } |
| if (j==num_induction_vars) { |
| loginst(dcontext, 1, inst, |
| "\tloop termination test not just consts & inductions!"); |
| return; |
| } |
| } |
| } |
| |
| LOG(THREAD, LOG_OPTS, 1, "now looking at memory references\n"); |
| for (inst = instrlist_first(trace); inst != branch; inst = instr_get_next(inst)) { |
| /* for each store, generate pre-loop checks to ensure no overlap with |
| * any other store or read |
| */ |
| loginst(dcontext, 1, inst, "considering"); |
| for (i=0; i<instr_num_dsts(inst); i++) { |
| opnd = instr_get_dst(inst, i); |
| if (!opnd_is_memory_reference(opnd)) |
| continue; |
| for (check = instrlist_first(trace); check != branch; |
| check = instr_get_next(check)) { |
| for (j=0; j<instr_num_dsts(check); j++) { |
| if (check==inst && j==i) |
| continue; |
| if (opnd_is_memory_reference(instr_get_dst(check, j))) { |
| /* disambiguate these writes */ |
| logopnd(dcontext, 1, instr_get_dst(check, j), |
| "\tgenerating checks"); |
| if (!generate_antialias_check(dcontext, &pre_loop, opnd, |
| instr_get_dst(check, j))) { |
| loginst(dcontext, 1, inst, |
| "unavoidable alias, giving up"); |
| return; |
| } |
| } |
| } |
| } |
| } |
| } |
| if (instrlist_first(&pre_loop) != NULL) { |
| /* if we generated any tests, they assume we have two registers: |
| * save two registers at start, then restore at end, of pre_loop |
| * FIXME: what about eflags? |
| */ |
| instrlist_prepend(&pre_loop, |
| instr_create_save_to_dcontext(dcontext, REG_EBX, XBX_OFFSET)); |
| instrlist_prepend(&pre_loop, |
| instr_create_save_to_dcontext(dcontext, REG_ECX, XCX_OFFSET)); |
| instrlist_append(&pre_loop, |
| instr_create_restore_from_dcontext(dcontext, REG_ECX, XCX_OFFSET)); |
| instrlist_append(&pre_loop, |
| instr_create_restore_from_dcontext(dcontext, REG_EBX, XBX_OFFSET)); |
| } |
| |
| |
| LOG(THREAD, LOG_OPTS, 1, "loop has passed all tests so far!\n"); |
| #ifdef DEBUG |
| LOG(THREAD, LOG_OPTS, 1, "pre-loop checks are:\n"); |
| if (stats->loglevel >= 1 && (stats->logmask & LOG_OPTS) != 0) |
| instrlist_disassemble(dcontext, tag, &pre_loop, THREAD); |
| #endif |
| |
| /* now look for "load, arithop, store" pattern |
| * THIS IS A HACK -- just want to identify loop in mmx.c |
| */ |
| inst = instrlist_first(trace); |
| if (instr_get_opcode(inst) != OP_mov_ld) { |
| LOG(THREAD, LOG_OPTS, 1, "1st instr not a load, aborting\n"); |
| return; |
| } |
| inst = instr_get_next(inst); |
| if (instr_get_opcode(inst) != OP_add) { |
| LOG(THREAD, LOG_OPTS, 1, "2nd instr not an add, aborting\n"); |
| return; |
| } |
| inst = instr_get_next(inst); |
| if (instr_get_opcode(inst) != OP_mov_st) { |
| LOG(THREAD, LOG_OPTS, 1, "3rd instr not a store, aborting\n"); |
| return; |
| } |
| LOG(THREAD, LOG_OPTS, 1, "found 'load, arithop, store' pattern!\n"); |
| for (check = instr_get_next(inst); check != branch; |
| check = instr_get_next(check)) { |
| for (j=0; j<num_induction_vars; j++) { |
| if (induction_var[j] == check) |
| break; |
| } |
| if (j==num_induction_vars) { |
| loginst(dcontext, 1, check, "non-induction var is present"); |
| return; |
| } |
| } |
| LOG(THREAD, LOG_OPTS, 1, "vectorizing\n"); |
| |
| /* prior to unrolling, replace inc with add */ |
| for (i=0; i<num_induction_vars; i++) { |
| int opcode = instr_get_opcode(induction_var[i]); |
| if (opcode == OP_inc || opcode == OP_dec) { |
| inst = instr_get_prev(induction_var[i]); |
| if (replace_inc_with_add(dcontext, induction_var[i], trace)) { |
| /* orig induction var instr_t was destroyed, get new copy */ |
| if (inst == NULL) |
| induction_var[i] = instrlist_first(trace); |
| else |
| induction_var[i] = instr_get_next(inst); |
| } else { |
| loginst(dcontext, 1, induction_var[i], |
| "couldn't replace inc w/ add b/c of eflags\n"); |
| /* FIXME: undo earlier inc->adds */ |
| return; |
| } |
| } |
| } |
| |
| /********** unroll loop **********/ |
| #if 0 |
| do { |
| body using i; |
| i += inc; |
| } while (i < max); |
| |
| becomes |
| |
| /* |
| pre-loop to get alignment is tricky when replacing sideline...need to |
| measure memory addresses, can't just go on induction var...for |
| now do not have a pre-loop and do unaligned simd |
| offs = i % inc; |
| while (i % (inc * unrollfactor) != 0) { |
| body using i; |
| i += inc; |
| } |
| */ |
| do { |
| body using i; |
| body using i+inc; |
| i += (inc * unrollfactor); |
| } while (i < max - (inc * unrollfactor) + 1); |
| while (i < max) { |
| body using i; |
| i += inc; |
| } |
| #endif |
| #if 0 |
| /* First do pre-alignment loop |
| * to do mod: idiv divides edx:eax by r/m32, puts quotient |
| * in eax, remainder in edx |
| * cdq sign-extends eax into edx:eax |
| * so x mod y is: |
| * mov x,%eax |
| * cdq |
| * idiv y |
| * answer is now in %edx! */ |
| inst = instrlist_first(trace); |
| instrlist_preinsert(trace, inst, |
| instr_create_save_to_dcontext(dcontext, REG_EAX, XAX_OFFSET)); |
| instrlist_preinsert(trace, inst, |
| instr_create_save_to_dcontext(dcontext, REG_EDX, XDX_OFFSET)); |
| /* FIXME: get termination index var & lower/upper bound |
| * hardcoding to mmx.c loop for now |
| */ |
| instrlist_preinsert(trace, inst, |
| INSTR_CREATE_mov_ld(dcontext, opnd_create_reg(REG_EAX), opnd_create_reg(REG_ECX))); |
| instrlist_preinsert(trace, inst, INSTR_CREATE_cdq(dcontext)); |
| /* can't pass immed to idiv so have to grab register */ |
| instrlist_preinsert(trace, inst, |
| instr_create_save_to_dcontext(dcontext, REG_EBX, XBX_OFFSET)); |
| instrlist_preinsert(trace, inst, |
| INSTR_CREATE_mov_imm(dcontext, opnd_create_reg(REG_EBX), |
| OPND_CREATE_INT32(unroll_factor))); |
| instrlist_preinsert(trace, inst, |
| INSTR_CREATE_idiv_v(dcontext, opnd_create_reg(REG_EBX))); |
| instrlist_preinsert(trace, inst, |
| instr_create_restore_from_dcontext(dcontext, REG_EBX, XBX_OFFSET)); |
| /* mod is now in edx */ |
| /* FIXME: |
| * test edx, edx |
| * restore eax and edx |
| * jz unrolled_loop |
| * <body of loop> |
| * jmp top of pre loop |
| * unrolled_loop: |
| * <body of loop> X unroll factor except: |
| * test: "< bound" => "< bound - unrollfactor + 1" |
| * ivar: "ivar += k" => "ivar += unrollfactor * k" |
| */ |
| instrlist_preinsert(trace, inst, |
| instr_create_restore_from_dcontext(dcontext, REG_EDX, XDX_OFFSET)); |
| instrlist_preinsert(trace, inst, |
| instr_create_restore_from_dcontext(dcontext, REG_EAX, XAX_OFFSET)); |
| #endif |
| |
| /* HACK: focus on mmx.c sample loop */ |
| inst = instrlist_first(trace); |
| opnd = instr_get_src(inst, 0); |
| ASSERT(opnd_is_memory_reference(opnd)); |
| opnd_set_size(&opnd, OPSZ_8); |
| check = INSTR_CREATE_movq(dcontext, opnd_create_reg(REG_MM0), opnd); |
| loginst(dcontext, 1, inst, "replacing this"); |
| loginst(dcontext, 1, check, "\twith this"); |
| replace_inst(dcontext, trace, inst, check); |
| |
| inst = instr_get_next(check); |
| opnd = instr_get_src(inst, 0); |
| ASSERT(opnd_is_memory_reference(opnd)); |
| opnd_set_size(&opnd, OPSZ_8); |
| check = INSTR_CREATE_paddd(dcontext, opnd_create_reg(REG_MM0), opnd); |
| loginst(dcontext, 1, inst, "replacing this"); |
| loginst(dcontext, 1, check, "\twith this"); |
| replace_inst(dcontext, trace, inst, check); |
| |
| inst = instr_get_next(check); |
| opnd = instr_get_dst(inst, 0); |
| ASSERT(opnd_is_memory_reference(opnd)); |
| opnd_set_size(&opnd, OPSZ_8); |
| check = INSTR_CREATE_movq(dcontext, opnd, opnd_create_reg(REG_MM0)); |
| loginst(dcontext, 1, inst, "replacing this"); |
| loginst(dcontext, 1, check, "\twith this"); |
| replace_inst(dcontext, trace, inst, check); |
| |
| /* now make induction vars do X unroll duty */ |
| for (i=0; i<num_induction_vars; i++) { |
| if (instr_get_opcode(induction_var[i]) == OP_inc || |
| instr_get_opcode(induction_var[i]) == OP_dec) { |
| /* couldn't convert to add/sub, so duplicate */ |
| instrlist_preinsert(trace, induction_var[i], |
| instr_clone(dcontext, induction_var[i])); |
| } else { |
| opnd = instr_get_src(induction_var[i], 0); |
| ASSERT(opnd_is_immed_int(opnd)); |
| opnd = opnd_create_immed_int(opnd_get_immed_int(opnd)*2, opnd_get_size(opnd)); |
| instr_set_src(induction_var[i], 0, opnd); |
| } |
| } |
| |
| #ifdef DEBUG |
| LOG(THREAD, LOG_OPTS, 1, "\nfinal version of trace:\n"); |
| if ((stats->logmask & LOG_OPTS) != 0) |
| instrlist_disassemble(dcontext, tag, trace, THREAD); |
| #endif |
| } |
| |
| /****************************************************************************/ |
| |
/* WARNING: this optimization inserts intra-trace loops that are not
 * considered exit cti's, so they cannot be unlinked/relinked, nor does
 * linkcount profiling work properly on them.
 * We need to figure out our official stance on support for this kind
 * of thing in optimized traces.
 */
/* Unrolls a whole-trace self-loop: a trace ending in "jcc <tag>; jmp <exit>"
 * whose cbr targets the trace's own tag.  The do-while loop is rewritten as
 * a while loop with a widened comparison bound (see the sketch below), and a
 * clone of the original one-iteration loop is appended as a "recovery" loop
 * to execute the leftover iterations.  On any unmet pattern or eflags
 * requirement the function returns without modifying the trace.
 */
static void
unroll_loops(dcontext_t *dcontext, app_pc tag, instrlist_t *trace)
{
    instr_t *inst, *temp;
    instr_t *branch, *decision, *cmp, *recovery_cmp, *final_jmp;
    int unroll_factor, cmp_vs, i;
    bool counting_up;   /* direction the induction variable moves */
    opnd_t cmp_const;
    uint eflags_6 = 0;  /* arith flags written so far, stored as READ bits */

    /* FIXME: what about loops with cbr at top and ubr at bottom? */

    LOG(THREAD, LOG_OPTS, 3, "unroll loop: examining trace with tag "PFX"\n", tag);
    /* first, make sure we end with a conditional branch (followed by uncond.
     * for fall-through)
     */
    final_jmp = instrlist_last(trace);
    if (!instr_is_ubr(final_jmp))
        return;
    branch = instr_get_prev(final_jmp);
    if (!instr_is_cbr(branch))
        return;
    /* now look for self-loop */
    if (opnd_get_pc(instr_get_target(branch)) != tag)
        return;

    /* eflags: for simplicity require that eflags be written before read
     * only need to look at arith flags
     */
#ifdef DEBUG
    LOG(THREAD, LOG_OPTS, 3, "\nunroll loop -- checking eflags on:\n");
    if (stats->loglevel >= 3 && (stats->logmask & LOG_OPTS) != 0)
        instrlist_disassemble(dcontext, tag, trace, THREAD);
#endif
    /* Scan the loop body (everything before the cbr): give up if any arith
     * flag is read before the loop itself has written it, since unrolling
     * changes how many times the flag-setting instrs execute.
     */
    for (inst = instrlist_first(trace); inst != branch; inst = instr_get_next(inst)) {
        uint eflags = instr_get_arith_flags(inst, DR_QUERY_DEFAULT);
        if (eflags != 0) {
            if ((eflags & EFLAGS_READ_6) != 0) {
                if ((eflags_6 | (eflags & EFLAGS_READ_6)) != eflags_6) {
                    /* we're reading a flag that has not been written yet */
                    loginst(dcontext, 3, inst,
                            "reads flag before writing it, giving up");
                    return;
                }
            } else if ((eflags & EFLAGS_WRITE_6) != 0) {
                /* store the flags we're writing, but as read bits */
                eflags_6 |= EFLAGS_WRITE_TO_READ((eflags & EFLAGS_WRITE_6));
                /* check against read flags (we've shifted them): */
                if ((eflags_6 & EFLAGS_READ_6) == EFLAGS_READ_6) {
                    break; /* all written before read */
                }
            }
        }
    }
    /* if get here, eflags are written before read -- and we assume that
     * our cmp checks below will ensure that exiting the trace will not
     * have different eflags behavior than the unrolled loop
     * FIXME: I'm not certain of this
     */

#ifdef DEBUG
    LOG(THREAD, LOG_OPTS, 3, "\nunroll loop: found whole-trace self-loop: tag "PFX"\n", tag);
    if (stats->loglevel >= 3 && (stats->logmask & LOG_OPTS) != 0)
        instrlist_disassemble(dcontext, tag, trace, THREAD);
#endif

    /********** unroll loop **********/
    /*
      do {
          body using i;
          i += inc;
      } while (i < max);

      becomes (assuming no eflags problems):

      while (i < max - (inc * (unrollfactor-1))) {
          body using i;
          body using i+inc;
          body using i+(inc*2);
          ...
          body using i+(inc*(unrollfactor-1));
          i += (inc * unrollfactor);
      }
      while (i < max) {
          body using i;
          i += inc;
      }
    */

    /* FIXME: determine best unroll factor, somehow */
    unroll_factor = 2;

    /* see if we can get the branch into a state that can
     * have its bounds changed: "cmp var, immed"
     */
    decision = get_decision_instr(branch);
    if (decision == NULL) {
        LOG(THREAD, LOG_OPTS, 3, "can't find decision instr\n");
        return;
    }
    loginst(dcontext, 3, decision, "decision instr");

    /* Determine loop direction from the branch condition (for cmp) or the
     * arithmetic opcode itself (for inc/add/adc vs dec/sub).
     */
    if (instr_get_opcode(decision) == OP_cmp) {
        int opcode = instr_get_opcode(branch);
        switch (opcode) {
        case OP_jb: counting_up = true; break;
        case OP_jnb: counting_up = false; break;
        case OP_jbe: counting_up = true; break;
        case OP_jnbe: counting_up = false; break;
        case OP_jl: counting_up = true; break;
        case OP_jnl: counting_up = false; break;
        case OP_jle: counting_up = true; break;
        case OP_jnle: counting_up = false; break;
        case OP_js: counting_up = true; break;
        case OP_jns: counting_up = false; break;
        default:
            loginst(dcontext, 3, branch, "cannot handle decision branch");
            return;
        }
    } else if (instr_get_opcode(decision) == OP_inc ||
               instr_get_opcode(decision) == OP_add ||
               instr_get_opcode(decision) == OP_adc)
        counting_up = true;
    else if (instr_get_opcode(decision) == OP_dec || instr_get_opcode(decision) == OP_sub)
        counting_up = false;
    else {
        LOG(THREAD, LOG_OPTS, 3, "can't figure out direction of loop index var\n");
        return;
    }

    if (instr_get_opcode(decision) == OP_cmp) {
        cmp = decision;
    } else {
        /* common loop type: ends with "dec var, jns" */
        if (instr_get_opcode(decision) == OP_inc || instr_get_opcode(decision) == OP_dec) {
            /* FIXME: shadowing cmp_const */
            opnd_t cmp_var, cmp_const;
            int opcode;
            cmp_var = instr_get_dst(decision, 0);
            if (instr_get_opcode(branch) == OP_jns) {
                /* jump if non-negative */
                cmp_const = OPND_CREATE_INT8(0);
                opcode = OP_jge;
            } else if (instr_get_opcode(branch) == OP_js) {
                /* jump if negative */
                cmp_const = OPND_CREATE_INT8(0);
                opcode = OP_jl;
            } else {
                loginst(dcontext, 3, branch, "can't handle loop branch");
                return;
            }
            /* materialize an explicit "cmp var, 0" + signed jcc so the bound
             * can be adjusted below */
            cmp = INSTR_CREATE_cmp(dcontext, cmp_var, cmp_const);
            instrlist_preinsert(trace, branch, cmp);
            temp = INSTR_CREATE_jcc(dcontext, opcode, instr_get_target(branch));
            replace_inst(dcontext, trace, branch, temp);
            branch = temp;

            /* replace with add/sub for easy stride changing
             * if we fail, give up, not because we can't inc twice, but
             * because of eflags concerns
             */
            if (!replace_inc_with_add(dcontext, decision, trace)) {
                loginst(dcontext, 3, decision, "couldn't replace with add/sub");
                return;
            }
        } else {
            /* give up -- if add cases in future, remember to deal w/ eflags */
            loginst(dcontext, 3, decision, "can't handle loop branch decision");
            return;
        }
    }
    /* FIXME: detect loop invariants, and allow them as constants
     * requires adding extra instructions to compute bounds
     */
    if (!opnd_is_immed_int(instr_get_src(cmp, 1))) {
        loginst(dcontext, 3, cmp, "cmp is not vs. constant");
        return;
    }

    /* make recovery loop: a clone of cmp, an inverted cbr that exits where
     * the trace used to exit, then a clone of the entire loop body */
    recovery_cmp = instr_clone(dcontext, cmp);
    instrlist_preinsert(trace, final_jmp, recovery_cmp);
    temp = instr_clone(dcontext, branch);
    instr_invert_cbr(temp);
    instr_set_target(temp, instr_get_target(final_jmp));
    instrlist_preinsert(trace, final_jmp, temp);
    for (inst = instrlist_first(trace); inst != cmp;
         inst = instr_get_next(inst)) {
        instrlist_preinsert(trace, final_jmp, instr_clone(dcontext, inst));
    }
    /* now change final jmp to loop to recovery loop check */
    instr_set_target(final_jmp, opnd_create_instr(recovery_cmp));

    /* unroll:
     * duplicate every instruction up to cmp at end
     */
    temp = instr_get_prev(cmp);
    for (i=1; i<unroll_factor; i++) {
        for (inst = instrlist_first(trace); inst != cmp;
             inst = instr_get_next(inst)) {
            instrlist_preinsert(trace, cmp, instr_clone(dcontext, inst));
            if (inst == temp) /* avoid infinite loop */
                break;
        }
    }

    /* now switch unrolled loop from do-while to while */

    /* put jcc up front */
    temp = instr_clone(dcontext, branch);
    instr_invert_cbr(temp);
    instr_set_target(temp, opnd_create_instr(recovery_cmp));
    instrlist_prepend(trace, temp);

    /* now stick cmp in front of it, with the bound tightened by
     * (unroll_factor - 1) so the unrolled body never overshoots
     * NOTE(review): this adjusts by the factor, not (inc * factor) --
     * presumably assumes a stride of 1; confirm */
    cmp_vs = (int) opnd_get_immed_int(instr_get_src(cmp, 1));
    if (counting_up)
        cmp_vs -= (unroll_factor - 1);
    else
        cmp_vs += (unroll_factor - 1);
    if (cmp_vs >= -128 && cmp_vs <= 127)
        cmp_const = OPND_CREATE_INT8(cmp_vs);
    else
        cmp_const = OPND_CREATE_INT32(cmp_vs);
    instrlist_prepend(trace,
                      INSTR_CREATE_cmp(dcontext, instr_get_src(cmp, 0), cmp_const));

    /* now change end-of-unrolled-loop jcc to be a jmp to top cmp */
    instr_set_opcode(branch, OP_jmp);
    instr_set_target(branch, opnd_create_instr(instrlist_first(trace)));

    /* remove end-of-unrolled-loop cmp */
    instrlist_remove(trace, cmp);
    instr_destroy(dcontext, cmp);

    /* control flow is all set, now combine induction var updates
     * FIXME: NOT DONE YET
     */

#ifdef DEBUG
    opt_stats_t.loops_unrolled++;
    LOG(THREAD, LOG_OPTS, 3, "\nfinal version of unrolled trace:\n");
    if (stats->loglevel >= 3 && (stats->logmask & LOG_OPTS) != 0)
        instrlist_disassemble(dcontext, tag, trace, THREAD);
#endif
}
| |
#ifdef IA32_ON_IA64
/***************************************************************************/
/* from Tim */
/* tests the jmpe and jmpe extensions to IA32 for IA64 environments */
static bool have_done = false; /* test code has been emitted once */
static bool checked = false;   /* successful result has been recorded */
static int test1;              /* set to 1 by the inserted IA-32 prefix */
static int test2;              /* expected 2 after the jmpe round-trip */
static int test3;              /* expected 3 after the jmpe_abs round-trip */

/* On the first call, emits a small IA-64 code sequence (mov/inc + br.ia
 * bundles) and prepends an IA-32 prefix to the trace that jumps to it via
 * jmpe and jmpe_abs, recording results in test1/2/3.  On later calls, logs
 * the results and records success once.
 * NOTE: the emitted IA-64 buffer is intentionally leaked (test code only).
 */
static void
test_i64(dcontext_t *dcontext, app_pc tag, instrlist_t *trace)
{
    uint * i64_code;
    uint * i64_code_start;
    int check = 0;
    instr_t * inst;
    if (!have_done) {
#ifdef DEBUG
        opt_stats_t.i64_test = false;
        LOG(THREAD, LOG_OPTS, 1, "\nadding ia64 test\n");
        if (stats->loglevel >= 1 && (stats->logmask & LOG_OPTS) != 0)
            instrlist_disassemble(dcontext, tag, trace, THREAD);
#endif
        have_done = true;
        test1 = 0;
        test2 = 0;
        test3 = 0;
        /* get the mem, get extra for alignment considerations */
        i64_code = (uint *)heap_alloc(dcontext, 16*sizeof(uint) HEAPACCT(ACCT_OTHER));
        /* ensure proper aligment, 16 byte */
        while ((((ptr_uint_t)i64_code) & 0xf) != 0) {
            i64_code++;
            check++;
        }
        ASSERT(check <= 4);
        i64_code_start = i64_code;
        /* emit itanium instruction */
        /* instruction byte order is */
        /* little endian, IA-32 is also little endian so set there */
        /* use reg 1 since jmpe will leave a return address there */
        /* branch nop is easy, is just 001000000.. for 41 bits */

        /* first instruction bundle */
        /* just move */
        /*
        *i64_code = 0x00000002;
        i64_code++;
        *i64_code = 0x08100001;
        i64_code++;
        *i64_code = 0x00038004;
        i64_code++;
        *i64_code = 0x00040000;
        i64_code++;
        */

        /* mov and inc reg9 (ecx) bundle */
        *i64_code = 0x12044802;
        i64_code++;
        *i64_code = 0x08102100;
        i64_code++;
        *i64_code = 0x00038004;
        i64_code++;
        *i64_code = 0x00040000;
        i64_code++;

        /* br.ia instruction bundle: return to IA-32 mode */
        *i64_code = 0x0000001d;
        i64_code++;
        *i64_code = 0x00000001;
        i64_code++;
        *i64_code = 0x20000200;
        i64_code++;
        *i64_code = 0x00800010;


        /* add testing prefix to trace */
        inst = instrlist_first(trace);

        /* store state, and pref for tests */
        instrlist_preinsert(trace, inst,
                            instr_create_save_to_dcontext(dcontext, REG_ECX, XCX_OFFSET));
        instrlist_preinsert(trace, inst,
                            instr_create_save_to_dcontext(dcontext, REG_EBX, XBX_OFFSET));
#ifdef DEBUG
        instrlist_preinsert(trace, inst,
                            instr_create_save_to_dcontext(dcontext, REG_EAX, XAX_OFFSET));
        instrlist_preinsert(trace, inst,
                            INSTR_CREATE_lahf(dcontext));
        instrlist_preinsert(trace, inst,
                            INSTR_CREATE_inc(dcontext, opnd_create_base_disp(REG_NULL, REG_NULL, 1, (int)&(opt_stats_t.ia64_num_entries), OPSZ_4_short2)));
#endif
        instrlist_preinsert(trace, inst,
                            INSTR_CREATE_mov_imm(dcontext, opnd_create_reg(REG_ECX), OPND_CREATE_INT32(1)));
        instrlist_preinsert(trace, inst,
                            INSTR_CREATE_mov_st(dcontext, opnd_create_base_disp(REG_NULL, REG_NULL, 1, (int)&test1, OPSZ_4_short2), opnd_create_reg(REG_ECX)));

        /* test jmpe: the IA-64 code increments ecx, so test2 should see 2 */
        instrlist_preinsert(trace, inst,
                            INSTR_CREATE_mov_imm(dcontext, opnd_create_reg(REG_EBX), OPND_CREATE_INT32((int)i64_code_start)));
        instrlist_preinsert(trace, inst,
                            INSTR_CREATE_jmpe(dcontext, opnd_create_reg(REG_EBX)));
        instrlist_preinsert(trace, inst,
                            INSTR_CREATE_mov_st(dcontext, opnd_create_base_disp(REG_NULL, REG_NULL, 1, (int)&test2, OPSZ_4_short2), opnd_create_reg(REG_ECX)));

        /* test jmpe_abs: another increment, so test3 should see 3 */
        instrlist_preinsert(trace, inst,
                            INSTR_CREATE_jmpe_abs(dcontext, opnd_create_pc((app_pc)i64_code_start)));
        instrlist_preinsert(trace, inst,
                            INSTR_CREATE_mov_st(dcontext, opnd_create_base_disp(REG_NULL, REG_NULL, 1, (int)&test3, OPSZ_4_short2), opnd_create_reg(REG_ECX)));

        /* cleanup */
#ifdef DEBUG
        instrlist_preinsert(trace, inst,
                            INSTR_CREATE_sahf(dcontext));
        instrlist_preinsert(trace, inst,
                            instr_create_restore_from_dcontext(dcontext, REG_EAX, XAX_OFFSET));
#endif
        instrlist_preinsert(trace, inst,
                            instr_create_restore_from_dcontext(dcontext, REG_ECX, XCX_OFFSET));
        instrlist_preinsert(trace, inst,
                            instr_create_restore_from_dcontext(dcontext, REG_EBX, XBX_OFFSET));
#ifdef DEBUG
        LOG(THREAD, LOG_OPTS, 1, "\ndone adding ia64 test\n");
        if (stats->loglevel >= 1 && (stats->logmask & LOG_OPTS) != 0)
            instrlist_disassemble(dcontext, tag, trace, THREAD);
#endif
        LOG(THREAD, LOG_OPTS, 1, "i64_test cur vals test1 : %d test2 : %d test3 : %d\n", test1, test2, test3);
    } else {
        if (!(test1 == 1 && test2 == 2 && test3 == 3)) {
            LOG(THREAD, LOG_OPTS, 1, "i64_test cur vals test1 : %d test2 : %d test3 : %d\n", test1, test2, test3);
        } else if (!checked) {
            LOG(THREAD, LOG_OPTS, 1, "i64_test cur vals test1 : %d test2 : %d test3 : %d\n", test1, test2, test3);
#ifdef DEBUG
            opt_stats_t.i64_test = true;
#endif
            /* bug fix: this assignment and the closing brace used to sit
             * inside #ifdef DEBUG, leaving the braces unbalanced (compile
             * error) in non-DEBUG builds */
            checked = true;
        }
    }
}
#endif
| |
| /***************************************************************************/ |
| /* from Tim */ |
| /* a simple non-optimization that counts the number of instructions processed */ |
| /* maby extend it later to gather more statistics, size distribution */ |
| /* op distribution, etc. if ever desired */ |
| |
| static void |
| instr_counts(dcontext_t *dcontext, app_pc tag, instrlist_t *trace, bool pre) |
| { |
| #ifdef DEBUG |
| instr_t *inst; |
| int jmps = 0; |
| int instrs = 0; |
| for (inst = instrlist_first(trace); inst != NULL; inst = instr_get_next(inst)) { |
| instrs++; |
| if (instr_is_cbr(inst)) |
| jmps++; |
| } |
| if (pre) { |
| LOG(THREAD, LOG_OPTS, 2, "Prior to optimization\n %d instrs in trace\n %d jmps exiting trace\n", instrs, jmps); |
| opt_stats_t.pre_num_instrs_seen += instrs; |
| opt_stats_t.pre_num_jmps_seen += jmps; |
| } else { |
| LOG(THREAD, LOG_OPTS, 2, "After optimization\n %d instrs in trace\n %d jmps exiting trace\n", instrs, jmps); |
| opt_stats_t.post_num_instrs_seen += instrs; |
| opt_stats_t.post_num_jmps_seen += jmps; |
| } |
| #endif |
| } |
| |
| |
| /***************************************************************************/ |
| /* from Tim */ |
| /* Constant Propagation */ |
| |
/* utility structures */
/* per-entry state flags for the register/address/stack caches below */
#define PS_VALID_VAL 0x01   /* cached value is known/valid */
#define PS_LVALID_VAL 0x02 /* high and low parts, only used for regs */
#define PS_HVALID_VAL 0x04
#define PS_KEEP 0x80        /* entry survives invalidation (clears keep only this bit) */

#define NUM_CONSTANT_ADDRESS 24
#define NUM_STACK_SLOTS 24


/* aggressiveness knobs for global (absolute-address) and local (ebp-relative
 * stack) constant propagation; higher values enable more speculation */
static int cp_global_aggr;
static int cp_local_aggr;

/* holds the current state of the propagation */
typedef struct _prop_state_t {
    dcontext_t *dcontext;
    instrlist_t *trace;
    instr_t *hint;
    /* per-register flags (PS_* bits) and cached 32-bit values, indexed by
     * reg - REG_START_32 */
    byte reg_state[8];
    int reg_vals[8];
    /* constant address */
    int addresses[NUM_CONSTANT_ADDRESS];
    int address_vals[NUM_CONSTANT_ADDRESS];
    byte address_state[NUM_CONSTANT_ADDRESS];

    /* stack */
    /* current call-scope depth; stack-slot entries are only valid within
     * the scope in which they were recorded */
    int cur_scope;

    int stack_offsets_ebp[NUM_STACK_SLOTS];
    int stack_vals[NUM_STACK_SLOTS];

    int stack_scope[NUM_STACK_SLOTS];
    byte stack_address_state[NUM_STACK_SLOTS];
    /* add esp offsets in the future ? */
    bool lost_scope_count;
} prop_state_t;
| |
| |
| static void |
| set_stack_val(prop_state_t *state, int disp, int val, byte flags) |
| { |
| #ifdef DEBUG |
| dcontext_t *dcontext = state->dcontext; |
| #endif |
| bool cont = true; |
| int i; |
| for (i = 0; (i < NUM_STACK_SLOTS) && cont; i++) { |
| if (state->stack_address_state[i] == 0) { |
| state->stack_offsets_ebp[i] = disp; |
| state->stack_vals[i] = val; |
| state->stack_scope[i] = state->cur_scope; |
| state->stack_address_state[i] = flags; |
| cont = false; |
| } |
| } |
| if (cont) { |
| LOG(THREAD, LOG_OPTS, 3, "stack cache overflow\n"); |
| i = disp % NUM_STACK_SLOTS; |
| ASSERT(i>0 && i<NUM_STACK_SLOTS); |
| state->stack_offsets_ebp[i] = disp; |
| state->stack_vals[i] = val; |
| state->stack_scope[i] = state->cur_scope; |
| state->stack_address_state[i] = flags; |
| } |
| |
| LOG(THREAD, LOG_OPTS, 3, " stack cache add: "PFX" val "PFX" scope depth %d flags 0x%02x\n", disp, val, state->cur_scope, flags); |
| } |
| |
| static void |
| update_stack_val(prop_state_t *state, int disp, int val) |
| { |
| #ifdef DEBUG |
| dcontext_t *dcontext = state->dcontext; |
| #endif |
| bool cont = true; |
| int i; |
| for (i = 0; i < NUM_STACK_SLOTS && cont; i++) { |
| if (state->stack_offsets_ebp[i] == disp && |
| state->stack_scope[i] == state->cur_scope) { |
| state->stack_vals[i] = val; |
| state->stack_address_state[i] |= PS_VALID_VAL; |
| cont = false; |
| LOG(THREAD, LOG_OPTS, 3, " stack cache update disp "PFX" to value "PFX" at stack depth %d\n", disp, val, state->cur_scope); |
| } |
| } |
| if (cp_local_aggr > 2) { |
| if (cont) |
| set_stack_val(state, disp, val, PS_VALID_VAL); |
| } |
| } |
| |
#if 0 /* not used */
/* Consistency check: returns whether a cached entry for (disp, cur_scope)
 * agrees with the expected validity; when the entry is cached as valid and
 * the caller claims validity, asserts the values match.
 * NOTE(review): dead code, kept for reference only.
 */
static bool
check_stack_val(prop_state_t *state, int disp, int val, bool valid)
{
    int i;
    for (i = 0; i < NUM_STACK_SLOTS; i++) {
        if (state->stack_offsets_ebp[i] == disp &&
            state->stack_scope[i] == state->cur_scope) {
            if ((state->stack_address_state[i] & PS_VALID_VAL) != 0) {
                if (valid)
                    ASSERT(state->stack_vals[i] == val);
                return false;
            }
            return valid;
        }
    }
    return true;
}
#endif
| |
| static void |
| clear_stack_val(prop_state_t *state, int address) |
| { |
| #ifdef DEBUG |
| dcontext_t *dcontext = state->dcontext; |
| #endif |
| int i; |
| bool cont=true; |
| for (i = 0; i < NUM_STACK_SLOTS && cont; i++) { |
| if (state->stack_offsets_ebp[i] == address && |
| state->stack_scope[i] == state->cur_scope) { |
| state->stack_address_state[i] &= PS_KEEP; |
| cont=false; |
| LOG(THREAD, LOG_OPTS, 3, " load constant cache cleared: disp "PFX" stack depth %d \n", address, state->cur_scope); |
| } |
| } |
| } |
| |
| /* adds an address value pair to the constant address cache */ |
| static void |
| set_address_val(prop_state_t *state, int address, int val, byte flags) |
| { |
| #ifdef DEBUG |
| dcontext_t *dcontext = state->dcontext; |
| #endif |
| bool cont = true; |
| int i; |
| for (i = 0; (i < NUM_CONSTANT_ADDRESS) && cont; i++) { |
| if (state->address_state[i] == 0) { |
| state->addresses[i] = address; |
| state->address_vals[i] = val; |
| state->address_state[i] = flags; |
| cont = false; |
| } |
| } |
| if (cont) { |
| LOG(THREAD, LOG_OPTS, 3, "constant address cache overflow\n"); |
| i = address % NUM_CONSTANT_ADDRESS; |
| ASSERT(i>0 && i<NUM_CONSTANT_ADDRESS); |
| state->addresses[i] = address; |
| state->address_vals[i] = val; |
| state->address_state[i] = flags; |
| } |
| LOG(THREAD, LOG_OPTS, 3, " load const cache add: "PFX" val "PFX" flags 0x%02x\n", address, val, flags); |
| } |
| |
| /* updates and address value pair in the constant address cache if the address is |
| * already there, else adds it */ |
| static void |
| update_address_val(prop_state_t *state, int address, int val) |
| { |
| #ifdef DEBUG |
| dcontext_t *dcontext = state->dcontext; |
| #endif |
| bool cont = true; |
| int i; |
| for (i = 0; i < NUM_CONSTANT_ADDRESS; i++) { |
| if (state->addresses[i] == address) { |
| state->address_vals[i] = val; |
| state->address_state[i] |= PS_VALID_VAL; |
| cont = false; |
| LOG(THREAD, LOG_OPTS, 3, " load const cache update: "PFX" val "PFX"\n", address, val); |
| } |
| } |
| if (cp_global_aggr > 2) { |
| if (cont) |
| set_address_val(state, address, val, PS_VALID_VAL); |
| } |
| } |
| |
| /* removes the address from the constant address cache */ |
| static void |
| clear_address_val(prop_state_t *state, int address) |
| { |
| #ifdef DEBUG |
| dcontext_t *dcontext = state->dcontext; |
| #endif |
| int i; |
| for (i = 0; i < NUM_CONSTANT_ADDRESS; i++) { |
| if (state->addresses[i] == address) { |
| state->address_state[i] &= PS_KEEP; |
| LOG(THREAD, LOG_OPTS, 3, " load constant cache cleared: "PFX"\n", address); |
| } |
| } |
| } |
| |
/* gets the value of a const address to an immutable region in memory
 * assumes that const_add_const_mem has already been called on this
 * and returned true
 */
/* Dereferences the operand's absolute displacement as a 1-, 2- or 4-byte
 * signed load (byte/short loads are sign-extended into the int result).
 * The caller guarantees the address is readable and immutable; any other
 * size is unreachable because const_address_const_mem rejects it first.
 */
static int
get_immutable_value(opnd_t address, prop_state_t *state, int size)
{
    int result;

    switch(size) {
    case OPSZ_1:
        {
            /* sign-extends: result takes the value of the signed char */
            char *ptr_byte = ((char*)(ptr_int_t)opnd_get_disp(address));
            IF_X64(ASSERT_NOT_IMPLEMENTED(false));
            result = *ptr_byte;
            break;
        }
    case OPSZ_2:
        {
            short *ptr_byte = ((short*)(ptr_int_t)opnd_get_disp(address));
            IF_X64(ASSERT_NOT_IMPLEMENTED(false));
            result = *ptr_byte;
            break;
        }
    case OPSZ_4:
        {
            int *ptr_byte = ((int*)(ptr_int_t)opnd_get_disp(address));
            IF_X64(ASSERT_NOT_IMPLEMENTED(false));
            result = *ptr_byte;
            break;
        }
    default :
        {
            /* can't handle size, log is usually quadwords for floats */
#ifdef DEBUG
            dcontext_t *dcontext = state->dcontext;
            logopnd(state->dcontext, 3, address, "Couldn't handle size in get_immutable_value");
            LOG(THREAD, LOG_OPTS, 3, "Address size was %d\n", size);
#endif
            /* should never get here, since const_address_const_mem should fail */
            result = 0;
            ASSERT_NOT_REACHED();
        }
    }

    return result;
}
| |
| |
| /* returns true if the opnd is a stack address (ebp) |
| * i.e. is memory access with ebp as reg base and null as index reg */ |
| static bool |
| opnd_is_stack_address(opnd_t address) |
| { |
| return (opnd_is_near_base_disp(address) && |
| (opnd_get_base(address) == REG_EBP) && |
| (opnd_get_index(address) == REG_NULL)); |
| } |
| |
/* true if addresses (which must be a constant address) is an
 * immutable region of memory */
/* Only 1-, 2- and 4-byte accesses are supported (variable OPSZ_4_short2 is
 * resolved via the prefix_data flag); "immutable" is approximated by the
 * region being executable, gated on cp_global_aggr > 1.
 */
static bool
const_address_const_mem(opnd_t address, prop_state_t *state, bool prefix_data)
{
    bool success = false;
    int size = opnd_get_size(address);
    logopnd(state->dcontext, 3, address, "  checking const address const mem\n");
    if (size == OPSZ_4_short2) {
        /* operand-size prefix selects the 2-byte form */
        if (prefix_data)
            size = OPSZ_2;
        else
            size = OPSZ_4;
    }
    if (size != OPSZ_1 && size != OPSZ_2 && size != OPSZ_4) {
        /* can't handle size, is usually quadwords for floats */
#ifdef DEBUG
        dcontext_t *dcontext = state->dcontext;
        logopnd(state->dcontext, 3, address, "Couldn't handle size in const_address_const_mem");
        LOG(THREAD, LOG_OPTS, 3, "Address size was %d\n", size);
#endif
        return false;
    }

    /* FIXME : is is_executable_address always right here? */
    /* i.e. is it going to be true, forever, that this location isn't writable */
    IF_X64(ASSERT_NOT_IMPLEMENTED(false));
    if (cp_global_aggr > 1 &&
        is_executable_address((app_pc)(ptr_int_t)opnd_get_disp(address)))
        success = true;

    return success;
}
| |
/* takes an opnd and returns a simplified version, simplifies address and
 * regs based on the information in propState
 */
/* For a memory-reference operand, folds any base/index register whose
 * 32-bit value is cached (PS_VALID_VAL) into the displacement, dropping
 * that register from the operand.  Returns the operand unchanged when it is
 * not a memory reference or nothing could be folded.
 */
static opnd_t
propagate_address(opnd_t old, prop_state_t *state)
{
    reg_id_t base_reg, index_reg, seg;
    int disp, scale;
    opnd_size_t size;
    bool modified;

    if (!opnd_is_memory_reference(old))
        return old;
    IF_X64(ASSERT_NOT_IMPLEMENTED(false)); /* rel and abs mem refs NYI */
    /* tries to simplify the address calculation with propagated values */
    /* NOTE: base_reg/index_reg are biased by REG_START_32 here so they can
     * be used directly as indices into reg_state/reg_vals; a non-32-bit or
     * null register wraps around (reg_id_t is unsigned) and fails the
     * "< 8" checks below */
    base_reg = opnd_get_base(old) - REG_START_32;
    disp = opnd_get_disp(old);
    index_reg = opnd_get_index(old) - REG_START_32;
    scale = opnd_get_scale(old);
    seg = REG_NULL;
    size = opnd_get_size(old);
    modified = false;

    if (opnd_is_far_base_disp(old)) {
        seg = opnd_get_segment(old);
    }

    if (index_reg < 8 /* rules out underflow */ &&
        ((state->reg_state[index_reg] & PS_VALID_VAL) != 0)) {
        /* index value is known: fold index*scale into the displacement */
        disp += (state->reg_vals[index_reg] * scale);
        index_reg = REG_NULL;
        modified = true;
    } else {
        /* undo the bias to restore the original register id */
        index_reg += REG_START_32;
    }

    if (base_reg < 8 /* rules out underflow */ &&
        ((state->reg_state[base_reg] & PS_VALID_VAL) != 0)) {
        /* base value is known: fold it into the displacement */
        disp += state->reg_vals[base_reg];
        /* don't think this is necessary *******FIXME*************
        if ((seg == REG_NULL) && ((base_reg + REG_START_32 == REG_ESP) ||
                                  (base_reg + REG_START_32 == REG_EBP))) {
            seg = SEG_SS;
        }
        */
        base_reg = REG_NULL;
        modified = true;
    } else {
        base_reg += REG_START_32;
    }

    if (!modified)
        return old;

    if (seg == REG_NULL) {
        /* return base disp */
        return opnd_create_base_disp(base_reg, index_reg, scale, disp, size);
    }

    /* return far base disp */
    return opnd_create_far_base_disp(seg, base_reg, index_reg, scale, disp, size);
}
| |
/* attempts to simplify the opnd with propagated information */
/* Register operands with a cached value are replaced by immediates (with
 * the appropriate 16-/8-bit masking and sign extension for sub-registers);
 * memory operands are first simplified via propagate_address and then, if
 * they resolve to a cached stack slot, an immutable constant address, or a
 * cached constant address, replaced by the cached/loaded immediate.
 * Returns the operand unchanged when nothing applies.
 */
static opnd_t
propagate_opnd(opnd_t old, prop_state_t *state, bool prefix_data)
{

    reg_id_t reg;
    int immed, disp, i;
    opnd_size_t size=OPSZ_NA;
#ifdef DEBUG
    dcontext_t *dcontext = state->dcontext;
#endif

    if (opnd_is_reg(old)) {
        /* each bias below maps a register family onto the 0..7 cache index;
         * other families wrap around (reg_id_t is unsigned) and fail the
         * range check */
        reg = opnd_get_reg(old) - REG_START_32;
        if (reg < 8 /* rules out underflow */) {
            if ((state->reg_state[reg] & PS_VALID_VAL) != 0) {
                immed = state->reg_vals[reg];
                return opnd_create_immed_int(immed, OPSZ_4);
            } else
                return old;
        }
        reg = opnd_get_reg(old) - REG_START_16;
        if (reg < 8 /* rules out underflow */) {
            /* 16-bit reg is known if the whole 32-bit value is valid, or
             * both its low and high byte halves are */
            if ((state->reg_state[reg] & PS_VALID_VAL) != 0 ||
                ((state->reg_state[reg] & PS_LVALID_VAL) != 0 &&
                 (state->reg_state[reg] & PS_HVALID_VAL) != 0)) {
                /* mask out top part of register */
                immed = state->reg_vals[reg] & 0x0000ffff;
                /* sign extend if negative */
                if ((immed & 0x00008000) != 0)
                    immed |= 0xffff0000;
                return opnd_create_immed_int(immed, OPSZ_2);
            } else
                return old;
        }
        /* 8-bit registers: indices 0..3 are the low bytes (al..bl),
         * 4..7 the high bytes (ah..bh) of cache slots 0..3 */
        reg = opnd_get_reg(old) - REG_START_8;
        if (reg < 4 /* rules out underflow */ &&
            ((state->reg_state[reg] & PS_VALID_VAL) != 0 ||
             (state->reg_state[reg] & PS_LVALID_VAL) != 0)) {
            /* is low part */
            /* mask out top part of register */
            immed = state->reg_vals[reg] & 0x000000ff;
            /* sign extend if negative */
            if ((immed & 0x00000080) != 0)
                immed |= 0xffffff00;
            return opnd_create_immed_int(immed, OPSZ_1);
        }
        reg -= 4;
        if (reg < 4 /* rules out underflow */ &&
            ((state->reg_state[reg] & PS_VALID_VAL) != 0 ||
             (state->reg_state[reg] & PS_HVALID_VAL) != 0)) {
            /* is high part */
            /* mask out part of register */
            immed = state->reg_vals[reg] & 0x0000ff00;
            /* shift */
            immed = immed >> 8;
            /* sign extend if negative */
            if ((immed & 0x00000080) != 0)
                immed |= 0xffffff00;
            return opnd_create_immed_int(immed, OPSZ_1);
        }
        return old;
    }

    /* fold any known base/index registers into the displacement first */
    old = propagate_address(old, state);

    /* if address, get size */
    if (opnd_is_memory_reference(old)) {
        size = opnd_get_size(old);
        /* handle variable size */
        if (size == OPSZ_4_short2) {
            if (prefix_data)
                size = OPSZ_2;
            else
                size = OPSZ_4;
        }
    }

    if (opnd_is_stack_address(old) && cp_local_aggr > 0) {
        // check stack value
        disp = opnd_get_disp(old);
        for (i = 0; i < NUM_STACK_SLOTS; i++) {
            if (state->stack_offsets_ebp[i] == disp &&
                state->cur_scope == state->stack_scope[i] &&
                (state->stack_address_state[i] & PS_VALID_VAL) != 0) {
                LOG(THREAD, LOG_OPTS, 3, "  at stack depth %d ", state->cur_scope);
                logopnd(state->dcontext, 3, old, "  found cached stack address");
                immed = state->stack_vals[i];
                return opnd_create_immed_int(immed, size);
            }
        }
    }

    if (opnd_is_constant_address(old) && cp_global_aggr > 0) {
        if (const_address_const_mem(old, state, prefix_data)) {
#ifdef DEBUG
            logopnd(state->dcontext, 2, old, "  found const address const mem\n");
            opt_stats_t.num_const_add_const_mem++;
#endif
            /* address points into immutable memory: read the value now */
            immed = get_immutable_value(old, state, size);
            return opnd_create_immed_int(immed, size);
        } else {
            // check for constant address
            disp = opnd_get_disp(old);
            for (i = 0; i < NUM_CONSTANT_ADDRESS; i++) {
                if (state->addresses[i] == disp && (state->address_state[i] & PS_VALID_VAL) != 0) {
                    logopnd(state->dcontext, 3, old, "  found cached constant address\n");
                    immed = state->address_vals[i];
                    return opnd_create_immed_int(immed, size);
                }
            }
        }
    }
    return old;
}
| |
| /* checks an eflags and eflags_valid to see if a particular flag flag is valid |
| * and has appropriate value */ |
| static bool |
| check_flag_val(uint flag, int val, uint eflags, uint eflags_valid) |
| { |
| if ((eflags_valid & flag) != 0) { |
| if (((val == 1) && ((flag & eflags) != 0)) || |
| ((val == 0) && ((flag & eflags) == 0))) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| /* checks and eflags and an eflags_valid and checks to see that the two given flags |
| * are both valid and set either same (if same is true) or different (if same is false) */ |
| static bool |
| compare_flag_vals(uint flag1, uint flag2, bool same, uint eflags, uint eflags_valid) |
| { |
| if (((eflags_valid & flag1) != 0) && |
| ((eflags_valid & flag2) != 0)) { |
| if ((same && (((flag1 & eflags) != 0) == |
| ((flag2 & eflags) != 0))) || |
| ((!same) && (((flag1 & eflags) != 0) != |
| ((flag2 & eflags) != 0)))) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
/* returns true if, given the passed in flag information the jump
 * will never be taken */
/* Each case checks that the known flag values force the branch condition
 * false, per the IA-32 Jcc condition definitions (e.g. jb is taken when
 * CF=1, so it is removable when CF is known to be 0; jl is taken when
 * SF!=OF, so it is removable when SF and OF are known equal).
 */
static bool
removable_jmp(instr_t *inst, uint eflags, uint eflags_valid)
{
    int opcode = instr_get_opcode(inst);
    switch(opcode) {
    case OP_jno_short: case OP_jno:
        {
            return check_flag_val(EFLAGS_READ_OF, 1, eflags, eflags_valid);
        }
    case OP_jo_short: case OP_jo:
        {
            return check_flag_val(EFLAGS_READ_OF, 0, eflags, eflags_valid);
        }
    case OP_jnb_short: case OP_jnb:
        {
            return check_flag_val(EFLAGS_READ_CF, 1, eflags, eflags_valid);
        }
    case OP_jb_short: case OP_jb:
        {
            return check_flag_val(EFLAGS_READ_CF, 0, eflags, eflags_valid);
        }
    case OP_jnz_short: case OP_jnz:
        {
            return check_flag_val(EFLAGS_READ_ZF, 1, eflags, eflags_valid);
        }
    case OP_jz_short: case OP_jz:
        {
            return check_flag_val(EFLAGS_READ_ZF, 0, eflags, eflags_valid);
        }
    case OP_jnbe_short: case OP_jnbe:
        {
            /* jnbe taken when CF=0 and ZF=0: either flag known set kills it */
            return (check_flag_val(EFLAGS_READ_CF, 1, eflags, eflags_valid) ||
                    check_flag_val(EFLAGS_READ_ZF, 1, eflags, eflags_valid));
        }
    case OP_jbe_short: case OP_jbe:
        {
            /* jbe taken when CF=1 or ZF=1: both must be known clear */
            return (check_flag_val(EFLAGS_READ_CF, 0, eflags, eflags_valid) &&
                    check_flag_val(EFLAGS_READ_ZF, 0, eflags, eflags_valid));
        }
    case OP_jns_short: case OP_jns:
        {
            return check_flag_val(EFLAGS_READ_SF, 1, eflags, eflags_valid);
        }
    case OP_js_short: case OP_js:
        {
            return check_flag_val(EFLAGS_READ_SF, 0, eflags, eflags_valid);
        }
    case OP_jnp_short: case OP_jnp:
        {
            return check_flag_val(EFLAGS_READ_PF, 1, eflags, eflags_valid);
        }
    case OP_jp_short: case OP_jp:
        {
            return check_flag_val(EFLAGS_READ_PF, 0, eflags, eflags_valid);
        }
    case OP_jnl_short: case OP_jnl:
        {
            /* jnl taken when SF==OF: removable when they are known different */
            return compare_flag_vals(EFLAGS_READ_SF, EFLAGS_READ_OF, false, eflags, eflags_valid);
        }
    case OP_jl_short: case OP_jl:
        {
            /* jl taken when SF!=OF: removable when they are known equal */
            return compare_flag_vals(EFLAGS_READ_SF, EFLAGS_READ_OF, true, eflags, eflags_valid);
        }
    case OP_jnle_short: case OP_jnle:
        {
            return (check_flag_val(EFLAGS_READ_ZF, 1, eflags, eflags_valid) ||
                    compare_flag_vals(EFLAGS_READ_SF, EFLAGS_READ_OF, false, eflags, eflags_valid));
        }
    case OP_jle_short: case OP_jle:
        {
            return (check_flag_val(EFLAGS_READ_ZF, 0, eflags, eflags_valid) &&
                    compare_flag_vals(EFLAGS_READ_SF, EFLAGS_READ_OF, true, eflags, eflags_valid));
        }
    }
    /* unknown or unconditional cti: assume it may be taken */
    return false;
}
| |
| /* takes in an eflags, eflags_valid and eflags_invalid and propagates the information |
| * forward simplifying instructions and eliminating jumps where possible, returns false |
| * if it reaches a non simplifiable instruction depends on the any of the eflags_valid |
| * or eflags_invalid before all flags in valid and invalid are overwritten by instructions */ |
| static bool |
| do_forward_check_eflags(instr_t *inst, uint eflags, uint eflags_valid, uint eflags_invalid, prop_state_t *state) |
| { |
| #ifdef DEBUG |
| dcontext_t *dcontext = state->dcontext; |
| #endif |
| instr_t *next_inst, *temp=NULL; |
| uint eflags_check; |
| int opcode; |
| uint temp_ef; |
| bool replace = false;; |
| if ((eflags_valid == 0) && (eflags_invalid == 0)) |
| return true; |
| for (inst = instr_get_next(inst); inst != NULL; inst = next_inst) { |
| next_inst = instr_get_next(inst); |
| loginst(state->dcontext, 3, inst, " flag checking "); |
| while ((inst != NULL) && instr_is_cti(inst)) { |
| LOG(THREAD, LOG_OPTS, 3, "attempting to remove, eflags = "PFX" eflags valid = "PFX"\n", eflags, eflags_valid); |
| if (removable_jmp(inst, eflags, eflags_valid)) { |
| #ifdef DEBUG |
| opt_stats_t.num_jmps_simplified++; |
| loginst(state->dcontext, 3, inst, " removing this jmp "); |
| #endif |
| remove_inst(state->dcontext, state->trace, inst); |
| inst = next_inst; |
| next_inst = instr_get_next(inst); |
| } else { |
| if (INTERNAL_OPTION(unsafe_ignore_eflags_trace) && |
| instr_get_opcode(inst) == OP_jecxz) |
| return true; |
| LOG(THREAD, LOG_OPTS, 3, "forward eflags check failed (1)\n"); |
| return false; |
| } |
| } |
| if ((inst == NULL) || instr_is_interrupt(inst) || instr_is_call(inst)) { |
| LOG(THREAD, LOG_OPTS, 3, "forward eflags check failed (2)\n"); |
| return false; |
| } |
| |
| // probably move to own method later if expanded to others |
| // FIXME cmov's other setcc's cmc |
| // don't bother to change to xor for zeroing, is not more efficient for 1 byte |
| /* TODO : add set[n]be set[n]l set[n]le, skip p since never used and might not |
| * be setting it right |
| */ |
| opcode = instr_get_opcode(inst); |
| if (((opcode == OP_seto) || (opcode == OP_setno)) && ((eflags_valid & EFLAGS_READ_OF) != 0)) { |
| if (((eflags & EFLAGS_READ_OF) != 0 && opcode == OP_seto) || |
| ((eflags & EFLAGS_READ_OF) == 0 && opcode == OP_setno)) { |
| temp = INSTR_CREATE_mov_st(state->dcontext, instr_get_dst(inst, 0), OPND_CREATE_INT8(1)); |
| } else { |
| temp = INSTR_CREATE_mov_st(state->dcontext, instr_get_dst(inst, 0), OPND_CREATE_INT8(0)); |
| } |
| replace = true; |
| } |
| if (((opcode == OP_setz) || (opcode == OP_setnz)) && ((eflags_valid & EFLAGS_READ_ZF) != 0)) { |
| if (((eflags & EFLAGS_READ_ZF) != 0 && opcode == OP_setz) || |
| ((eflags & EFLAGS_READ_ZF) == 0 && opcode == OP_setnz)) { |
| temp = INSTR_CREATE_mov_st(state->dcontext, instr_get_dst(inst, 0), OPND_CREATE_INT8(1)); |
| } else { |
| temp = INSTR_CREATE_mov_st(state->dcontext, instr_get_dst(inst, 0), OPND_CREATE_INT8(0)); |
| } |
| replace = true; |
| } |
| if (((opcode == OP_setb) || (opcode == OP_setnb)) && ((eflags_valid & EFLAGS_READ_CF) != 0)) { |
| if (((eflags & EFLAGS_READ_CF) != 0 && opcode == OP_setb) || |
| ((eflags & EFLAGS_READ_CF) == 0 && opcode == OP_setnb)) { |
| temp = INSTR_CREATE_mov_st(state->dcontext, instr_get_dst(inst, 0), OPND_CREATE_INT8(1)); |
| } else { |
| temp = INSTR_CREATE_mov_st(state->dcontext, instr_get_dst(inst, 0), OPND_CREATE_INT8(0)); |
| } |
| replace = true; |
| } |
| if (((opcode == OP_sets) || (opcode == OP_setns)) && ((eflags_valid & EFLAGS_READ_SF) != 0)) { |
| if (((eflags & EFLAGS_READ_SF) != 0 && opcode == OP_sets) || |
| ((eflags & EFLAGS_READ_SF) == 0 && opcode == OP_setns)) { |
| temp = INSTR_CREATE_mov_st(state->dcontext, instr_get_dst(inst, 0), OPND_CREATE_INT8(1)); |
| } else { |
| temp = INSTR_CREATE_mov_st(state->dcontext, instr_get_dst(inst, 0), OPND_CREATE_INT8(0)); |
| } |
| replace = true; |
| } |
| |
| temp_ef = EFLAGS_READ_SF | EFLAGS_READ_ZF | EFLAGS_READ_AF | EFLAGS_READ_PF | EFLAGS_READ_CF; |
| if ((opcode == OP_lahf) && ((eflags_valid & temp_ef) == temp_ef)) { |
| temp_ef = 0x02; |
| if ((eflags & EFLAGS_READ_CF) != 0) |
| temp_ef = temp_ef | 0x01; |
| if ((eflags & EFLAGS_READ_PF) != 0) |
| temp_ef = temp_ef | 0x04; |
| if ((eflags & EFLAGS_READ_AF) != 0) |
| temp_ef = temp_ef | 0x10; |
| if ((eflags & EFLAGS_READ_ZF) != 0) |
| temp_ef = temp_ef | 0x40; |
| // have to sign extend so the create immed int turns out right |
| if ((eflags & EFLAGS_READ_SF) != 0) |
| temp_ef = temp_ef | 0xffffff80; |
| LOG(THREAD, LOG_OPTS, 3, "lahf val %d "PFX"\n", temp_ef, temp_ef); |
| temp = INSTR_CREATE_mov_imm(state->dcontext, instr_get_dst(inst, 0), OPND_CREATE_INT8(temp_ef)); |
| replace = true; |
| } |
| |
| if (replace) { |
| #ifdef DEBUG |
| opt_stats_t.num_instrs_simplified++; |
| loginst(state->dcontext, 3, inst, " old instruction"); |
| loginst(state->dcontext, 3, temp, " simplified to "); |
| #endif |
| replace_inst(state->dcontext, state->trace, inst, temp); |
| inst = temp; |
| replace = false; |
| } |
| |
| eflags_check = instr_get_eflags(inst, DR_QUERY_DEFAULT); |
| if (((eflags_invalid & eflags_check) != 0) || ((eflags_valid & eflags_check) != 0)) { |
| loginst(state->dcontext, 3, inst, " uses eflags!"); |
| LOG(THREAD, LOG_OPTS, 3, "forward eflags check failed(3) "PFX" "PFX" "PFX"\n", eflags_valid, eflags_invalid, eflags_check); |
| return false; |
| } |
| eflags_invalid = eflags_invalid & (~ (EFLAGS_WRITE_TO_READ(eflags_check & EFLAGS_WRITE_ALL))); |
| eflags_valid = eflags_valid & (~ (EFLAGS_WRITE_TO_READ(eflags_check & EFLAGS_WRITE_ALL))); |
| if ((eflags_valid == 0) && (eflags_invalid == 0)) |
| return true; |
| } |
| LOG(THREAD, LOG_OPTS, 3, "forward eflags check failed(4)\n"); |
| return false; |
| } |
| |
| /* looks at the eflags of the instr passed in and checks to see if there |
| * is any dependency on the eflags written, gives up at CTI's |
| * return true if no dependency found |
| */ |
| static bool |
| forward_check_eflags(instr_t *inst, prop_state_t *state) |
| { |
| return do_forward_check_eflags(inst, 0, 0, |
| EFLAGS_WRITE_TO_READ |
| (instr_get_eflags(inst, DR_QUERY_DEFAULT) & |
| EFLAGS_WRITE_ALL), state); |
| } |
| |
| static instr_t * |
| make_imm_store(prop_state_t *state, instr_t *inst, int value) |
| { |
| return INSTR_CREATE_mov_st(state->dcontext, instr_get_dst(inst, 0), OPND_CREATE_INT32(value)); |
| } |
| |
| /* replaces inst with a mov imm of value to the same dst */ |
| static instr_t * |
| make_to_imm_store(instr_t *inst, int value, prop_state_t *state) |
| { |
| instr_t *replacement; |
| opnd_t dst = instr_get_dst(inst, 0); |
| dcontext_t *dcontext = state->dcontext; |
| |
| if (value == 0 && opnd_is_reg(dst)) { |
| replacement = INSTR_CREATE_xor(dcontext, dst, dst); |
| if (instr_get_prefix_flag(inst, PREFIX_DATA)) { |
| instr_set_prefix_flag(replacement, PREFIX_DATA); |
| LOG(THREAD, LOG_OPTS, 3, "carrying data prefix over %d\n", instr_get_prefixes(inst)); |
| } |
| if (do_forward_check_eflags(inst, 0, 0, EFLAGS_WRITE_TO_READ(instr_get_eflags(replacement, DR_QUERY_DEFAULT)), state)) { |
| /* check size prefixes, ignore lock and addr prefix */ |
| replace_inst(dcontext, state->trace, inst, replacement); |
| return replacement; |
| } else { |
| loginst(dcontext, 3, inst, " unable to simplify move zero to xor, e-flags check failed "); |
| instr_destroy(dcontext, replacement); |
| } |
| } |
| |
| /* is always creating the right sized imm? */ |
| replacement = INSTR_CREATE_mov_st(state->dcontext, dst, opnd_create_immed_int(value, opnd_get_size(dst))); |
| /* handle prefixes, imm->reg (data) imm->mem (data & addr) */ |
| if (instr_get_prefix_flag(inst, PREFIX_DATA)) { |
| instr_set_prefix_flag(replacement, PREFIX_DATA); |
| LOG(THREAD, LOG_OPTS, 3, "carrying data prefix over %d\n", instr_get_prefixes(inst)); |
| } |
| if (instr_get_prefix_flag(inst, PREFIX_ADDR) && opnd_is_memory_reference(dst)) { |
| instr_set_prefix_flag(replacement, PREFIX_ADDR); |
| LOG(THREAD, LOG_OPTS, 3, "carrying addr prefix over %d\n", instr_get_prefixes(inst)); |
| } |
| replace_inst(dcontext, state->trace, inst, replacement); |
| return replacement; |
| } |
| |
| static instr_t * |
| make_to_nop(prop_state_t *state, instr_t *inst, const char *pre, |
| const char *post, const char *fail) |
| { |
| instr_t *backup; |
| if (forward_check_eflags(inst, state)) { |
| loginst(state->dcontext, 3, inst, pre); |
| backup = INSTR_CREATE_nop(state->dcontext); |
| replace_inst(state->dcontext, state->trace, inst, backup); |
| loginst(state->dcontext, 3, backup, post); |
| return backup; |
| } else { |
| loginst(state->dcontext, 3, inst, fail); |
| return inst; |
| } |
| } |
| |
| /* uses < 0 as short for if top bit is set */ |
| /* calculates zf pf sf flags from some result immed */ |
| static int |
| calculate_zf_pf_sf(int immed) |
| { |
| int result = 0; |
| int i; |
| bool parity = true; |
| if (immed == 0) |
| result = result | EFLAGS_READ_ZF; |
| if (immed < 0) |
| result = result | EFLAGS_READ_SF; |
| for (i = 0; i < 8; i++) { |
| if (((immed >> i) & 0x1) != 0) |
| parity = !parity; |
| } |
| if (parity) |
| result = result | EFLAGS_READ_PF; |
| return result; |
| } |
| |
| /* simplifies an instruction where possible */ |
| /* NOTE that at this point all subsized arguments have been sign extended */ |
| /* if op takes subsize note signextend (movzx and shifts for ex.) must */ |
| /* explicitly check the size of the immed */ |
| static instr_t * |
| prop_simplify(instr_t *inst, prop_state_t *state) |
| { |
| instr_t *replacement; |
| int opcode, num_src, num_dst, immed1, immed2, immed3=0, i; |
| opnd_t temp_opnd; |
| uint eflags, eflags_valid, eflags_invalid; |
| dcontext_t *dcontext = state->dcontext; |
| |
| num_src = instr_num_srcs(inst); |
| num_dst = instr_num_dsts(inst); |
| opcode = instr_get_opcode(inst); |
| |
| if (opcode == OP_lea) { |
| temp_opnd = instr_get_src(inst, 0); |
| if (opnd_is_constant_address(temp_opnd)) { |
| inst = make_to_imm_store(inst, opnd_get_disp(temp_opnd), state); |
| } |
| return inst; |
| } |
| |
| if ((num_src == 1) && (num_dst == 1) && opnd_is_immed_int(instr_get_src(inst, 0))) { |
| immed1 = (int) opnd_get_immed_int(instr_get_src(inst, 0)); |
| switch(opcode) { |
| // movsx bsf bsr |
| case OP_mov_st: |
| case OP_mov_ld: |
| { |
| inst = make_to_imm_store(inst, immed1, state); |
| break; |
| } |
| case OP_movzx: |
| { |
| if (opnd_get_size(instr_get_src(inst, 0)) == OPSZ_1) { |
| immed3 = immed1 & 0x000000ff; |
| } else { |
| immed3 = immed1 & 0x0000ffff; |
| } |
| inst = make_to_imm_store(inst, immed3, state); |
| break; |
| } |
| case OP_movsx: |
| { |
| /* already sign extended */ |
| immed3 = immed1; |
| inst = make_to_imm_store(inst, immed3, state); |
| break; |
| } |
| case OP_bswap: |
| { |
| immed3 = (((immed1 << 24) & 0xff000000) | |
| ((immed1 << 8) & 0x00ff0000) | |
| ((immed1 >> 8) & 0x0000ff00) | |
| ((immed1 >> 24) & 0x000000ff)); |
| inst = make_to_imm_store(inst, immed3, state); |
| break; |
| } |
| case OP_not: |
| { |
| immed3 = ~immed1; |
| inst = make_to_imm_store(inst, immed3, state); |
| break; |
| } |
| case OP_neg: |
| { |
| immed3 = -immed1; |
| inst = make_to_imm_store(inst, immed3, state); |
| break; |
| } |
| case OP_inc: |
| { |
| immed3 = immed1 + 1; |
| eflags = calculate_zf_pf_sf(immed3); |
| eflags_valid = EFLAGS_READ_ZF | EFLAGS_READ_SF | EFLAGS_READ_PF; |
| eflags_invalid = instr_get_eflags(inst, DR_QUERY_DEFAULT) & EFLAGS_READ_ALL & (~eflags_valid); |
| if (do_forward_check_eflags(inst, eflags, eflags_valid, eflags_invalid, state)) |
| inst = make_to_imm_store(inst, immed3, state); |
| else |
| state->hint = make_imm_store(state, inst, immed3); |
| break; |
| } |
| case OP_dec: |
| { |
| immed3 = immed1 - 1; |
| eflags = calculate_zf_pf_sf(immed3); |
| eflags_valid = EFLAGS_READ_ZF | EFLAGS_READ_SF | EFLAGS_READ_PF; |
| eflags_invalid = instr_get_eflags(inst, DR_QUERY_DEFAULT) & EFLAGS_READ_ALL & (~eflags_valid); |
| if (do_forward_check_eflags(inst, eflags, eflags_valid, eflags_invalid, state)) |
| inst = make_to_imm_store(inst, immed3, state); |
| else |
| state->hint = make_imm_store(state, inst, immed3); |
| break; |
| } |
| default: |
| { |
| // unable to simplify instruction |
| } |
| } |
| return inst; |
| } |
| |
| if ((num_src == 2) && opnd_is_immed_int(instr_get_src(inst, 1)) && !opnd_is_immed_int(instr_get_src(inst, 0))) { |
| immed1 = (int) opnd_get_immed_int(instr_get_src(inst, 1)); |
| if (opcode == OP_cmp || opcode == OP_test) { |
| temp_opnd = instr_get_src(inst, 1); |
| if ((immed1 <= 127) && (immed1 >= -128)) { |
| opnd_set_size(&temp_opnd, OPSZ_1); |
| instr_set_src(inst, 1, temp_opnd); |
| } |
| } |
| /* jecxz hack, Should only match our indirect branch handling thing */ |
| if (opcode == OP_jecxz) { |
| if (immed1 == 0) { |
| /* NOTE : this hardcodes indirect branch stuff */ |
| instr_t *inst2, *inst3; |
| LOG(THREAD, LOG_OPTS, 3, "Found removable jeczx inst noping it and removing 2 prev, and next three instructions\n"); |
| replacement = INSTR_CREATE_nop(state->dcontext); |
| replace_inst(state->dcontext, state->trace, inst, replacement); |
| inst = replacement; |
| |
| /* remove control flow after jecxz */ |
| inst2 = instr_get_next(inst); |
| loginst(dcontext, 3, inst2, "removing "); |
| ASSERT(instr_get_opcode(inst2) == OP_lea && opnd_get_reg(instr_get_dst(inst2, 0)) == REG_ECX); |
| instrlist_remove(state->trace, inst2); |
| inst2 = instr_get_next(inst); |
| loginst(dcontext, 3, inst2, "removing "); |
| ASSERT(instr_get_opcode(inst2) == OP_jmp); |
| instrlist_remove(state->trace, inst2); |
| |
| /* remove prev inst */ |
| inst2 = instr_get_prev(inst); |
| inst3 = instr_get_prev(inst2); |
| if (instr_get_opcode(inst2) == OP_nop || |
| ((instr_get_opcode(inst2) == OP_mov_imm || |
| instr_get_opcode(inst2) == OP_mov_st || |
| is_zeroing_instr(inst2)) && |
| opnd_get_reg(instr_get_dst(inst2, 0)) == REG_ECX)) { |
| loginst(dcontext, 3, inst2, "removing "); |
| instrlist_remove(state->trace, inst2); |
| } else { |
| loginst(dcontext, 1, inst2, "ERROR : unexpected instruction in constant prop indirect branch removal (1) aborting"); |
| return inst; |
| } |
| inst2 = inst3; |
| /* three possiblities at this point */ |
| /* is lea or pop from return = shouldn't happen, well at */ |
| /* not till we start propagating stack vals, in which case */ |
| /* maby we can ignore since -call_return matching will get */ |
| /* is push from indirect call, move imm->reg, save to ecxoff*/ |
| /* is mov imm->reg, save to ecxoff */ |
| /* !save to ecxoff might be noped, as might any of the other */ |
| /* prev. by constant prop, asserts are fragile, remove them? */ |
| if (instr_get_opcode(inst2) == OP_pop || instr_get_opcode(inst2) == OP_lea) { |
| loginst(dcontext, 3, inst2, "ERROR : found what looks like a call return in jecxz removal, but we can't find those yet!! aborting"); |
| return inst; |
| } |
| if (instr_get_opcode(inst2) == OP_push_imm) { |
| loginst(dcontext, 3, inst2, "skipping "); |
| inst2 = instr_get_prev(inst2); |
| } |
| inst3 = instr_get_prev(inst2); |
| if (instr_get_opcode(inst2) == OP_nop || |
| ((instr_get_opcode(inst2) == OP_mov_imm || |
| instr_get_opcode(inst2) == OP_mov_st || |
| is_zeroing_instr(inst2)) && |
| opnd_get_reg(instr_get_dst(inst2, 0)) == REG_ECX)) { |
| loginst(dcontext, 3, inst2, "removing "); |
| instrlist_remove(state->trace, inst2); |
| } else { |
| loginst(dcontext, 1, inst2, "ERROR : unexpected instruction in constant prop indirect branch removal (2) aborting"); |
| return inst; |
| } |
| inst2 = inst3; |
| if (instr_get_opcode(inst2) == OP_nop || is_store_to_ecxoff(dcontext, inst2)) { |
| loginst(dcontext, 3, inst2, "removing "); |
| instrlist_remove(state->trace, inst2); |
| } else { |
| loginst(dcontext, 1, inst2, "ERROR : unexpected instruction in constant prop indirect branch removal (3) aborting"); |
| return inst; |
| } |
| |
| /* remove post inst */ |
| /* some op may have removed this already so check to be sure */ |
| inst2 = instr_get_next(inst); |
| if (is_load_from_ecxoff(dcontext, inst2)) { |
| loginst(dcontext, 3, inst2, "removing "); |
| instrlist_remove(state->trace, inst2); |
| } else { |
| loginst(dcontext, 1, inst2, "ERROR : unexpected instruction in constant prop indirect branch removal (a), This could be very bad"); |
| } |
| |
| #ifdef DEBUG |
| opt_stats_t.num_jmps_simplified++; |
| opt_stats_t.num_instrs_simplified++; |
| opt_stats_t.num_jecxz_instrs_removed += 6; |
| #endif |
| } else { |
| loginst(dcontext, 1, inst, "ERROR : Constant prop predicts indirect branch exit from trace always taken! If this is part of a reconstruct for exception state then the pc calculated is going to be wrong, if it isn't then something is broken regarding constant prop"); |
| } |
| } |
| return inst; |
| } |
| |
| if ((num_src == 2) && opnd_is_immed_int(instr_get_src(inst, 0))) { |
| immed1 = (int) opnd_get_immed_int(instr_get_src(inst, 0)); |
| |
| if (!opnd_is_immed_int(instr_get_src(inst, 1))) { |
| switch(opcode) { |
| case OP_sub: |
| case OP_add: |
| case OP_or: |
| case OP_and: |
| case OP_xor: |
| { |
| temp_opnd = instr_get_src(inst, 0); |
| if ((immed1 <= 127) && (immed1 >= -128)) { |
| opnd_set_size(&temp_opnd, OPSZ_1); |
| instr_set_src(inst, 0, temp_opnd); |
| } |
| break; |
| } |
| case OP_test: |
| { |
| temp_opnd = instr_get_src(inst, 0); |
| if ((immed1 <= 127) && (immed1 >= -128)) |
| opnd_set_size(&temp_opnd, OPSZ_1); |
| instr_set_src(inst, 0, instr_get_src(inst, 1)); |
| instr_set_src(inst, 1, temp_opnd); |
| break; |
| } |
| case OP_push: |
| { |
| instr_set_opcode(inst, OP_push_imm); |
| break; |
| } |
| } |
| return inst; |
| } else { |
| immed2 = (int) opnd_get_immed_int(instr_get_src(inst, 1)); |
| switch(num_dst) { |
| case 0: |
| { |
| switch(opcode) { |
| case OP_test: |
| { |
| immed3 = immed1 & immed2; |
| eflags_valid = EFLAGS_READ_CF | EFLAGS_READ_PF | EFLAGS_READ_ZF | EFLAGS_READ_SF | EFLAGS_READ_OF | EFLAGS_READ_AF; |
| // technically AF is undefined, but since no one |
| // should be relying on it we can set it to zero |
| eflags = calculate_zf_pf_sf(immed3); |
| if (do_forward_check_eflags(inst, eflags, eflags_valid, 0, state)) { |
| replacement = INSTR_CREATE_nop(state->dcontext); |
| replace_inst(dcontext, state->trace, inst, replacement); |
| inst = replacement; |
| } |
| break; |
| } |
| case OP_cmp: |
| { |
| //FIXME of and sf and af correct? FIXME!! |
| immed3 = immed1 - immed2; |
| eflags = calculate_zf_pf_sf(immed3); |
| if (((uint)immed1) < ((uint)immed2)) { |
| eflags = eflags | EFLAGS_READ_CF; |
| } |
| if (((immed1 >= immed2) && ((eflags & EFLAGS_READ_CF) != 0)) || |
| ((immed1 < immed2) && ((eflags & EFLAGS_READ_CF) == 0))) { |
| eflags = eflags | EFLAGS_READ_OF; |
| } |
| if ((immed1 & 0x7) < (immed2 & 0x7)) |
| eflags = eflags | EFLAGS_READ_AF; |
| eflags_valid = EFLAGS_READ_CF | EFLAGS_READ_PF | EFLAGS_READ_ZF | EFLAGS_READ_SF | EFLAGS_READ_OF | EFLAGS_READ_AF; |
| if (do_forward_check_eflags(inst, eflags, eflags_valid, 0, state)) { |
| replacement = INSTR_CREATE_nop(state->dcontext); |
| replace_inst(state->dcontext, state->trace, inst, replacement); |
| inst = replacement; |
| } |
| break; |
| } |
| default: |
| { |
| // couldn't handle |
| } |
| } |
| break; |
| } |
| case 1: |
| { |
| /* TODO: maby explicitly find some flags? */ |
| bool replace = true; |
| int size; |
| switch(opcode) { |
| case OP_add: |
| immed3 = immed2 + immed1; |
| break; |
| case OP_sub: |
| immed3 = immed2 - immed1; |
| break; |
| case OP_or: |
| immed3 = immed2 | immed1; |
| break; |
| case OP_and: |
| immed3 = immed2 & immed1; |
| break; |
| case OP_xor: |
| immed3 = immed2 ^ immed1; |
| break; |
| case OP_shl: |
| /* same as OP_sal */ |
| size = opnd_get_size(instr_get_src(inst, 1)); |
| immed3 = immed2 << (immed1 & 0x1F); |
| /* adjust for size */ |
| if (size == OPSZ_1) { |
| if ((immed3 & 0x00000080) != 0) |
| immed3 |= 0xffffff00; |
| else |
| immed3 &= 0x000000ff; |
| } else if (size == OPSZ_2) { |
| if ((immed3 & 0x00008000) != 0) |
| immed3 |= 0xffff0000; |
| else |
| immed3 &= 0x0000ffff; |
| } else if (size != OPSZ_4) |
| replace = false; |
| break; |
| case OP_sar: |
| { |
| bool neg = immed2 < 0; |
| size = opnd_get_size(instr_get_src(inst, 1)); |
| if (size == OPSZ_1) { |
| immed3 = immed2 >> (immed1 & 0x1f); |
| if (neg) |
| immed3 |= 0xffffff00; |
| else |
| immed3 &= 0x000000ff; |
| } else if (size == OPSZ_2) { |
| immed3 = immed2 >> (immed1 & 0x1f); |
| if (neg) |
| immed3 |= 0xffff0000; |
| else |
| immed3 &= 0x0000ffff; |
| } else if (size == OPSZ_4) { |
| immed3 = immed2; |
| if (neg) { |
| for (i = 0; i < (immed1 & 0x1F); i++) |
| immed3 = (immed3 >> 1) | 0x80000000; |
| } else { |
| immed3 = immed3 >> (immed1 & 0x1f); |
| } |
| } else |
| replace = false; |
| break; |
| } |
| case OP_shr: |
| size = opnd_get_size(instr_get_src(inst, 1)); |
| if (immed1 == 0 || immed2 == 0) |
| immed3 = immed2; |
| else { |
| if (size == OPSZ_1) { |
| immed3 = (immed2 & 0x000000ff) >> (immed1 & 0x1f); |
| } else if (size == OPSZ_2) { |
| immed3 = (immed2 & 0x0000ffff) >> (immed1 & 0x1f); |
| } else if (size == OPSZ_4) { |
| if (immed2 > 0) { |
| immed3 = immed2 >> (immed1 & 0x1f); |
| } |
| else { |
| immed3 = immed2; |
| for (i = 0; i < (immed1 & 0x1F); i++) |
| immed3 = (immed3 >> 1) & 0x7fffffff; |
| } |
| } else |
| replace = false; |
| } |
| break; |
| /* TODO : rotates, keep size issues in mind */ |
| case OP_ror: |
| case OP_rol: |
| default: |
| replace = false; |
| /* can't handle this instruction */ |
| } |
| if (!replace) |
| break; |
| if (forward_check_eflags(inst, state)) |
| inst = make_to_imm_store(inst, immed3, state); |
| else |
| state->hint = make_imm_store(state, inst, immed3); |
| break; |
| } |
| case 2: |
| { |
| // mul divide xchg xadd |
| } |
| default: |
| { |
| // unable to simlify this instruction |
| } |
| } |
| } |
| return inst; |
| } |
| |
| // cpuid |
| |
| return inst; |
| } |
| |
| /* initializes all the trace constant stuff and add */ |
| static void |
| get_trace_constant(prop_state_t *state) |
| { |
| /* can add all dynamo addresses here, they are never aliased so always */ |
| /* safe to optimize, but takes up space in our cache, with new jump code */ |
| /* probably only use exc so just put it in, and maby eax to since is fav */ |
| /* when need to store flags/pass arg, can always add more location later */ |
| /* probably cleaner way of getting addresses but who cares for now */ |
| set_address_val(state, opnd_get_disp(opnd_create_dcontext_field(state->dcontext, XCX_OFFSET)), 0, PS_KEEP); |
| set_address_val(state, opnd_get_disp(opnd_create_dcontext_field(state->dcontext, XAX_OFFSET)), 0, PS_KEEP); |
| } |
| |
/* Updates the propagation state for inst's effects and returns inst -- or a
 * replacement nop when, with intrace true, inst merely re-stores a value the
 * state already guarantees the destination holds. */
static instr_t *
update_prop_state(prop_state_t *state, instr_t *inst, bool intrace)
{
    int opcode = instr_get_opcode(inst);
    int num_dst = instr_num_dsts(inst);
    bool is_zeroing = is_zeroing_instr(inst);
    dcontext_t *dcontext = state->dcontext;
    instr_t *backup;
    opnd_t opnd;
    int val, i;
    reg_id_t reg;
    /* case 1: inst produces a known constant (zeroing idiom or immed move) */
    if (is_zeroing || ((opcode == OP_mov_imm || opcode == OP_mov_st) &&
        opnd_is_immed_int(instr_get_src(inst, 0)))) {
        if (is_zeroing)
            val = 0;
        else
            val = (int) opnd_get_immed_int(instr_get_src(inst, 0));
        opnd = instr_get_dst(inst, 0);
        if (opnd_is_reg(opnd)) {
            /* classify the destination register by size: the subtraction
             * yields a small index only when the reg is in that size class
             * (reg_id_t is unsigned, so out-of-class values wrap large) */
            reg = opnd_get_reg(opnd) - REG_START_32;
            if (reg < 8 /* rules out underflow */) {
                /* full 32-bit register: whole value becomes known */
                /* if resetting to same value then just nop the instruction */
                if (intrace && (state->reg_state[reg] & PS_VALID_VAL) != 0 && state->reg_vals[reg] == val) {
                    inst = make_to_nop(state, inst, " register already set to val, simplify ", " to ", " register already set to val, but unable to simplify due to eflags");
                } else {
                    state->reg_state[reg] = PS_VALID_VAL;
                    state->reg_vals[reg] = val;
                }
            } else {
                reg = opnd_get_reg(opnd) - REG_START_16;
                if (reg < 8 /* rules out underflow */) {
                    /* 16-bit register: both low bytes of the parent become
                     * known; upper half of the cached value is preserved */
                    /* if resetting to same value then just nop the instruction */
                    if (intrace &&
                        ((state->reg_state[reg] & PS_VALID_VAL) != 0 ||
                         ((state->reg_state[reg] & PS_LVALID_VAL) != 0 &&
                          (state->reg_state[reg] & PS_HVALID_VAL) != 0)) &&
                        (state->reg_vals[reg] & 0x0000ffff) == (val & 0x0000ffff)) {
                        inst = make_to_nop(state, inst, " register already set to val, simplify ", " to ", " register already set to val, but unable to simplify due to eflags");
                    } else {
                        state->reg_state[reg] |= PS_LVALID_VAL | PS_HVALID_VAL;
                        state->reg_vals[reg] = (state->reg_vals[reg] & 0xffff0000) | (val & 0x0000ffff);
                    }
                } else {
                    reg = opnd_get_reg(opnd) - REG_START_8;
                    if (reg < 4 /* rules out underflow */) {
                        /* low 8-bit register (al..bl): only the low byte of
                         * the parent becomes known */
                        /* if resetting to same value then just nop the instruction */
                        if (intrace &&
                            ((state->reg_state[reg] & PS_VALID_VAL) != 0 ||
                             (state->reg_state[reg] & PS_LVALID_VAL) != 0) &&
                            (state->reg_vals[reg] & 0x000000ff) == (val & 0x000000ff)) {
                            inst = make_to_nop(state, inst, " register already set to val, simplify ", " to ", " register already set to val, but unable to simplify due to eflags");
                        } else {
                            state->reg_state[reg] |= PS_LVALID_VAL;
                            state->reg_vals[reg] = (state->reg_vals[reg] & 0xffffff00) | (val & 0x000000ff);
                        }
                    } else {
                        /* high 8-bit register (ah..bh): the extra -4 maps it
                         * onto the same slot as its 32-bit parent */
                        reg -= 4;
                        if (reg < 4 /* rules out underflow */) {
                            /* if resetting to same value then just nop the instruction */
                            if (intrace &&
                                ((state->reg_state[reg] & PS_VALID_VAL) != 0 ||
                                 (state->reg_state[reg] & PS_HVALID_VAL) != 0) &&
                                (state->reg_vals[reg] & 0x0000ff00) == ((val << 8) & 0x0000ff00)) {
                                inst = make_to_nop(state, inst, " register already set to val, simplify ", " to ", " register already set to val, but unable to simplify due to eflags");
                            } else {
                                state->reg_state[reg] |= PS_HVALID_VAL;
                                state->reg_vals[reg] = (state->reg_vals[reg] & 0xffff00ff) | ((val << 8) & 0x0000ff00);
                            }
                        } else {
                            /* just in case: unknown reg class, conservatively
                             * invalidate everything inst writes */
                            for (i = 0; i < 8; i++) {
                                if (instr_writes_to_reg(inst,
                                                        REG_START_32 + (reg_id_t)i,
                                                        DR_QUERY_DEFAULT)) {
                                    state->reg_state[i] = 0;
                                }
                            }
                        }
                    }
                }
            }
        } else {
            // do constant addresses
            if (opnd_is_constant_address(opnd) && cp_global_aggr > 0 ) {
                int disp = opnd_get_disp(opnd);
                /* redundant store of a value already cached for this address
                 * can be nop'd outright */
                for (i = 0; i < NUM_CONSTANT_ADDRESS; i++) {
                    if (state->addresses[i] == disp && state->address_vals[i] == val && (state->address_state[i] & PS_VALID_VAL) != 0) {
                        loginst(dcontext, 3, inst, " mem location already set to val, simplify ");
                        backup = INSTR_CREATE_nop(dcontext);
                        replace_inst(dcontext, state->trace, inst, backup);
                        loginst(dcontext, 3, backup, " to ");
                        inst = backup;
                    }
                }
                update_address_val(state, disp, val);
            }

            // do stack vals

            if (opnd_is_stack_address(opnd) && cp_local_aggr > 0) {
                int disp = opnd_get_disp(opnd);
                /* same redundancy check for ebp-relative slots, but only
                 * within the current scope */
                for (i = 0; i < NUM_STACK_SLOTS; i++) {
                    if (state->stack_offsets_ebp[i] == disp && state->stack_vals[i] == val && (state->stack_address_state[i] & PS_VALID_VAL) != 0 && state->stack_scope[i] == state->cur_scope) {
                        loginst(dcontext, 3, inst, " mem location already set to val, simplify ");
                        backup = INSTR_CREATE_nop(dcontext);
                        replace_inst(dcontext, state->trace, inst, backup);
                        loginst(dcontext, 3, backup, " to ");
                        inst = backup;
                    }
                }

                update_stack_val(state, disp, val);

            }
        }
    } else {
        /* case 2: inst does not produce a constant -- invalidate state */
        // call and int
        if (instr_is_interrupt(inst) || instr_is_call(inst)) {
            // can assume mem addresses not touched??
            // shouldn't be going to app code for call at least
            for (i = 0; i < 8; i++)
                state->reg_state[i] = 0;
        }
        // update for regs written to, actually if xh then don't need to
        // invalidate xl and vice versa, but to much work to check for that probably unlikely occurrence
        for (i = 0; i < 8; i++) {
            if (instr_writes_to_reg(inst, REG_START_32 + (reg_id_t)i, DR_QUERY_DEFAULT)) {
                state->reg_state[i] = 0;
            }
        }
        // update mem caches
        for (i = 0; i < num_dst; i++) {
            opnd = instr_get_dst(inst, i);
            if (opnd_is_constant_address(opnd) && cp_global_aggr >0) {
                clear_address_val(state, opnd_get_disp(opnd));
            }
        }
        // update stack caches
        for (i = 0; i < num_dst; i++) {
            opnd = instr_get_dst(inst, i);
            if (opnd_is_stack_address(opnd) && cp_local_aggr > 0) {
                clear_stack_val(state, opnd_get_disp(opnd));
            }
        }
    }
    return inst;
}
| |
/* Singly-linked list node pairing a displacement with a scope depth.
 * NOTE(review): not referenced in this portion of the file; presumably used
 * by the stack-slot scope tracking -- confirm against the rest of the file. */
typedef struct _two_entry_list_element_t {
    int disp;   /* displacement (ebp-relative, by analogy with stack_offsets_ebp) */
    int scope;  /* scope nesting depth the entry was recorded at */
    struct _two_entry_list_element_t * next;
} two_entry_list_element_t;

/* Head of a two_entry list with the scope depth at which it ends.
 * NOTE(review): also unreferenced here -- verify intended use. */
typedef struct _start_list_element_t {
    int endscope;
    two_entry_list_element_t * next;
} start_list_element_t;
| |
| |
| /*************************************************************************/ |
| |
| /*This track the scopes. The number indicates the depth of the nested |
| scopes. Also checks for stack constant instructions */ |
| instr_t * |
| handle_stack(prop_state_t *state, instr_t *inst) |
| { |
| dcontext_t *dcontext = state->dcontext; |
| int i; |
| if (instr_get_opcode(inst) == OP_enter || |
| ((instr_get_opcode(inst) == OP_mov_st || instr_get_opcode(inst) == OP_mov_ld) && |
| opnd_is_reg(instr_get_src(inst, 0)) && |
| opnd_get_reg(instr_get_src(inst, 0)) == REG_ESP && |
| opnd_is_reg(instr_get_dst(inst, 0)) && |
| opnd_get_reg(instr_get_dst(inst, 0)) == REG_EBP)) { |
| state->cur_scope++; |
| LOG(THREAD, LOG_OPTS, 3, "Adjust scope up to %d\n", state->cur_scope); |
| return inst; |
| } |
| if (instr_get_opcode(inst) == OP_leave || |
| (instr_get_opcode(inst) == OP_pop && |
| opnd_is_reg(instr_get_dst(inst, 0)) && |
| opnd_get_reg(instr_get_dst(inst, 0)) == REG_EBP)) { |
| state->cur_scope--; |
| |
| for (i = 0; i < NUM_STACK_SLOTS; i++) { |
| if (state->stack_scope[i] > state->cur_scope && |
| state->stack_address_state[i] != 0) { |
| state->stack_address_state[i] = 0; |
| } |
| } |
| LOG(THREAD, LOG_OPTS, 3, "Adjust scope down to %d\n", state->cur_scope); |
| return inst; |
| } |
| if (instr_writes_to_reg(inst, REG_EBP, DR_QUERY_DEFAULT)) { |
| loginst(dcontext, 2, inst, "Lost stack scope count"); |
| state->lost_scope_count = true; |
| for (i = 0; i< NUM_STACK_SLOTS; i++) { |
| state->stack_address_state[i] = 0; |
| } |
| } |
| return inst; |
| } |
| |
| |
| |
| /* FIXME : (could affect correctness at higher levels of optimization) |
| * constant address and various operand sizes, at level 1 what if we write/read |
| * a 16bit value, we'll actually do 32bit, hard to detect since ?believe? will |
| * both be OPSZ_4_short2, look at prefix? Similarly at level 2 we don't do any size |
| * checking at all for constant address, what if is a byte of existing etc. |
| * Ah, just trust the programmer, these are all addresses from him anyways |
| * we can trust that he won't do anything that weird with them |
| * FIXME : more robust matching for the dynamorio stack call hints, the pattern |
 * matching at the moment is somewhat brittle, though it should fail gracefully
| * TODO : (doesn't affect correctness only effectiveness) |
| * Easy |
 * reverse cmp args if one is constant and we can flip any dependent jmps? will have to
| * either check target or assume normal eflags |
| * if write to xh then don't need to invalidate xl, right now invalidate all |
 * probably not worth the effort to fix; it is a pretty rare occurrence where it
| * matters |
| * handle more setcc cmovcc instrs in do_forward_eflags_check, probably |
|