| /* ********************************************************** |
| * Copyright (c) 2011-2022 Google, Inc. All rights reserved. |
| * Copyright (c) 2008-2010 VMware, Inc. All rights reserved. |
| * Copyright (c) 2022 Arm Limited All rights reserved. |
| * **********************************************************/ |
| |
| /* drutil: DynamoRIO Instrumentation Utilities |
| * Derived from Dr. Memory: the memory debugger |
| * |
| * This library is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Lesser General Public |
| * License as published by the Free Software Foundation; |
| * version 2.1 of the License, and no later version. |
| |
| * This library is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Library General Public License for more details. |
| |
| * You should have received a copy of the GNU Lesser General Public |
| * License along with this library; if not, write to the Free Software |
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| */ |
| |
| /* DynamoRIO Instrumentation Utilities Extension */ |
| |
| #include "dr_api.h" |
| #include "drmgr.h" |
| #include "../ext_utils.h" |
| |
/* ASSERT guards this library's own internal logic only; it is never applied
 * to input coming from the user.  Outside of DEBUG builds it expands to
 * nothing, so its arguments are not evaluated there.
 */
#ifndef DEBUG
#    define ASSERT(x, msg) /* nothing */
#else
#    define ASSERT(x, msg) DR_ASSERT_MSG(x, msg)
#endif
| |
/* There are cases where notifying the user is the right thing, even for a library.
 * Xref i#1055 where w/o visible notification the user might not know what's
 * going on.  USAGE_ERROR reports "msg" (a message box on Windows, STDERR
 * elsewhere) and then calls dr_abort().
 *
 * The expansion intentionally does NOT end in a semicolon: the caller writes
 * it, which makes the macro behave as exactly one statement and keeps it safe
 * inside an unbraced if/else.
 */
#ifdef WINDOWS
#    define USAGE_ERROR(msg)                             \
        do {                                             \
            dr_messagebox("FATAL USAGE ERROR: %s", msg); \
            dr_abort();                                  \
        } while (0)
#else
#    define USAGE_ERROR(msg)                                    \
        do {                                                    \
            dr_fprintf(STDERR, "FATAL USAGE ERROR: %s\n", msg); \
            dr_abort();                                         \
        } while (0)
#endif
| |
/* Shorthand for inserting a meta instruction. */
#define PRE instrlist_meta_preinsert
/* Shorthand for inserting an app instruction, which must have a translation
 * ("xl8") field.
 */
#define PREXL8 instrlist_preinsert

#if defined(X86)
/* Filled in by drutil_init() from CPUID and reported by
 * drutil_opnd_mem_size_in_bytes() for the xsave family of opcodes.
 */
static uint drutil_xsave_area_size;
#endif

/***************************************************************************
 * INIT
 */

/* Count of drutil_init() calls that have not yet been matched by drutil_exit(). */
static int drutil_init_count;
| |
| #ifdef X86 |
| |
| static inline void |
| native_unix_cpuid(uint *eax, uint *ebx, uint *ecx, uint *edx) |
| { |
| # ifdef UNIX |
| /* We need to do this xbx trick, because xbx might be used for fPIC, |
| * and gcc < 5 chokes on it. This can get removed and replaced by |
| * a "=b" constraint when moving to gcc-5. |
| */ |
| # ifdef X64 |
| /* In 64-bit, we are getting a 64-bit pointer (xref i#3478). */ |
| asm volatile("xchgq\t%%rbx, %q1\n\t" |
| "cpuid\n\t" |
| "xchgq\t%%rbx, %q1\n\t" |
| : "=a"(*eax), "=&r"(*ebx), "=c"(*ecx), "=d"(*edx) |
| : "0"(*eax), "2"(*ecx)); |
| # else |
| asm volatile("xchgl\t%%ebx, %k1\n\t" |
| "cpuid\n\t" |
| "xchgl\t%%ebx, %k1\n\t" |
| : "=a"(*eax), "=&r"(*ebx), "=c"(*ecx), "=d"(*edx) |
| : "0"(*eax), "2"(*ecx)); |
| # endif |
| # endif |
| } |
| |
| static inline void |
| cpuid(uint op, uint subop, uint *eax, uint *ebx, uint *ecx, uint *edx) |
| { |
| # ifdef WINDOWS |
| int output[4]; |
| __cpuidex(output, op, subop); |
| /* XXX i#3469: On a Windows laptop, I inspected this and it returned 1088 |
| * bytes, which is a rather unexpected number. Investigate whether this is |
| * correct. |
| */ |
| *eax = output[0]; |
| *ebx = output[1]; |
| *ecx = output[2]; |
| *edx = output[3]; |
| # else |
| *eax = op; |
| *ecx = subop; |
| native_unix_cpuid(eax, ebx, ecx, edx); |
| # endif |
| } |
| |
| #endif |
| |
| DR_EXPORT |
| bool |
| drutil_init(void) |
| { |
| /* handle multiple sets of init/exit calls */ |
| int count = dr_atomic_add32_return_sum(&drutil_init_count, 1); |
| if (count > 1) |
| return true; |
| |
| #ifdef X86 |
| /* XXX: we may want to re-factor and move functions like this into drx and/or |
| * using pre-existing versions in clients/drcpusim/tests/cpuid.c. |
| */ |
| uint eax, ecx, edx; |
| const int proc_ext_state_main_leaf = 0xd; |
| cpuid(proc_ext_state_main_leaf, 0, &eax, &drutil_xsave_area_size, &ecx, &edx); |
| #endif |
| |
| /* nothing yet: but putting in API up front in case need later */ |
| |
| return true; |
| } |
| |
| DR_EXPORT |
| void |
| drutil_exit(void) |
| { |
| /* handle multiple sets of init/exit calls */ |
| int count = dr_atomic_add32_return_sum(&drutil_init_count, -1); |
| if (count != 0) |
| return; |
| |
| /* nothing yet: but putting in API up front in case need later */ |
| } |
| |
| /*************************************************************************** |
| * MEMORY TRACING |
| */ |
/* Forward declaration of the per-architecture worker that both
 * drutil_insert_get_mem_addr() and drutil_insert_get_mem_addr_ex() call.
 */
#if defined(X86)
static bool
drutil_insert_get_mem_addr_x86(void *drcontext, instrlist_t *bb, instr_t *where,
                               opnd_t memref, reg_id_t dst, reg_id_t scratch,
                               DR_PARAM_OUT bool *scratch_used);
#elif defined(AARCHXX) || defined(RISCV64)
static bool
drutil_insert_get_mem_addr_risc(void *drcontext, instrlist_t *bb, instr_t *where,
                                opnd_t memref, reg_id_t dst, reg_id_t scratch,
                                DR_PARAM_OUT bool *scratch_used);
#endif /* X86/AARCHXX/RISCV64 */
| |
| /* Could be optimized to have scratch==dst for many common cases, but |
| * need way to get a 2nd reg for corner cases: simpler to ask caller |
| * to give us scratch reg distinct from dst |
| * XXX: however, this means that a client must spill the scratch reg |
| * every time, even though it's only used for far or xlat memref. |
| * |
| * XXX: provide a version that calls clean call? would have to hardcode |
| * what gets included: memory size? perhaps should try to create a |
| * vararg clean call arg feature to chain things together. |
| */ |
| DR_EXPORT |
| bool |
| drutil_insert_get_mem_addr_ex(void *drcontext, instrlist_t *bb, instr_t *where, |
| opnd_t memref, reg_id_t dst, reg_id_t scratch, |
| DR_PARAM_OUT bool *scratch_used) |
| { |
| if (scratch_used != NULL) |
| *scratch_used = false; |
| #if defined(X86) |
| return drutil_insert_get_mem_addr_x86(drcontext, bb, where, memref, dst, scratch, |
| scratch_used); |
| #elif defined(AARCHXX) || defined(RISCV64) |
| return drutil_insert_get_mem_addr_risc(drcontext, bb, where, memref, dst, scratch, |
| scratch_used); |
| #endif |
| } |
| |
| DR_EXPORT |
| bool |
| drutil_insert_get_mem_addr(void *drcontext, instrlist_t *bb, instr_t *where, |
| opnd_t memref, reg_id_t dst, reg_id_t scratch) |
| { |
| #if defined(X86) |
| return drutil_insert_get_mem_addr_x86(drcontext, bb, where, memref, dst, scratch, |
| NULL); |
| #elif defined(AARCHXX) || defined(RISCV64) |
| return drutil_insert_get_mem_addr_risc(drcontext, bb, where, memref, dst, scratch, |
| NULL); |
| #endif |
| } |
| |
| #ifdef X86 |
| static bool |
| drutil_insert_get_mem_addr_x86(void *drcontext, instrlist_t *bb, instr_t *where, |
| opnd_t memref, reg_id_t dst, reg_id_t scratch, |
| DR_PARAM_OUT bool *scratch_used) |
| { |
| if (opnd_is_far_base_disp(memref) && |
| /* We assume that far memory references via %ds and %es are flat, |
| * i.e. the segment base is 0, so we only handle %fs and %gs here. |
| * The assumption is consistent with dr_insert_get_seg_base, |
| * which does say for windows it only supports TLS segment, |
| * and inserts "mov 0 => reg" for %ds and %es instead. |
| */ |
| opnd_get_segment(memref) != DR_SEG_ES && opnd_get_segment(memref) != DR_SEG_DS && |
| /* cs: is sometimes seen, as here on win10: |
| * RPCRT4!Invoke+0x28: |
| * 76d85ea0 2eff1548d5de76 call dword ptr cs:[RPCRT4! |
| * __guard_check_icall_fptr (76ded548)] |
| * We assume it's flat. |
| */ |
| opnd_get_segment(memref) != DR_SEG_CS) { |
| instr_t *near_in_scratch = NULL; |
| reg_id_t reg_segbase = dst; |
| /* If we need two steps, we get the near first as it may depend on dst. */ |
| if (opnd_uses_reg(memref, dst) || |
| (opnd_get_base(memref) != DR_REG_NULL && |
| opnd_get_index(memref) != DR_REG_NULL)) { |
| /* We need a scratch reg. We document these conditions so it's user error |
| * if one wasn't provided. |
| */ |
| if (scratch == DR_REG_NULL) |
| return false; |
| if ((opnd_get_base(memref) == DR_REG_NULL || |
| opnd_get_index(memref) == DR_REG_NULL) && |
| !opnd_uses_reg(memref, scratch)) { |
| /* We can do it one step if we swap regs. */ |
| reg_id_t temp = reg_segbase; |
| reg_segbase = scratch; |
| scratch = temp; |
| } else { |
| /* We have to take two steps. */ |
| opnd_set_size(&memref, OPSZ_lea); |
| if (scratch_used != NULL) |
| *scratch_used = true; |
| near_in_scratch = |
| INSTR_CREATE_lea(drcontext, opnd_create_reg(scratch), memref); |
| PRE(bb, where, near_in_scratch); |
| } |
| } |
| /* Now get segment base into dst, then add to near address. */ |
| if (!dr_insert_get_seg_base(drcontext, bb, where, opnd_get_segment(memref), |
| reg_segbase)) |
| return false; |
| if (near_in_scratch != NULL) { |
| PRE(bb, where, |
| INSTR_CREATE_lea( |
| drcontext, opnd_create_reg(dst), |
| opnd_create_base_disp(reg_segbase, scratch, 1, 0, OPSZ_lea))); |
| } else { |
| reg_id_t base = opnd_get_base(memref); |
| reg_id_t index = opnd_get_index(memref); |
| int scale = opnd_get_scale(memref); |
| int disp = opnd_get_disp(memref); |
| if (opnd_get_base(memref) == DR_REG_NULL) { |
| base = reg_segbase; |
| } else if (opnd_get_index(memref) == DR_REG_NULL) { |
| index = reg_segbase; |
| scale = 1; |
| } else { |
| ASSERT(false, "memaddr internal error"); |
| } |
| PRE(bb, where, |
| INSTR_CREATE_lea( |
| drcontext, opnd_create_reg(dst), |
| opnd_create_base_disp(base, index, scale, disp, OPSZ_lea))); |
| } |
| } else if (opnd_is_base_disp(memref)) { |
| /* special handling for xlat instr, [%ebx,%al] |
| * - save %eax |
| * - movzx %al => %eax |
| * - lea [%ebx, %eax] => dst |
| * - restore %eax |
| */ |
| bool is_xlat = false; |
| if (opnd_get_index(memref) == DR_REG_AL) { |
| is_xlat = true; |
| if (scratch == DR_REG_NULL) |
| return false; |
| if (scratch != DR_REG_XAX && dst != DR_REG_XAX) { |
| /* we do not have to save xax if it is saved by caller */ |
| if (scratch_used != NULL) |
| *scratch_used = true; |
| PRE(bb, where, |
| INSTR_CREATE_mov_ld(drcontext, opnd_create_reg(scratch), |
| opnd_create_reg(DR_REG_XAX))); |
| } |
| PRE(bb, where, |
| INSTR_CREATE_movzx(drcontext, opnd_create_reg(DR_REG_XAX), |
| opnd_create_reg(DR_REG_AL))); |
| memref = opnd_create_base_disp(DR_REG_XBX, DR_REG_XAX, 1, 0, OPSZ_lea); |
| } |
| /* lea [ref] => reg */ |
| opnd_set_size(&memref, OPSZ_lea); |
| PRE(bb, where, INSTR_CREATE_lea(drcontext, opnd_create_reg(dst), memref)); |
| if (is_xlat && scratch != DR_REG_XAX && dst != DR_REG_XAX) { |
| PRE(bb, where, |
| INSTR_CREATE_mov_ld(drcontext, opnd_create_reg(DR_REG_XAX), |
| opnd_create_reg(scratch))); |
| } |
| } else if (IF_X64(opnd_is_rel_addr(memref) ||) opnd_is_abs_addr(memref)) { |
| /* mov addr => reg */ |
| PRE(bb, where, |
| INSTR_CREATE_mov_imm(drcontext, opnd_create_reg(dst), |
| OPND_CREATE_INTPTR(opnd_get_addr(memref)))); |
| } else { |
| /* unhandled memory reference */ |
| return false; |
| } |
| return true; |
| } |
| #elif defined(AARCHXX) || defined(RISCV64) |
| |
| # ifdef ARM |
| static bool |
| instr_has_opnd(instr_t *instr, opnd_t opnd) |
| { |
| int i; |
| if (instr == NULL) |
| return false; |
| for (i = 0; i < instr_num_srcs(instr); i++) { |
| if (opnd_same(opnd, instr_get_src(instr, i))) |
| return true; |
| } |
| for (i = 0; i < instr_num_dsts(instr); i++) { |
| if (opnd_same(opnd, instr_get_dst(instr, i))) |
| return true; |
| } |
| return false; |
| } |
| |
| static instr_t * |
| instrlist_find_app_instr(instrlist_t *ilist, instr_t *where, opnd_t opnd) |
| { |
| instr_t *app; |
| /* looking for app instr at/after where */ |
| for (app = instr_is_app(where) ? where : instr_get_next_app(where); app != NULL; |
| app = instr_get_next_app(app)) { |
| if (instr_has_opnd(app, opnd)) |
| return app; |
| } |
| /* looking for app instr before where */ |
| for (app = instr_get_prev_app(where); app != NULL; app = instr_get_prev_app(app)) { |
| if (instr_has_opnd(app, opnd)) |
| return app; |
| } |
| return NULL; |
| } |
| # endif /* ARM */ |
| |
| static reg_id_t |
| replace_stolen_reg(void *drcontext, instrlist_t *bb, instr_t *where, opnd_t memref, |
| reg_id_t dst, reg_id_t scratch, DR_PARAM_OUT bool *scratch_used) |
| { |
| reg_id_t reg; |
| reg = opnd_uses_reg(memref, dst) ? scratch : dst; |
| if (scratch_used != NULL && reg == scratch) |
| *scratch_used = true; |
| DR_ASSERT(!opnd_uses_reg(memref, reg)); |
| dr_insert_get_stolen_reg_value(drcontext, bb, where, reg); |
| return reg; |
| } |
| |
| static bool |
| drutil_insert_get_mem_addr_risc(void *drcontext, instrlist_t *bb, instr_t *where, |
| opnd_t memref, reg_id_t dst, reg_id_t scratch, |
| DR_PARAM_OUT bool *scratch_used) |
| { |
| if (!opnd_is_base_disp(memref) IF_AARCHXX_OR_RISCV64(&&!opnd_is_rel_addr(memref))) |
| return false; |
| # ifdef ARM |
| if (opnd_get_base(memref) == DR_REG_PC) { |
| app_pc target; |
| /* We need the app instr for getting the rel_addr_target. |
| * XXX: add drutil_insert_get_mem_addr_ex to let client provide app instr. |
| */ |
| instr_t *app = instrlist_find_app_instr(bb, where, memref); |
| if (app == NULL) |
| return false; |
| if (!instr_get_rel_addr_target(app, &target)) |
| return false; |
| instrlist_insert_mov_immed_ptrsz(drcontext, (ptr_int_t)target, |
| opnd_create_reg(dst), bb, where, NULL, NULL); |
| } |
| # else /* AARCH64/RISCV64 */ |
| if (opnd_is_rel_addr(memref)) { |
| instrlist_insert_mov_immed_ptrsz(drcontext, (ptr_int_t)opnd_get_addr(memref), |
| opnd_create_reg(dst), bb, where, NULL, NULL); |
| return true; |
| } |
| # endif /* ARM/AARCH64/RISCV64 */ |
| else { |
| instr_t *instr; |
| reg_id_t base = opnd_get_base(memref); |
| reg_id_t index = opnd_get_index(memref); |
| int disp = opnd_get_disp(memref); |
| reg_id_t stolen = dr_get_stolen_reg(); |
| # ifdef AARCHXX |
| bool negated = TEST(DR_OPND_NEGATED, opnd_get_flags(memref)); |
| /* On ARM, disp is never negative; on AArch64, we do not use DR_OPND_NEGATED. */ |
| ASSERT(IF_ARM_ELSE(disp >= 0, !negated), "DR_OPND_NEGATED internal error"); |
| if (disp < 0) { |
| disp = -disp; |
| negated = !negated; |
| } |
| # endif |
| # ifdef AARCH64 |
| /* In cases where only the lower 32 bits of the index register are |
| * used, we need to widen to 64 bits in order to handle stolen |
| * register's replacement. See replace_stolen_reg() below, where index |
| * is narrowed after replacement. |
| */ |
| bool is_index_32bit_stolen = false; |
| if (index == reg_64_to_32(stolen)) { |
| index = stolen; |
| is_index_32bit_stolen = true; |
| } |
| # endif |
| if (dst == stolen || scratch == stolen) |
| return false; |
| if (base == stolen) { |
| base = replace_stolen_reg(drcontext, bb, where, memref, dst, scratch, |
| scratch_used); |
| } else if (index == stolen) { |
| index = replace_stolen_reg(drcontext, bb, where, memref, dst, scratch, |
| scratch_used); |
| # ifdef AARCH64 |
| /* Narrow replaced index register if it was 32 bit stolen register |
| * before replace_stolen_reg() call. |
| */ |
| if (is_index_32bit_stolen) |
| index = reg_64_to_32(index); |
| # endif |
| } |
| if (index == REG_NULL && opnd_get_disp(memref) != 0) { |
| /* First try "add dst, base, #disp". */ |
| instr = IF_AARCHXX(negated ? INSTR_CREATE_sub(drcontext, opnd_create_reg(dst), |
| opnd_create_reg(base), |
| OPND_CREATE_INT(disp)) |
| :) |
| XINST_CREATE_add_2src(drcontext, opnd_create_reg(dst), |
| opnd_create_reg(base), OPND_CREATE_INT(disp)); |
| # define MAX_ADD_IMM_DISP (1 << 12) |
| if (IF_ARM_ELSE(instr_is_encoding_possible(instr), disp < MAX_ADD_IMM_DISP)) { |
| PRE(bb, where, instr); |
| return true; |
| } |
| # undef MAX_ADD_IMM_DISP |
| instr_destroy(drcontext, instr); |
| /* The memref may have a disp that cannot be directly encoded into an |
| * add_imm instr, so we use movw to put disp into the scratch instead |
| * and fake it as an index reg to insert an add instr later. |
| */ |
| /* if dst is used in memref, we use scratch instead */ |
| index = (base == dst) ? scratch : dst; |
| if (scratch_used != NULL && index == scratch) |
| *scratch_used = true; |
| PRE(bb, where, |
| XINST_CREATE_load_int(drcontext, opnd_create_reg(index), |
| OPND_CREATE_INT(disp))); |
| /* "add" instr is inserted below with a fake index reg added here */ |
| } |
| if (index != REG_NULL) { |
| # ifdef ARM |
| uint amount; |
| dr_shift_type_t shift = opnd_get_index_shift(memref, &amount); |
| instr = negated |
| ? INSTR_CREATE_sub_shimm(drcontext, opnd_create_reg(dst), |
| opnd_create_reg(base), opnd_create_reg(index), |
| OPND_CREATE_INT(shift), OPND_CREATE_INT(amount)) |
| : INSTR_CREATE_add_shimm(drcontext, opnd_create_reg(dst), |
| opnd_create_reg(base), opnd_create_reg(index), |
| OPND_CREATE_INT(shift), OPND_CREATE_INT(amount)); |
| PRE(bb, where, instr); |
| # elif defined(AARCH64) |
| uint amount; |
| dr_extend_type_t extend = opnd_get_index_extend(memref, NULL, &amount); |
| instr = negated |
| ? INSTR_CREATE_sub_extend(drcontext, opnd_create_reg(dst), |
| opnd_create_reg(base), opnd_create_reg(index), |
| OPND_CREATE_INT(extend), |
| OPND_CREATE_INT(amount)) |
| : INSTR_CREATE_add_extend(drcontext, opnd_create_reg(dst), |
| opnd_create_reg(base), opnd_create_reg(index), |
| OPND_CREATE_INT(extend), |
| OPND_CREATE_INT(amount)); |
| PRE(bb, where, instr); |
| # else /* RISCV64 */ |
| ASSERT(false, |
| "Unreachable, there is no base + index addressing mode in RISC-V."); |
| # endif /* AARCHXX/RISCV64 */ |
| } else if (base != dst) { |
| PRE(bb, where, |
| XINST_CREATE_move(drcontext, opnd_create_reg(dst), |
| opnd_create_reg(base))); |
| } |
| } |
| return true; |
| } |
| #endif /* X86/AARCHXX/RISCV64 */ |
| |
| DR_EXPORT |
| uint |
| drutil_opnd_mem_size_in_bytes(opnd_t memref, instr_t *inst) |
| { |
| #ifdef X86 |
| if (inst != NULL && instr_get_opcode(inst) == OP_enter) { |
| uint extra_pushes = (uint)opnd_get_immed_int(instr_get_src(inst, 1)); |
| uint sz = opnd_size_in_bytes(opnd_get_size(instr_get_dst(inst, 1))); |
| ASSERT(opnd_is_immed_int(instr_get_src(inst, 1)), "malformed OP_enter"); |
| return sz * extra_pushes; |
| } else if (inst != NULL && instr_is_xsave(inst)) { |
| /* See the doxygen docs. */ |
| switch (instr_get_opcode(inst)) { |
| case OP_xsave32: |
| case OP_xsave64: |
| case OP_xsaveopt32: |
| case OP_xsaveopt64: |
| case OP_xsavec32: |
| case OP_xsavec64: return drutil_xsave_area_size; break; |
| default: ASSERT(false, "unknown xsave opcode"); return 0; |
| } |
| } else |
| #endif /* X86 */ |
| return opnd_size_in_bytes(opnd_get_size(memref)); |
| } |
| |
| #ifdef X86 |
| static bool |
| opc_is_stringop_loop(uint opc) |
| { |
| return (opc == OP_rep_ins || opc == OP_rep_outs || opc == OP_rep_movs || |
| opc == OP_rep_stos || opc == OP_rep_lods || opc == OP_rep_cmps || |
| opc == OP_repne_cmps || opc == OP_rep_scas || opc == OP_repne_scas); |
| } |
| |
| static instr_t * |
| create_nonloop_stringop(void *drcontext, instr_t *inst) |
| { |
| instr_t *res; |
| int nsrc = instr_num_srcs(inst); |
| int ndst = instr_num_dsts(inst); |
| uint opc = instr_get_opcode(inst); |
| int i; |
| ASSERT(opc_is_stringop_loop(opc), "invalid param"); |
| switch (opc) { |
| case OP_rep_ins: |
| opc = OP_ins; |
| break; |
| ; |
| case OP_rep_outs: |
| opc = OP_outs; |
| break; |
| ; |
| case OP_rep_movs: |
| opc = OP_movs; |
| break; |
| ; |
| case OP_rep_stos: |
| opc = OP_stos; |
| break; |
| ; |
| case OP_rep_lods: |
| opc = OP_lods; |
| break; |
| ; |
| case OP_rep_cmps: |
| opc = OP_cmps; |
| break; |
| ; |
| case OP_repne_cmps: |
| opc = OP_cmps; |
| break; |
| ; |
| case OP_rep_scas: |
| opc = OP_scas; |
| break; |
| ; |
| case OP_repne_scas: |
| opc = OP_scas; |
| break; |
| ; |
| default: ASSERT(false, "not a stringop loop opcode"); return NULL; |
| } |
| res = instr_build(drcontext, opc, ndst - 1, nsrc - 1); |
| /* We assume xcx is last src and last dst */ |
| ASSERT(opnd_is_reg(instr_get_src(inst, nsrc - 1)) && |
| opnd_uses_reg(instr_get_src(inst, nsrc - 1), DR_REG_XCX), |
| "rep opnd order assumption violated"); |
| ASSERT(opnd_is_reg(instr_get_dst(inst, ndst - 1)) && |
| opnd_uses_reg(instr_get_dst(inst, ndst - 1), DR_REG_XCX), |
| "rep opnd order assumption violated"); |
| for (i = 0; i < nsrc - 1; i++) |
| instr_set_src(res, i, instr_get_src(inst, i)); |
| for (i = 0; i < ndst - 1; i++) |
| instr_set_dst(res, i, instr_get_dst(inst, i)); |
| instr_set_translation(res, instr_get_app_pc(inst)); |
| return res; |
| } |
| #endif /* X86 */ |
| |
| DR_EXPORT |
| bool |
| drutil_instr_is_stringop_loop(instr_t *inst) |
| { |
| #ifdef X86 |
| return opc_is_stringop_loop(instr_get_opcode(inst)); |
| #else |
| return false; |
| #endif |
| } |
| |
| DR_EXPORT |
| bool |
| drutil_expand_rep_string_ex(void *drcontext, instrlist_t *bb, bool *expanded DR_PARAM_OUT, |
| instr_t **stringop DR_PARAM_OUT) |
| { |
| #ifdef X86 |
| instr_t *inst, *next_inst, *first_app = NULL; |
| bool delete_rest = false; |
| uint opc; |
| #endif |
| |
| if (drmgr_current_bb_phase(drcontext) != DRMGR_PHASE_APP2APP) { |
| USAGE_ERROR("drutil_expand_rep_string* must be called from " |
| "drmgr's app2app phase"); |
| return false; |
| } |
| |
| #ifdef X86 |
| /* Make a rep string instr be its own bb: the loop is going to |
| * duplicate the tail anyway, and have to terminate at the added cbr. |
| */ |
| for (inst = instrlist_first(bb); inst != NULL; inst = next_inst) { |
| next_inst = instr_get_next(inst); |
| if (delete_rest) { |
| instrlist_remove(bb, inst); |
| instr_destroy(drcontext, inst); |
| } else if (instr_is_app(inst)) { |
| /* We have to handle meta instrs, as drwrap_replace_native() and |
| * some other app2app xforms use them. |
| */ |
| if (first_app == NULL) |
| first_app = inst; |
| opc = instr_get_opcode(inst); |
| if (opc_is_stringop_loop(opc)) { |
| delete_rest = true; |
| if (inst != first_app) { |
| instrlist_remove(bb, inst); |
| instr_destroy(drcontext, inst); |
| } |
| } |
| } |
| } |
| |
| /* Convert to a regular loop if it's the sole instr */ |
| inst = first_app; |
| opc = (inst == NULL) ? OP_INVALID : instr_get_opcode(inst); |
| if (opc_is_stringop_loop(opc)) { |
| /* A rep string instr does check for 0 up front. DR limits us |
| * to 1 cbr but drmgr will mark the extras as meta later. If ecx is uninit |
| * the loop* will catch it so we're ok not instrumenting this. |
| * I would just jecxz to loop, but w/ instru it can't reach so |
| * I have to add yet more internal jmps that will execute each |
| * iter. We use drmgr's feature of allowing extra non-meta instrs. |
| * Our "mov $1,ecx" will remain non-meta. |
| * Note that we do not want any of the others to have xl8 as its |
| * translation as that could trigger duplicate clean calls from |
| * other passes looking for post-call or other addresses so we use |
| * xl8+1 which will always be mid-instr. NULL is another possibility, |
| * but it results in meta-may-fault instrs that need a translation |
| * and naturally want to use the app instr's translation. |
| * |
| * So we have: |
| * rep movs |
| * => |
| * jecxz zero |
| * jmp iter |
| * zero: |
| * mov $0x00000001 -> %ecx |
| * jmp pre_loop |
| * iter: |
| * movs %ds:(%esi) %esi %edi -> %es:(%edi) %esi %edi |
| * pre_loop: |
| * loop |
| * |
| * XXX: this non-linear code can complicate subsequent |
| * analysis routines. Perhaps we should consider splitting |
| * into multiple bbs? |
| * |
| * XXX i#1460: the jecxz is marked meta by drmgr (via i#676) and is |
| * thus not mangled by DR, resulting in just an 8-bit reach. |
| */ |
| app_pc xl8 = instr_get_app_pc(inst); |
| app_pc fake_xl8 = xl8 + 1; |
| opnd_t xcx = instr_get_dst(inst, instr_num_dsts(inst) - 1); |
| instr_t *loop, *pre_loop, *jecxz, *zero, *iter, *string; |
| ASSERT(opnd_uses_reg(xcx, DR_REG_XCX), "rep string opnd order mismatch"); |
| ASSERT(inst == instrlist_last(bb), "repstr not alone in bb"); |
| |
| emulated_instr_t emulated_instr; |
| emulated_instr.size = sizeof(emulated_instr); |
| emulated_instr.pc = xl8; |
| emulated_instr.instr = inst; |
| /* We can't place an end label after our conditional branch as DR won't |
| * allow anything past the branch (we explored relaxing that and ran into |
| * many complexities that were not worth further work), so we instead |
| * use the flag to mark the whole block as emulated. |
| */ |
| emulated_instr.flags = DR_EMULATE_REST_OF_BLOCK | |
| /* This is a different type of emulation where we want |
| * observational clients to look at the original instruction for instruction |
| * fetch info but the emulation sequence for data load/store info. We use |
| * this flag in emulated_instr_t to indicate this. |
| */ |
| DR_EMULATE_INSTR_ONLY; |
| drmgr_insert_emulation_start(drcontext, bb, inst, &emulated_instr); |
| |
| pre_loop = INSTR_CREATE_label(drcontext); |
| /* hack to handle loop decrementing xcx: simpler if could have 2 cbrs! */ |
| if (opnd_get_size(xcx) == OPSZ_8) { |
| /* rely on setting upper 32 bits to zero */ |
| zero = INSTR_CREATE_mov_imm(drcontext, opnd_create_reg(DR_REG_ECX), |
| OPND_CREATE_INT32(1)); |
| } else { |
| zero = INSTR_CREATE_mov_imm(drcontext, xcx, |
| opnd_create_immed_int(1, opnd_get_size(xcx))); |
| } |
| iter = INSTR_CREATE_label(drcontext); |
| |
| jecxz = INSTR_CREATE_jecxz(drcontext, opnd_create_instr(zero)); |
| /* be sure to match the same counter reg width */ |
| instr_set_src(jecxz, 1, xcx); |
| PREXL8(bb, inst, INSTR_XL8(jecxz, fake_xl8)); |
| PREXL8(bb, inst, |
| INSTR_XL8(INSTR_CREATE_jmp_short(drcontext, opnd_create_instr(iter)), |
| fake_xl8)); |
| PREXL8(bb, inst, INSTR_XL8(zero, fake_xl8)); |
| /* target the instrumentation for the loop, not loop itself */ |
| PREXL8(bb, inst, |
| INSTR_XL8(INSTR_CREATE_jmp(drcontext, opnd_create_instr(pre_loop)), |
| fake_xl8)); |
| PRE(bb, inst, iter); |
| |
| string = INSTR_XL8(create_nonloop_stringop(drcontext, inst), xl8); |
| if (stringop != NULL) |
| *stringop = string; |
| PREXL8(bb, inst, string); |
| |
| PRE(bb, inst, pre_loop); |
| if (opc == OP_rep_cmps || opc == OP_rep_scas) { |
| loop = INSTR_CREATE_loope(drcontext, opnd_create_pc(xl8)); |
| } else if (opc == OP_repne_cmps || opc == OP_repne_scas) { |
| loop = INSTR_CREATE_loopne(drcontext, opnd_create_pc(xl8)); |
| } else { |
| loop = INSTR_CREATE_loop(drcontext, opnd_create_pc(xl8)); |
| } |
| /* be sure to match the same counter reg width */ |
| instr_set_src(loop, 1, xcx); |
| instr_set_dst(loop, 0, xcx); |
| PREXL8(bb, inst, INSTR_XL8(loop, fake_xl8)); |
| |
| /* Now throw out the original instr. It is part of the emulation label |
| * and will be freed along with the instrlist so we just remove it from |
| * the list and do not free it ourselves. |
| */ |
| instrlist_remove(bb, inst); |
| |
| if (expanded != NULL) |
| *expanded = true; |
| return true; |
| } |
| #endif |
| |
| if (expanded != NULL) |
| *expanded = false; |
| if (stringop != NULL) |
| *stringop = NULL; |
| return true; |
| } |
| |
| DR_EXPORT |
| bool |
| drutil_expand_rep_string(void *drcontext, instrlist_t *bb) |
| { |
| return drutil_expand_rep_string_ex(drcontext, bb, NULL, NULL); |
| } |