| /* ********************************************************** |
| * Copyright (c) 2014-2021 Google, Inc. All rights reserved. |
| * Copyright (c) 2016 ARM Limited. All rights reserved. |
| * **********************************************************/ |
| |
| /* |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * * Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimer. |
| * |
| * * Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * * Neither the name of ARM Limited nor the names of its contributors may be |
| * used to endorse or promote products derived from this software without |
| * specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL ARM LIMITED OR CONTRIBUTORS BE LIABLE |
| * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH |
| * DAMAGE. |
| */ |
| |
| #include "../globals.h" |
| #include "arch.h" |
| #include "instr_create.h" |
| #include "instrument.h" /* instrlist_meta_preinsert */ |
| #include "../clean_call_opt.h" |
| #include "disassemble.h" |
| |
| /* Make code more readable by shortening long lines. |
| * We mark everything we add as non-app instr. |
| */ |
| #define POST instrlist_meta_postinsert |
| #define PRE instrlist_meta_preinsert |
| |
| /* For ARM and AArch64, we always use TLS and never use hardcoded |
| * dcontext (xref USE_SHARED_GENCODE_ALWAYS() and -private_ib_in_tls). |
| * Thus we use instr_create_{save_to,restore_from}_tls() directly. |
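 * For example (a sketch): instr_create_save_to_tls(dcontext, DR_REG_R0,
 * TLS_REG0_SLOT) creates a store of r0 into our TLS spill area.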
| */ |
| |
| #ifdef AARCH64 |
| /* Defined in aarch64.asm. */ |
| void |
| icache_op_ic_ivau_asm(void); |
| void |
| icache_op_isb_asm(void); |
| |
| typedef struct ALIGN_VAR(16) _icache_op_struct_t { |
| /* This flag is set if any icache lines have been invalidated. */ |
| unsigned int flag; |
    /* The lower half of the address of "lock" must be non-zero as we want to
     * acquire the lock using only two free registers and STXR Ws, Wt, [Xn]
     * requires s != t and s != n, so we use t == n. With this ordering of the
     * members, alignment guarantees that bit 2 of the address of "lock" is set.
     */
| unsigned int lock; |
| /* The icache line size. This is discovered using the system register |
| * ctr_el0 and will be (1 << (2 + n)) with 0 <= n < 16. |
| */ |
| size_t linesize; |
| /* If these are equal then no icache lines have been invalidated. Otherwise |
| * they are both aligned to the icache line size and describe a set of |
| * consecutive icache lines (which could wrap around the top of memory). |
| */ |
| void *begin, *end; |
| /* Some space to spill registers. */ |
| ptr_uint_t spill[2]; |
| } icache_op_struct_t; |
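
/* With the 16-byte alignment above, "flag" sits at offset 0 and "lock" at
 * offset 4, so the address of "lock" is 16n + 4: bit 2 is always set and the
 * lower half of the address is non-zero, as required by the locking code.
 */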
| |
| /* Used in aarch64.asm. */ |
| icache_op_struct_t icache_op_struct; |
| #endif |
| |
| void |
| mangle_arch_init(void) |
| { |
| #ifdef AARCH64 |
| /* Check address of "lock" is unaligned. See comment in icache_op_struct_t. */ |
| ASSERT(!ALIGNED(&icache_op_struct.lock, 16)); |
| #endif |
| } |
| |
| void |
| insert_clear_eflags(dcontext_t *dcontext, clean_call_info_t *cci, instrlist_t *ilist, |
| instr_t *instr) |
| { |
| /* On ARM/AArch64 no known calling convention requires any of the |
| * flags to be zero on entry to a function, so there is nothing to do. |
| */ |
| } |
| |
| #ifdef AARCH64 |
/* Maximum positive immediate offset for STP/LDP with 64-bit registers. */
| # define MAX_STP_OFFSET 504 |
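/* (The 64-bit STP/LDP signed-offset form encodes a 7-bit immediate scaled by
 * 8, covering -512..+504, hence the bound above.)
 */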
| |
| /* Creates a memory reference for registers saved/restored to memory. */ |
| static opnd_t |
| create_base_disp_for_save_restore(uint base_reg, bool is_single_reg, bool is_gpr, |
| uint num_saved, callee_info_t *ci) |
| { |
    /* opsz depends on the kind of register and whether a single register or
| * a pair of registers is saved/restored using stp/ldp. |
| */ |
| uint opsz; |
| if (is_gpr) { |
| if (is_single_reg) |
| opsz = OPSZ_8; |
| else |
| opsz = OPSZ_16; |
| } else { |
| if (is_single_reg) |
| opsz = OPSZ_16; |
| else |
| opsz = OPSZ_32; |
| } |
| |
| uint offset = num_saved * (is_gpr ? sizeof(reg_t) : sizeof(dr_simd_t)); |
| return opnd_create_base_disp(base_reg, DR_REG_NULL, 0, offset, opsz); |
| } |
| |
| static instr_t * |
| create_load_or_store_instr(dcontext_t *dcontext, reg_id_t reg, opnd_t mem, bool save) |
| { |
| if (save) { |
| return INSTR_CREATE_str(dcontext, mem, opnd_create_reg(reg)); |
| } |
| return INSTR_CREATE_ldr(dcontext, opnd_create_reg(reg), mem); |
| } |
| |
| /* Creates code to save or restore GPR or SIMD registers to memory starting at |
| * base_reg. Uses stp/ldp to save/restore as many register pairs to memory as possible |
 * and uses a single str/ldr for the last register in case the number of registers
| * is odd. Optionally takes reg_skip into account. |
| */ |
| static void |
| insert_save_or_restore_registers(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| bool *reg_skip, reg_id_t base_reg, reg_id_t first_reg, |
| bool save, bool is_gpr, |
| opnd_t (*get_mem_opnd)(uint base_reg, bool is_single_reg, |
| bool is_gpr, uint num_saved, |
| callee_info_t *ci), |
| callee_info_t *ci) |
| { |
| uint i, reg1 = UINT_MAX, num_regs = is_gpr ? 30 : 32; |
| uint saved_regs = 0; |
| instr_t *new_instr; |
    /* Use stp/ldp to save/restore as many register pairs to memory as
     * possible, skipping registers according to reg_skip.
| */ |
| for (i = 0; i < num_regs; i += 1) { |
| if (reg_skip != NULL && reg_skip[i]) |
| continue; |
| |
| if (reg1 == UINT_MAX) |
| reg1 = i; |
| else { |
| opnd_t mem1 = |
| get_mem_opnd(base_reg, false /* is_single_reg */, is_gpr, |
| /* When creating save/restore instructions |
| * for inlining, we need the register id |
| * to compute the address. |
| */ |
| ci != NULL ? first_reg + (reg_id_t)reg1 : saved_regs, ci); |
| |
| uint disp = opnd_get_disp(mem1); |
| /* We cannot use STP/LDP if the immediate offset is too big. */ |
| if (disp > MAX_STP_OFFSET) { |
| PRE(ilist, instr, |
| create_load_or_store_instr(dcontext, first_reg + reg1, mem1, save)); |
| |
| opnd_t mem2 = |
| get_mem_opnd(base_reg, false /* is_single_reg */, is_gpr, |
| /* When creating save/restore instructions |
| * for inlining, we need the register id |
| * to compute the address. |
| */ |
| ci != NULL ? first_reg + (reg_id_t)i : saved_regs, ci); |
| |
| PRE(ilist, instr, |
| create_load_or_store_instr(dcontext, first_reg + i, mem2, save)); |
| } else { |
| if (save) { |
| new_instr = INSTR_CREATE_stp(dcontext, mem1, |
| opnd_create_reg(first_reg + reg1), |
| opnd_create_reg(first_reg + i)); |
| } else { |
| new_instr = |
| INSTR_CREATE_ldp(dcontext, opnd_create_reg(first_reg + reg1), |
| opnd_create_reg(first_reg + i), mem1); |
| } |
| PRE(ilist, instr, new_instr); |
| } |
| reg1 = UINT_MAX; |
| saved_regs += 2; |
| } |
| } |
| |
| /* Use str/ldr to save/restore last single register to memory if the number |
| * of registers to save/restore is odd. |
| */ |
| if (reg1 != UINT_MAX) { |
| opnd_t mem = |
| get_mem_opnd(base_reg, true /* is_single_reg */, is_gpr, |
| ci != NULL ? first_reg + (reg_id_t)reg1 : saved_regs, ci); |
| PRE(ilist, instr, |
| create_load_or_store_instr(dcontext, first_reg + reg1, mem, save)); |
| } |
| } |
| |
| static void |
| insert_save_registers(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| bool *reg_skip, reg_id_t base_reg, reg_id_t first_reg, bool is_gpr) |
| { |
| insert_save_or_restore_registers(dcontext, ilist, instr, reg_skip, base_reg, |
| first_reg, true /* save */, is_gpr, |
| create_base_disp_for_save_restore, NULL); |
| } |
| |
| static void |
| insert_restore_registers(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| bool *reg_skip, reg_id_t base_reg, reg_id_t first_reg, |
| bool is_gpr) |
| { |
| insert_save_or_restore_registers(dcontext, ilist, instr, reg_skip, base_reg, |
| first_reg, false /* restore */, is_gpr, |
| create_base_disp_for_save_restore, NULL); |
| } |
| |
| static opnd_t |
| inline_get_mem_opnd(uint base_reg, bool is_single_reg, bool is_gpr, uint reg_id, |
| callee_info_t *ci) |
| { |
| return callee_info_slot_opnd(ci, SLOT_REG, reg_id); |
| } |
| |
| void |
| insert_save_inline_registers(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| bool *reg_skip, reg_id_t first_reg, bool is_gpr, void *ci) |
| { |
| insert_save_or_restore_registers(dcontext, ilist, instr, reg_skip, 0, first_reg, |
| true /* save */, is_gpr, inline_get_mem_opnd, |
| (callee_info_t *)ci); |
| } |
| |
| void |
| insert_restore_inline_registers(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| bool *reg_skip, reg_id_t first_reg, bool is_gpr, void *ci) |
| { |
| insert_save_or_restore_registers(dcontext, ilist, instr, reg_skip, 0, first_reg, |
| false /* restore */, is_gpr, inline_get_mem_opnd, |
| (callee_info_t *)ci); |
| } |
| |
| #endif |
| |
| /* Pushes not only the GPRs but also simd regs, xip, and xflags, in |
| * priv_mcontext_t order. |
| * The current stack pointer alignment should be passed. Use 1 if |
| * unknown (NOT 0). |
| * Returns the amount of data pushed. Does NOT fix up the xsp value pushed |
| * to be the value prior to any pushes for x64 as no caller needs that |
| * currently (they all build a priv_mcontext_t and have to do further xsp |
| * fixups anyway). |
| * Does NOT push the app's value of the stolen register. |
| * If scratch is REG_NULL, spills a register for scratch space. |
| */ |
| uint |
| insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, |
| instrlist_t *ilist, instr_t *instr, uint alignment, |
| opnd_t push_pc, |
| reg_id_t scratch /*optional*/ |
| _IF_AARCH64(bool out_of_line)) |
| { |
| uint dstack_offs = 0; |
| #ifdef AARCH64 |
| uint max_offs; |
| #endif |
| if (cci == NULL) |
| cci = &default_clean_call_info; |
| ASSERT(proc_num_simd_registers() == MCXT_NUM_SIMD_SLOTS); |
| if (cci->preserve_mcontext || cci->num_simd_skip != proc_num_simd_registers()) { |
| /* FIXME i#1551: once we add skipping of regs, need to keep shape here. |
| * Also, num_opmask_skip is not applicable to ARM/AArch64. |
| */ |
| } |
| /* FIXME i#1551: once we have cci->num_simd_skip, skip this if possible */ |
| #ifdef AARCH64 |
| /* X0 is used to hold the stack pointer. */ |
| cci->reg_skip[DR_REG_X0 - DR_REG_START_GPR] = false; |
| /* X1 and X2 are used to save and restore the status and control registers. */ |
| cci->reg_skip[DR_REG_X1 - DR_REG_START_GPR] = false; |
| cci->reg_skip[DR_REG_X2 - DR_REG_START_GPR] = false; |
| /* X11 is used to calculate the target address of the clean call. */ |
| cci->reg_skip[DR_REG_X11 - DR_REG_START_GPR] = false; |
| |
| max_offs = get_clean_call_switch_stack_size(); |
| |
| /* For out-of-line clean calls, the stack pointer is adjusted before jumping |
| * to this code. |
| */ |
| if (!out_of_line) { |
| /* sub sp, sp, #clean_call_switch_stack_size */ |
| PRE(ilist, instr, |
| XINST_CREATE_sub(dcontext, opnd_create_reg(DR_REG_SP), |
| OPND_CREATE_INT16(max_offs))); |
| } |
| |
| /* Push GPRs. */ |
| insert_save_registers(dcontext, ilist, instr, cci->reg_skip, DR_REG_SP, DR_REG_X0, |
| true /* is_gpr */); |
| |
| dstack_offs += 32 * XSP_SZ; |
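    /* 32 slots: x0..x30 plus sp, matching priv_mcontext_t (a sketch of the
     * layout); x30 and the sp value themselves are stored separately, below
     * or by the out-of-line caller.
     */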
| |
| /* mov x0, sp */ |
| PRE(ilist, instr, |
| XINST_CREATE_move(dcontext, opnd_create_reg(DR_REG_X0), |
| opnd_create_reg(DR_REG_SP))); |
| |
| /* For out-of-line clean calls, X30 is saved before jumping to this code, |
| * because it is used for the return address. |
| */ |
| if (!out_of_line) { |
| /* stp x30, x0, [sp, #x30_offset] */ |
| PRE(ilist, instr, |
| INSTR_CREATE_stp(dcontext, |
| opnd_create_base_disp(DR_REG_SP, DR_REG_NULL, 0, |
| REG_OFFSET(DR_REG_X30), OPSZ_16), |
| opnd_create_reg(DR_REG_X30), opnd_create_reg(DR_REG_X0))); |
| } |
| |
| /* add x0, x0, #dstack_offs */ |
| PRE(ilist, instr, |
| XINST_CREATE_add(dcontext, opnd_create_reg(DR_REG_X0), |
| OPND_CREATE_INT16(dstack_offs))); |
| |
| /* save the push_pc operand to the priv_mcontext_t.pc field */ |
| if (!(cci->skip_save_flags)) { |
| if (opnd_is_immed_int(push_pc)) { |
| PRE(ilist, instr, |
| XINST_CREATE_load_int(dcontext, opnd_create_reg(DR_REG_X1), push_pc)); |
| } else { |
| ASSERT(opnd_is_reg(push_pc)); |
| reg_id_t push_pc_reg = opnd_get_reg(push_pc); |
| /* push_pc opnd is already pushed on the stack */ |
| /* ldr x1, [sp, #push_pc_offset] */ |
| PRE(ilist, instr, |
| INSTR_CREATE_ldr(dcontext, opnd_create_reg(DR_REG_X1), |
| OPND_CREATE_MEM64(DR_REG_SP, REG_OFFSET(push_pc_reg)))); |
| } |
| |
| /* str x1, [sp, #dstack_offset] */ |
| PRE(ilist, instr, |
| INSTR_CREATE_str(dcontext, OPND_CREATE_MEM64(DR_REG_SP, dstack_offs), |
| opnd_create_reg(DR_REG_X1))); |
| } |
| |
| dstack_offs += XSP_SZ; |
| |
| /* Save flag values using x1, x2. */ |
| /* mrs x1, nzcv */ |
| PRE(ilist, instr, |
| INSTR_CREATE_mrs(dcontext, opnd_create_reg(DR_REG_X1), |
| opnd_create_reg(DR_REG_NZCV))); |
| /* mrs x2, fpcr */ |
| PRE(ilist, instr, |
| INSTR_CREATE_mrs(dcontext, opnd_create_reg(DR_REG_X2), |
| opnd_create_reg(DR_REG_FPCR))); |
| /* stp w1, w2, [x0, #8] */ |
| PRE(ilist, instr, |
| INSTR_CREATE_stp(dcontext, OPND_CREATE_MEM64(DR_REG_X0, 8), |
| opnd_create_reg(DR_REG_W1), opnd_create_reg(DR_REG_W2))); |
| |
| /* mrs x1, fpsr */ |
| PRE(ilist, instr, |
| INSTR_CREATE_mrs(dcontext, opnd_create_reg(DR_REG_X1), |
| opnd_create_reg(DR_REG_FPSR))); |
| /* str w1, [x0, #16] */ |
| PRE(ilist, instr, |
| INSTR_CREATE_str(dcontext, OPND_CREATE_MEM32(DR_REG_X0, 16), |
| opnd_create_reg(DR_REG_W1))); |
| |
| /* The three flag registers take 12 bytes. */ |
| dstack_offs += 12; |
| |
| /* The SIMD register data is 16-byte-aligned. */ |
| dstack_offs = ALIGN_FORWARD(dstack_offs, 16); |
| |
| /* add x0, x0, #(dstack_offs - prev_dstack_offs) */ |
| PRE(ilist, instr, |
| XINST_CREATE_add(dcontext, opnd_create_reg(DR_REG_X0), |
| OPND_CREATE_INT16(dstack_offs - 32 * XSP_SZ))); |
| |
| /* Push SIMD registers. */ |
| insert_save_registers(dcontext, ilist, instr, cci->simd_skip, DR_REG_X0, DR_REG_Q0, |
| false /* is_gpr */); |
| |
| dstack_offs += (proc_num_simd_registers() * sizeof(dr_simd_t)); |
| ASSERT(proc_num_simd_registers() == MCXT_NUM_SIMD_SLOTS); |
| |
| /* Restore the registers we used. */ |
| /* ldp x0, x1, [sp] */ |
| PRE(ilist, instr, |
| INSTR_CREATE_ldp(dcontext, opnd_create_reg(DR_REG_X0), opnd_create_reg(DR_REG_X1), |
| opnd_create_base_disp(DR_REG_SP, DR_REG_NULL, 0, 0, OPSZ_16))); |
| /* ldr x2, [sp, #x2_offset] */ |
| PRE(ilist, instr, |
| INSTR_CREATE_ldr(dcontext, opnd_create_reg(DR_REG_X2), |
| opnd_create_base_disp(DR_REG_SP, DR_REG_NULL, 0, |
| REG_OFFSET(DR_REG_X2), OPSZ_8))); |
| #else |
| |
| /* vstmdb always does writeback */ |
| PRE(ilist, instr, |
| INSTR_CREATE_vstmdb(dcontext, OPND_CREATE_MEMLIST(DR_REG_SP), SIMD_REG_LIST_LEN, |
| SIMD_REG_LIST_16_31)); |
| PRE(ilist, instr, |
| INSTR_CREATE_vstmdb(dcontext, OPND_CREATE_MEMLIST(DR_REG_SP), SIMD_REG_LIST_LEN, |
| SIMD_REG_LIST_0_15)); |
| |
| dstack_offs += proc_num_simd_registers() * sizeof(dr_simd_t); |
| ASSERT(proc_num_simd_registers() == MCXT_NUM_SIMD_SLOTS); |
| |
| /* pc and aflags */ |
| if (cci->skip_save_flags) { |
| /* even if we skip flag saves we want to keep mcontext shape */ |
| int offs_beyond_xmm = 2 * XSP_SZ; |
| dstack_offs += offs_beyond_xmm; |
| PRE(ilist, instr, |
| XINST_CREATE_sub(dcontext, opnd_create_reg(DR_REG_SP), |
| OPND_CREATE_INT(offs_beyond_xmm))); |
| } else { |
| uint slot = TLS_REG0_SLOT; |
| bool spill = scratch == REG_NULL; |
| if (spill) { |
| scratch = DR_REG_R0; |
| if (opnd_is_reg(push_pc) && opnd_get_reg(push_pc) == scratch) { |
| scratch = DR_REG_R1; |
| slot = TLS_REG1_SLOT; |
| } |
| } |
| /* XXX: actually, r0 was just used as scratch for swapping stack |
| * via dcontext, so an optimization opportunity exists to avoid |
| * that restore and the re-spill here. |
| */ |
| if (spill) |
| PRE(ilist, instr, instr_create_save_to_tls(dcontext, scratch, slot)); |
| PRE(ilist, instr, |
| INSTR_CREATE_mrs(dcontext, opnd_create_reg(scratch), |
| opnd_create_reg(DR_REG_CPSR))); |
| PRE(ilist, instr, INSTR_CREATE_push(dcontext, opnd_create_reg(scratch))); |
| dstack_offs += XSP_SZ; |
| if (opnd_is_immed_int(push_pc)) { |
| PRE(ilist, instr, |
| XINST_CREATE_load_int(dcontext, opnd_create_reg(scratch), push_pc)); |
| PRE(ilist, instr, INSTR_CREATE_push(dcontext, opnd_create_reg(scratch))); |
| } else { |
| ASSERT(opnd_is_reg(push_pc)); |
| PRE(ilist, instr, INSTR_CREATE_push(dcontext, push_pc)); |
| } |
| if (spill) |
| PRE(ilist, instr, instr_create_restore_from_tls(dcontext, scratch, slot)); |
| dstack_offs += XSP_SZ; |
| } |
| |
| /* We rely on dr_get_mcontext_priv() to fill in the app's stolen reg value |
| * and sp value. |
| */ |
| if (dr_get_isa_mode(dcontext) == DR_ISA_ARM_THUMB) { |
| /* We can't use sp with stm */ |
| PRE(ilist, instr, INSTR_CREATE_push(dcontext, opnd_create_reg(DR_REG_LR))); |
| /* We can't push sp w/ writeback, and in fact dr_get_mcontext() gets |
| * sp from the stack swap so we can leave this empty. |
| */ |
| PRE(ilist, instr, |
| XINST_CREATE_sub(dcontext, opnd_create_reg(DR_REG_SP), |
| OPND_CREATE_INT(XSP_SZ))); |
| PRE(ilist, instr, |
| INSTR_CREATE_stmdb_wb(dcontext, OPND_CREATE_MEMLIST(DR_REG_SP), |
| DR_REG_LIST_LENGTH_T32, DR_REG_LIST_T32)); |
| } else { |
| PRE(ilist, instr, |
| INSTR_CREATE_stmdb_wb(dcontext, OPND_CREATE_MEMLIST(DR_REG_SP), |
| DR_REG_LIST_LENGTH_ARM, DR_REG_LIST_ARM)); |
| } |
| dstack_offs += 15 * XSP_SZ; |
| |
    /* Make dstack_offs 8-byte aligned, as we only accounted for 17 4-byte slots. */
| dstack_offs += XSP_SZ; |
| ASSERT(cci->skip_save_flags || cci->num_simd_skip != 0 || cci->num_regs_skip != 0 || |
| dstack_offs == (uint)get_clean_call_switch_stack_size()); |
| #endif |
| return dstack_offs; |
| } |
| |
| /* User should pass the alignment from insert_push_all_registers: i.e., the |
| * alignment at the end of all the popping, not the alignment prior to |
| * the popping. |
| */ |
| void |
| insert_pop_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, instrlist_t *ilist, |
| instr_t *instr, uint alignment _IF_AARCH64(bool out_of_line)) |
| { |
| if (cci == NULL) |
| cci = &default_clean_call_info; |
| #ifdef AARCH64 |
| uint current_offs; |
| /* mov x0, sp */ |
| PRE(ilist, instr, |
| XINST_CREATE_move(dcontext, opnd_create_reg(DR_REG_X0), |
| opnd_create_reg(DR_REG_SP))); |
| |
| current_offs = get_clean_call_switch_stack_size() - |
| proc_num_simd_registers() * sizeof(dr_simd_t); |
| ASSERT(proc_num_simd_registers() == MCXT_NUM_SIMD_SLOTS); |
| |
| /* add x0, x0, current_offs */ |
| PRE(ilist, instr, |
| XINST_CREATE_add(dcontext, opnd_create_reg(DR_REG_X0), |
| OPND_CREATE_INT32(current_offs))); |
| |
| /* Pop SIMD registers. */ |
| insert_restore_registers(dcontext, ilist, instr, cci->simd_skip, DR_REG_X0, DR_REG_Q0, |
| false /* is_gpr */); |
| |
| /* mov x0, sp */ |
| PRE(ilist, instr, |
| XINST_CREATE_move(dcontext, opnd_create_reg(DR_REG_X0), |
| opnd_create_reg(DR_REG_SP))); |
| |
| /* point x0 to push_pc field */ |
| current_offs = (32 * XSP_SZ); |
| |
| /* add x0, x0, #gpr_size */ |
| PRE(ilist, instr, |
| XINST_CREATE_add(dcontext, opnd_create_reg(DR_REG_X0), |
| OPND_CREATE_INT32(current_offs))); |
| |
| /* load pc and flags */ |
| if (!(cci->skip_save_flags)) { |
| /* ldp w1, w2, [x0, #8] */ |
| PRE(ilist, instr, |
| INSTR_CREATE_ldp(dcontext, opnd_create_reg(DR_REG_W1), |
| opnd_create_reg(DR_REG_W2), |
| OPND_CREATE_MEM64(DR_REG_X0, 8))); |
        /* msr nzcv, x1 */
| PRE(ilist, instr, |
| INSTR_CREATE_msr(dcontext, opnd_create_reg(DR_REG_NZCV), |
| opnd_create_reg(DR_REG_X1))); |
        /* msr fpcr, x2 */
| PRE(ilist, instr, |
| INSTR_CREATE_msr(dcontext, opnd_create_reg(DR_REG_FPCR), |
| opnd_create_reg(DR_REG_X2))); |
| |
| /* ldr w1, [x0, #16] */ |
| PRE(ilist, instr, |
| INSTR_CREATE_ldr(dcontext, opnd_create_reg(DR_REG_W1), |
| OPND_CREATE_MEM32(DR_REG_X0, 16))); |
| |
        /* msr fpsr, x1 */
| PRE(ilist, instr, |
| INSTR_CREATE_msr(dcontext, opnd_create_reg(DR_REG_FPSR), |
| opnd_create_reg(DR_REG_X1))); |
| } |
| |
| /* Pop GPRs */ |
| insert_restore_registers(dcontext, ilist, instr, cci->reg_skip, DR_REG_SP, DR_REG_X0, |
| true /* is_gpr */); |
| |
| /* For out-of-line clean calls, X30 is restored after jumping back from this |
| * code, because it is used for the return address. |
| */ |
| if (!out_of_line) { |
| /* Recover x30 */ |
        /* ldr x30, [sp, #x30_offset] */
| PRE(ilist, instr, |
| INSTR_CREATE_ldr(dcontext, opnd_create_reg(DR_REG_X30), |
| OPND_CREATE_MEM64(DR_REG_SP, REG_OFFSET(DR_REG_X30)))); |
| PRE(ilist, instr, |
| XINST_CREATE_add(dcontext, opnd_create_reg(DR_REG_SP), |
| OPND_CREATE_INT16(get_clean_call_switch_stack_size()))); |
| } |
| |
| #else |
| /* We rely on dr_set_mcontext_priv() to set the app's stolen reg value, |
| * and the stack swap to set the sp value: we assume the stolen reg on |
| * the stack still has our TLS base in it. |
| */ |
| /* We can't use sp with ldm for Thumb, and we don't want to write sp for ARM. */ |
| PRE(ilist, instr, |
| INSTR_CREATE_ldm_wb(dcontext, OPND_CREATE_MEMLIST(DR_REG_SP), |
| DR_REG_LIST_LENGTH_T32, DR_REG_LIST_T32)); |
| /* We don't want the sp value */ |
| PRE(ilist, instr, |
| XINST_CREATE_add(dcontext, opnd_create_reg(DR_REG_SP), OPND_CREATE_INT(XSP_SZ))); |
| PRE(ilist, instr, INSTR_CREATE_pop(dcontext, opnd_create_reg(DR_REG_LR))); |
| |
| /* pc and aflags */ |
| if (cci->skip_save_flags) { |
| /* even if we skip flag saves we still keep mcontext shape */ |
| int offs_beyond_xmm = 2 * XSP_SZ; |
| PRE(ilist, instr, |
| XINST_CREATE_add(dcontext, opnd_create_reg(DR_REG_SP), |
| OPND_CREATE_INT(offs_beyond_xmm))); |
| } else { |
| reg_id_t scratch = DR_REG_R0; |
| uint slot = TLS_REG0_SLOT; |
| /* just throw pc slot away */ |
| PRE(ilist, instr, |
| XINST_CREATE_add(dcontext, opnd_create_reg(DR_REG_SP), |
| OPND_CREATE_INT(XSP_SZ))); |
| PRE(ilist, instr, instr_create_save_to_tls(dcontext, scratch, slot)); |
| PRE(ilist, instr, INSTR_CREATE_pop(dcontext, opnd_create_reg(scratch))); |
| PRE(ilist, instr, |
| INSTR_CREATE_msr(dcontext, opnd_create_reg(DR_REG_CPSR), |
| OPND_CREATE_INT_MSR_NZCVQG(), opnd_create_reg(scratch))); |
| PRE(ilist, instr, instr_create_restore_from_tls(dcontext, scratch, slot)); |
| } |
| /* FIXME i#1551: once we have cci->num_simd_skip, skip this if possible */ |
| PRE(ilist, instr, |
| INSTR_CREATE_vldm_wb(dcontext, OPND_CREATE_MEMLIST(DR_REG_SP), SIMD_REG_LIST_LEN, |
| SIMD_REG_LIST_0_15)); |
| PRE(ilist, instr, |
| INSTR_CREATE_vldm_wb(dcontext, OPND_CREATE_MEMLIST(DR_REG_SP), SIMD_REG_LIST_LEN, |
| SIMD_REG_LIST_16_31)); |
| #endif |
| } |
| |
| #ifndef AARCH64 |
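/* On ARM, parameters are passed in full-width registers, so (unlike x86, which
 * narrows to sub-registers for small arguments) there is nothing to shrink.
 */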
| reg_id_t |
| shrink_reg_for_param(reg_id_t regular, opnd_t arg) |
| { |
| return regular; |
| } |
| #endif /* !AARCH64 */ |
| |
/* Returns true if opnd is a register other than XSP, or, on AArch64, an
 * immediate integer zero.
 */
| static bool |
| opnd_is_reglike(opnd_t opnd) |
| { |
| return ((opnd_is_reg(opnd) && opnd_get_reg(opnd) != DR_REG_XSP) |
| IF_X64(|| (opnd_is_immed_int(opnd) && opnd_get_immed_int(opnd) == 0))); |
| } |
| |
| uint |
| insert_parameter_preparation(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| bool clean_call, uint num_args, opnd_t *args) |
| { |
| uint num_regs = num_args < NUM_REGPARM ? num_args : NUM_REGPARM; |
| signed char regs[NUM_REGPARM]; |
| int usecount[NUM_REGPARM]; |
| ptr_int_t stack_inc = 0; |
| uint i, j; |
| |
| /* We expect every arg to be an immediate integer, a full-size register, |
| * or a simple memory reference (NYI). |
| */ |
| for (i = 0; i < num_args; i++) { |
| CLIENT_ASSERT(opnd_is_immed_int((args[i])) || |
| (opnd_is_reg(args[i]) && |
| reg_get_size(opnd_get_reg(args[i])) == OPSZ_PTR) || |
| opnd_is_base_disp(args[i]), |
| "insert_parameter_preparation: bad argument type"); |
| ASSERT_NOT_IMPLEMENTED(!opnd_is_base_disp(args[i])); /* FIXME i#2210 */ |
| } |
| |
| /* The strategy here is to first set up the arguments that can be set up |
| * without using a temporary register: stack arguments that are registers and |
| * register arguments that are not involved in a cycle. When this has been done, |
| * the value in the link register (LR) will be dead, so we can use LR as a |
| * temporary for setting up the remaining arguments. |
| */ |
| |
| /* Set up stack arguments that are registers (not SP) or zero (on AArch64). */ |
| if (num_args > NUM_REGPARM) { |
| uint n = num_args - NUM_REGPARM; |
| /* On both ARM and AArch64 the stack pointer is kept (2 * XSP_SZ)-aligned. */ |
| stack_inc = ALIGN_FORWARD(n, 2) * XSP_SZ; |
| #ifdef AARCH64 |
| for (i = 0; i < n; i += 2) { |
| opnd_t *arg0 = &args[NUM_REGPARM + i]; |
| opnd_t *arg1 = i + 1 < n ? &args[NUM_REGPARM + i + 1] : NULL; |
| if (i == 0) { |
| if (i + 1 < n && opnd_is_reglike(*arg1)) { |
| /* stp x(...), x(...), [sp, #-(stack_inc)]! */ |
| PRE(ilist, instr, |
| instr_create_2dst_4src( |
| dcontext, OP_stp, |
| opnd_create_base_disp(DR_REG_XSP, DR_REG_NULL, 0, -stack_inc, |
| OPSZ_16), |
| opnd_create_reg(DR_REG_XSP), |
| opnd_is_reg(*arg0) ? *arg0 : opnd_create_reg(DR_REG_XZR), |
| opnd_is_reg(*arg1) ? *arg1 : opnd_create_reg(DR_REG_XZR), |
| opnd_create_reg(DR_REG_XSP), |
| opnd_create_immed_int(-stack_inc, OPSZ_PTR))); |
| } else if (opnd_is_reglike(*arg0)) { |
| /* str x(...), [sp, #-(stack_inc)]! */ |
| PRE(ilist, instr, |
| instr_create_2dst_3src( |
| dcontext, OP_str, |
| opnd_create_base_disp(DR_REG_XSP, DR_REG_NULL, 0, -stack_inc, |
| OPSZ_PTR), |
| opnd_create_reg(DR_REG_XSP), |
| opnd_is_reg(*arg0) ? *arg0 : opnd_create_reg(DR_REG_XZR), |
| opnd_create_reg(DR_REG_XSP), |
| opnd_create_immed_int(-stack_inc, OPSZ_PTR))); |
| } else { |
| /* sub sp, sp, #(stack_inc) */ |
| PRE(ilist, instr, |
| INSTR_CREATE_sub(dcontext, opnd_create_reg(DR_REG_XSP), |
| opnd_create_reg(DR_REG_XSP), |
| OPND_CREATE_INT32(stack_inc))); |
| } |
| } else if (opnd_is_reglike(*arg0)) { |
| if (i + 1 < n && opnd_is_reglike(*arg1)) { |
| /* stp x(...), x(...), [sp, #(i * XSP_SZ)] */ |
| PRE(ilist, instr, |
| instr_create_1dst_2src( |
| dcontext, OP_stp, |
| opnd_create_base_disp(DR_REG_XSP, DR_REG_NULL, 0, i * XSP_SZ, |
| OPSZ_16), |
| opnd_is_reg(*arg0) ? *arg0 : opnd_create_reg(DR_REG_XZR), |
| opnd_is_reg(*arg1) ? *arg1 : opnd_create_reg(DR_REG_XZR))); |
| } else { |
| /* str x(...), [sp, #(i * XSP_SZ)] */ |
| PRE(ilist, instr, |
| instr_create_1dst_1src( |
| dcontext, OP_str, |
| opnd_create_base_disp(DR_REG_XSP, DR_REG_NULL, 0, i * XSP_SZ, |
| OPSZ_PTR), |
| opnd_is_reg(*arg0) ? *arg0 : opnd_create_reg(DR_REG_XZR))); |
| } |
| } else if (i + 1 < n && opnd_is_reglike(*arg1)) { |
| /* str x(...), [sp, #((i + 1) * XSP_SZ)] */ |
| PRE(ilist, instr, |
| instr_create_1dst_1src( |
| dcontext, OP_str, |
| opnd_create_base_disp(DR_REG_XSP, DR_REG_NULL, 0, |
| (i + 1) * XSP_SZ, OPSZ_PTR), |
| opnd_is_reg(*arg1) ? *arg1 : opnd_create_reg(DR_REG_XZR))); |
| } |
| } |
| #else /* ARM */ |
| /* XXX: We could use OP_stm here, but with lots of awkward corner cases. */ |
| PRE(ilist, instr, |
| INSTR_CREATE_sub(dcontext, opnd_create_reg(DR_REG_XSP), |
| opnd_create_reg(DR_REG_XSP), OPND_CREATE_INT32(stack_inc))); |
| for (i = 0; i < n; i++) { |
| opnd_t arg = args[NUM_REGPARM + i]; |
| if (opnd_is_reglike(arg)) { |
| /* str r(...), [sp, #(i * XSP_SZ)] */ |
| PRE(ilist, instr, |
| XINST_CREATE_store(dcontext, |
| opnd_create_base_disp(DR_REG_XSP, DR_REG_NULL, 0, |
| i * XSP_SZ, OPSZ_PTR), |
| arg)); |
| } |
| } |
| #endif |
| } |
| |
| /* Initialise regs[], which encodes the contents of parameter registers. |
| * A non-negative value x means d_r_regparms[x]; |
| * -1 means an immediate integer; |
| * -2 means a non-parameter register. |
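     * For example (a sketch): with args (d_r_regparms[1], #5), regs = {1, -1}
     * and, below, usecount = {0, 1}, since arg 0's value currently lives in
     * parameter register 1.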
| */ |
| for (i = 0; i < num_regs; i++) { |
| if (opnd_is_immed_int(args[i])) |
| regs[i] = -1; |
| else { |
| reg_id_t reg = opnd_get_reg(args[i]); |
| regs[i] = -2; |
| for (j = 0; j < NUM_REGPARM; j++) { |
| if (reg == d_r_regparms[j]) { |
| regs[i] = j; |
| break; |
| } |
| } |
| } |
| } |
| |
| /* Initialise usecount[]: how many other registers use the value in a reg. */ |
| for (i = 0; i < num_regs; i++) |
| usecount[i] = 0; |
| for (i = 0; i < num_regs; i++) { |
| if (regs[i] >= 0 && regs[i] != i) |
| ++usecount[regs[i]]; |
| } |
| |
| /* Set up register arguments that are not part of a cycle. */ |
| { |
| bool changed; |
| do { |
| changed = false; |
| for (i = 0; i < num_regs; i++) { |
| if (regs[i] == i || usecount[i] != 0) |
| continue; |
| if (regs[i] == -1) { |
| insert_mov_immed_ptrsz(dcontext, opnd_get_immed_int(args[i]), |
| opnd_create_reg(d_r_regparms[i]), ilist, instr, |
| NULL, NULL); |
| } else if (regs[i] == -2 && opnd_get_reg(args[i]) == DR_REG_XSP) { |
| /* XXX: We could record which register has been set to the SP to |
| * avoid repeating this load if several arguments are set to SP. |
| */ |
| insert_get_mcontext_base(dcontext, ilist, instr, d_r_regparms[i]); |
| PRE(ilist, instr, |
| instr_create_restore_from_dc_via_reg( |
| dcontext, d_r_regparms[i], d_r_regparms[i], XSP_OFFSET)); |
| } else { |
| PRE(ilist, instr, |
| XINST_CREATE_move(dcontext, opnd_create_reg(d_r_regparms[i]), |
| args[i])); |
| if (regs[i] != -2) |
| --usecount[regs[i]]; |
| } |
| regs[i] = i; |
| changed = true; |
| } |
| } while (changed); |
| } |
| |
| /* From now on it is safe to use LR as a temporary. */ |
| |
| /* Set up register arguments that are in cycles. A rotation of n values is |
| * realised with (n + 1) moves. |
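     * For example (a sketch): swapping d_r_regparms[0] and d_r_regparms[1]
     * (n == 2) emits three moves:
     *   mov lr, regparm0
     *   mov regparm0, regparm1
     *   mov regparm1, lr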
| */ |
| for (;;) { |
| int first, tmp; |
| for (i = 0; i < num_regs; i++) { |
| if (regs[i] != i) |
| break; |
| } |
| if (i >= num_regs) |
| break; |
| first = i; |
| PRE(ilist, instr, |
| XINST_CREATE_move(dcontext, opnd_create_reg(DR_REG_LR), |
| opnd_create_reg(d_r_regparms[i]))); |
| do { |
| tmp = regs[i]; |
| ASSERT(0 <= tmp && tmp < num_regs); |
| PRE(ilist, instr, |
| XINST_CREATE_move(dcontext, opnd_create_reg(d_r_regparms[i]), |
| tmp == first ? opnd_create_reg(DR_REG_LR) |
| : opnd_create_reg(d_r_regparms[tmp]))); |
| regs[i] = i; |
| i = tmp; |
| } while (tmp != first); |
| } |
| |
| /* Set up stack arguments that are (non-zero) constants or SP. */ |
| for (i = NUM_REGPARM; i < num_args; i++) { |
| uint off = (i - NUM_REGPARM) * XSP_SZ; |
| opnd_t arg = args[i]; |
| if (!opnd_is_reglike(arg)) { |
| if (opnd_is_reg(arg)) { |
| ASSERT(opnd_get_reg(arg) == DR_REG_XSP); |
| insert_get_mcontext_base(dcontext, ilist, instr, DR_REG_LR); |
| PRE(ilist, instr, |
| instr_create_restore_from_dc_via_reg(dcontext, DR_REG_LR, DR_REG_LR, |
| XSP_OFFSET)); |
| } else { |
| ASSERT(opnd_is_immed_int(arg)); |
| insert_mov_immed_ptrsz(dcontext, opnd_get_immed_int(arg), |
| opnd_create_reg(DR_REG_LR), ilist, instr, NULL, |
| NULL); |
| } |
| PRE(ilist, instr, |
| XINST_CREATE_store( |
| dcontext, |
| opnd_create_base_disp(DR_REG_XSP, DR_REG_NULL, 0, off, OPSZ_PTR), |
| opnd_create_reg(DR_REG_LR))); |
| } |
| } |
| |
| return (uint)stack_inc; |
| } |
| |
| bool |
| insert_reachable_cti(dcontext_t *dcontext, instrlist_t *ilist, instr_t *where, |
| byte *encode_pc, byte *target, bool jmp, bool returns, bool precise, |
| reg_id_t scratch, instr_t **inlined_tgt_instr) |
| { |
| ASSERT(scratch != REG_NULL); /* required */ |
| /* load target into scratch register */ |
| insert_mov_immed_ptrsz(dcontext, |
| (ptr_int_t)PC_AS_JMP_TGT(dr_get_isa_mode(dcontext), target), |
| opnd_create_reg(scratch), ilist, where, NULL, NULL); |
| /* even if a call and not a jmp, we can skip this if it doesn't return */ |
| if (!jmp && returns) { |
| PRE(ilist, where, XINST_CREATE_call_reg(dcontext, opnd_create_reg(scratch))); |
| } else { |
| PRE(ilist, where, XINST_CREATE_jump_reg(dcontext, opnd_create_reg(scratch))); |
| } |
| return false /* an ind branch */; |
| } |
| |
| int |
| insert_out_of_line_context_switch(dcontext_t *dcontext, instrlist_t *ilist, |
| instr_t *instr, bool save, byte *encode_pc) |
| { |
| #ifdef AARCH64 |
| if (save) { |
| /* Reserve stack space to push the context. We do it here instead of |
| * in insert_push_all_registers, so we can save the original value |
| * of X30 on the stack before it is changed by the BL (branch & link) |
| * to the clean call save routine in the code cache. |
| * |
| * sub sp, sp, #clean_call_switch_stack_size |
| */ |
| PRE(ilist, instr, |
| XINST_CREATE_sub(dcontext, opnd_create_reg(DR_REG_SP), |
| OPND_CREATE_INT16(get_clean_call_switch_stack_size()))); |
| |
| /* str x30, [sp, #x30_offset] |
| * |
| * We have to save the original value of x30 before using BLR to jump |
| * to the save code, because BLR will modify x30. The original value of |
| * x30 is restored after the returning from the save/restore functions below. |
| */ |
| PRE(ilist, instr, |
| INSTR_CREATE_str(dcontext, |
| opnd_create_base_disp(DR_REG_SP, DR_REG_NULL, 0, |
| REG_OFFSET(DR_REG_X30), OPSZ_8), |
| opnd_create_reg(DR_REG_X30))); |
| } |
| |
| insert_mov_immed_ptrsz( |
| dcontext, |
| (long)(save ? get_clean_call_save(dcontext) : get_clean_call_restore(dcontext)), |
| opnd_create_reg(DR_REG_X30), ilist, instr, NULL, NULL); |
| PRE(ilist, instr, INSTR_CREATE_blr(dcontext, opnd_create_reg(DR_REG_X30))); |
| |
| /* Restore original value of X30, which was changed by BLR. |
| * |
| * ldr x30, [sp, #x30_offset] |
| */ |
| PRE(ilist, instr, |
| INSTR_CREATE_ldr(dcontext, opnd_create_reg(DR_REG_X30), |
| OPND_CREATE_MEM64(DR_REG_SP, REG_OFFSET(DR_REG_X30)))); |
| |
| if (!save) { |
| /* add sp, sp, #clean_call_switch_stack_size */ |
| PRE(ilist, instr, |
| XINST_CREATE_add(dcontext, opnd_create_reg(DR_REG_SP), |
| OPND_CREATE_INT16(get_clean_call_switch_stack_size()))); |
| } |
| |
| return get_clean_call_switch_stack_size(); |
| #else |
| ASSERT_NOT_IMPLEMENTED(false); /* FIXME i#1621: NYI on AArch32. */ |
| return 0; |
| #endif |
| } |
| |
| /*########################################################################### |
| *########################################################################### |
| * |
| * M A N G L I N G R O U T I N E S |
| */ |
| |
| /* forward declaration */ |
| static void |
| mangle_stolen_reg(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| instr_t *next_instr, bool instr_to_be_removed); |
| |
| #ifndef AARCH64 |
| |
| /* i#1662 optimization: we try to pick the same scratch register during |
| * mangling to provide more opportunities for optimization, |
| * xref insert_save_to_tls_if_necessary(). |
| * |
| * Returns the prev reg restore instruction. |
| */ |
| static instr_t * |
| find_prior_scratch_reg_restore(dcontext_t *dcontext, instr_t *instr, reg_id_t *prior_reg) |
| { |
| instr_t *prev = instr_get_prev(instr); |
| bool tls, spill; |
| |
| ASSERT(prior_reg != NULL); |
| *prior_reg = REG_NULL; |
| if (INTERNAL_OPTION(opt_mangle) == 0) |
| return NULL; |
| while (prev != NULL && |
| /* We can eliminate the restore/respill pair only if they are executed |
| * together, so only our own mangling label instruction is allowed in |
| * between. |
| */ |
| instr_is_label(prev) && instr_is_our_mangling(prev)) |
| prev = instr_get_prev(prev); |
| if (prev != NULL && |
| instr_is_DR_reg_spill_or_restore(dcontext, prev, &tls, &spill, prior_reg, NULL)) { |
| if (tls && !spill && *prior_reg >= SCRATCH_REG0 && *prior_reg <= SCRATCH_REG_LAST) |
| return prev; |
| } |
| *prior_reg = REG_NULL; |
| return NULL; |
| } |
| |
| #endif /* !AARCH64 */ |
| |
| /* optimized spill: only if not immediately spilled already */ |
| static void |
| insert_save_to_tls_if_necessary(dcontext_t *dcontext, instrlist_t *ilist, instr_t *where, |
| reg_id_t reg, ushort slot) |
| { |
| #ifdef AARCH64 |
| /* FIXME i#1569: not yet optimized */ |
| PRE(ilist, where, instr_create_save_to_tls(dcontext, reg, slot)); |
| #else |
| instr_t *prev; |
| reg_id_t prior_reg; |
| DEBUG_DECLARE(bool tls;) |
| DEBUG_DECLARE(bool spill;) |
| |
| /* this routine is only called for non-mbr mangling */ |
| STATS_INC(non_mbr_spills); |
| prev = find_prior_scratch_reg_restore(dcontext, where, &prior_reg); |
| if (INTERNAL_OPTION(opt_mangle) > 0 && prev != NULL && prior_reg == reg) { |
| /* XXX: This looks only for dr_reg_stolen. */ |
| ASSERT(instr_is_DR_reg_spill_or_restore(dcontext, prev, &tls, &spill, &prior_reg, |
| NULL) && |
| tls && !spill && prior_reg == reg); |
| /* remove the redundant restore-spill pair */ |
| instrlist_remove(ilist, prev); |
| instr_destroy(dcontext, prev); |
| STATS_INC(non_mbr_respill_avoided); |
| } else { |
| PRE(ilist, where, instr_create_save_to_tls(dcontext, reg, slot)); |
| } |
| #endif |
| } |
| |
| #ifndef AARCH64 |
| |
| /* If instr is inside an IT block, removes it from the block and |
| * leaves it as an isolated (un-encodable) predicated instr, with any |
| * other instrs from the same block made to be legal on both sides by |
| * modifying and adding new OP_it instrs as necessary, which are marked |
| * as app instrs. |
| * Returns a new next_instr. |
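 * For example (a sketch): given "ITTT EQ" covering {addeq, subeq, moveq} with
 * instr == subeq, we insert "IT EQ" for addeq, leave subeq isolated, insert a
 * new "IT EQ" for moveq, and delete the original OP_it.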
| */ |
| static instr_t * |
| mangle_remove_from_it_block(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr) |
| { |
| instr_t *prev, *it; |
| uint prior, count; |
| if (instr_get_isa_mode(instr) != DR_ISA_ARM_THUMB || !instr_is_predicated(instr)) |
| return instr_get_next(instr); /* nothing to do */ |
| for (prior = 0, prev = instr_get_prev(instr); prev != NULL; |
| prior++, prev = instr_get_prev(prev)) { |
| if (instr_get_opcode(prev) == OP_it) |
| break; |
| ASSERT(instr_is_predicated(prev)); |
| } |
| ASSERT(prev != NULL); |
| it = prev; |
| count = instr_it_block_get_count(it); |
| ASSERT(count > prior && count <= IT_BLOCK_MAX_INSTRS); |
| if (prior > 0) { |
| instrlist_preinsert(ilist, it, |
| instr_it_block_create( |
| dcontext, instr_it_block_get_pred(it, 0), |
| prior > 1 ? instr_it_block_get_pred(it, 1) : DR_PRED_NONE, |
| prior > 2 ? instr_it_block_get_pred(it, 2) : DR_PRED_NONE, |
| DR_PRED_NONE)); |
| count -= prior; |
| } |
| count--; /* this instr */ |
| if (count > 0) { |
| instrlist_postinsert( |
| ilist, instr, |
| instr_it_block_create( |
| dcontext, instr_it_block_get_pred(it, prior + 1), |
| count > 1 ? instr_it_block_get_pred(it, prior + 2) : DR_PRED_NONE, |
| count > 2 ? instr_it_block_get_pred(it, prior + 3) : DR_PRED_NONE, |
| DR_PRED_NONE)); |
| } |
| /* It is now safe to remove the original OP_it instr */ |
| instrlist_remove(ilist, it); |
| instr_destroy(dcontext, it); |
| DOLOG(5, LOG_INTERP, { |
| LOG(THREAD, LOG_INTERP, 4, "bb ilist after removing from IT block:\n"); |
| instrlist_disassemble(dcontext, NULL, ilist, THREAD); |
| }); |
| return instr_get_next(instr); |
| } |
| |
| /* Adds enough OP_it instrs to ensure that each predicated instr in [start, end) |
| * (open-ended, so pass NULL to go to the final instr in ilist) is inside an IT |
| * block and is thus legally encodable. Marks the OP_it instrs as app instrs. |
| */ |
| int |
| reinstate_it_blocks(dcontext_t *dcontext, instrlist_t *ilist, instr_t *start, |
| instr_t *end) |
| { |
| instr_t *instr, *block_start = NULL; |
| app_pc block_xl8 = NULL; |
| int res = 0; |
| uint it_count = 0, block_count = 0; |
| dr_pred_type_t block_pred[IT_BLOCK_MAX_INSTRS]; |
| for (instr = start; instr != NULL && instr != end; instr = instr_get_next(instr)) { |
| bool instr_predicated = instr_is_predicated(instr) && |
| /* A label instruction may be used as a cti target, so we stop |
| * the IT block on label instructions. |
| */ |
| !instr_is_label(instr) && |
            /* Do not put OP_b exit cti into block: patch_branch can't handle it. */
| instr_get_opcode(instr) != OP_b && instr_get_opcode(instr) != OP_b_short; |
| if (block_start != NULL) { |
| bool matches = true; |
| ASSERT(block_count < IT_BLOCK_MAX_INSTRS); |
| if (instr_predicated) { |
| if (instr_get_predicate(instr) != block_pred[0] && |
| instr_get_predicate(instr) != instr_invert_predicate(block_pred[0])) |
| matches = false; |
| else |
| block_pred[block_count++] = instr_get_predicate(instr); |
| } |
| if (!matches || !instr_predicated || block_count == IT_BLOCK_MAX_INSTRS || |
| /* i#1702: a cti must end the IT-block */ |
| instr_is_cti(instr)) { |
| res++; |
| instrlist_preinsert( |
| ilist, block_start, |
| INSTR_XL8(instr_it_block_create( |
| dcontext, block_pred[0], |
| block_count > 1 ? block_pred[1] : DR_PRED_NONE, |
| block_count > 2 ? block_pred[2] : DR_PRED_NONE, |
| block_count > 3 ? block_pred[3] : DR_PRED_NONE), |
| block_xl8)); |
| block_start = NULL; |
| if (instr_predicated && matches) |
| continue; |
| } else |
| continue; |
| } |
| /* Skip existing IT blocks. |
| * XXX: merge w/ adjacent blocks. |
| */ |
| if (it_count > 0) |
| it_count--; |
| else if (instr_get_opcode(instr) == OP_it) |
| it_count = instr_it_block_get_count(instr); |
| else if (instr_predicated) { |
| instr_t *app; |
| block_start = instr; |
| block_pred[0] = instr_get_predicate(instr); |
| block_count = 1; |
| /* XXX i#1695: we want the xl8 to be the original app IT instr, if |
| * it existed, as using the first instr inside the block will not |
| * work on relocation. Should we insert labels to keep that info |
| * when we remove IT instrs? |
| */ |
| for (app = instr; app != NULL && instr_get_app_pc(app) == NULL; |
| app = instr_get_next(app)) |
| /*nothing*/; |
| if (app != NULL) |
| block_xl8 = instr_get_app_pc(app); |
| else |
| block_xl8 = NULL; |
| } |
| } |
| if (block_start != NULL) { |
| res++; |
| instrlist_preinsert(ilist, block_start, |
| INSTR_XL8(instr_it_block_create( |
| dcontext, block_pred[0], |
| block_count > 1 ? block_pred[1] : DR_PRED_NONE, |
| block_count > 2 ? block_pred[2] : DR_PRED_NONE, |
| block_count > 3 ? block_pred[3] : DR_PRED_NONE), |
| block_xl8)); |
| } |
| return res; |
| } |
| |
| static void |
| mangle_reinstate_it_blocks(dcontext_t *dcontext, instrlist_t *ilist, instr_t *start, |
| instr_t *end) |
| { |
| if (dr_get_isa_mode(dcontext) != DR_ISA_ARM_THUMB) |
| return; /* nothing to do */ |
| reinstate_it_blocks(dcontext, ilist, start, end); |
| DOLOG(5, LOG_INTERP, { |
| LOG(THREAD, LOG_INTERP, 4, "bb ilist after reinstating IT blocks:\n"); |
| instrlist_disassemble(dcontext, NULL, ilist, THREAD); |
| }); |
| } |
| |
| #endif /* !AARCH64 */ |
| |
| void |
| patch_mov_immed_arch(dcontext_t *dcontext, ptr_int_t val, byte *pc, instr_t *first, |
| instr_t *last) |
| { |
| ASSERT_NOT_IMPLEMENTED(false); /* FIXME i#1551, i#1569 */ |
| } |
| |
| /* Used for fault translation */ |
| bool |
| instr_check_xsp_mangling(dcontext_t *dcontext, instr_t *inst, int *xsp_adjust) |
| { |
| ASSERT(xsp_adjust != NULL); |
| /* No current ARM/AArch64 mangling splits an atomic push/pop into emulated pieces: |
| * the OP_ldm/OP_stm splits shouldn't need special translation handling. |
| */ |
| return false; |
| } |
| |
| void |
| mangle_syscall_arch(dcontext_t *dcontext, instrlist_t *ilist, uint flags, instr_t *instr, |
| instr_t *next_instr) |
| { |
| /* inlined conditional system call mangling is not supported */ |
| ASSERT(!instr_is_predicated(instr)); |
| |
| /* Shared routine already checked method, handled INSTR_NI_SYSCALL*, |
| * and inserted the signal barrier and non-auto-restart nop. |
| * If we get here, we're dealing with an ignorable syscall. |
| */ |
| |
| /* We assume that the stolen register will, in effect, be neither |
| * read nor written by a system call as it is above the highest |
| * register used for the syscall arguments or number. This assumption |
| * currently seems to be valid on arm/arm64 Linux, which only writes the |
| * return value (with system calls that return). When other kernels are |
| * supported it may be necessary to move the stolen register value to a |
| * safer register (one that is "callee-saved" and not used by the gateway |
| * mechanism) before the system call, and restore it afterwards. |
| */ |
| ASSERT(DR_REG_STOLEN_MIN > DR_REG_SYSNUM); |
| } |
| |
| #ifdef UNIX |
| /* Inserts code to handle clone into ilist. |
| * instr is the syscall instr itself. |
| * Assumes that instructions exist beyond instr in ilist. |
| */ |
| void |
| mangle_insert_clone_code(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr) |
| { |
| /* svc 0 |
| * cbnz r0, parent |
| * jmp new_thread_dynamo_start |
| * parent: |
| * <post system call, etc.> |
| */ |
| instr_t *in = instr_get_next(instr); |
| instr_t *parent = INSTR_CREATE_label(dcontext); |
| ASSERT(in != NULL); |
| PRE(ilist, in, |
| INSTR_CREATE_cbnz(dcontext, opnd_create_instr(parent), |
| opnd_create_reg(DR_REG_R0))); |
| insert_reachable_cti(dcontext, ilist, in, vmcode_get_start(), |
| (byte *)get_new_thread_start(dcontext), true /*jmp*/, |
| false /*!returns*/, false /*!precise*/, DR_REG_R0 /*scratch*/, |
| NULL); |
| instr_set_meta(instr_get_prev(in)); |
| PRE(ilist, in, parent); |
| } |
| #endif /* UNIX */ |
| |
| void |
| mangle_interrupt(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| instr_t *next_instr) |
| { |
| ASSERT_NOT_IMPLEMENTED(false); /* FIXME i#1551, i#1569 */ |
| } |
| |
| #ifndef AARCH64 |
| |
| /* Adds a mov of the fall-through address into IBL_TARGET_REG, predicated |
| * with the inverse of instr's predicate. |
| * The caller must call mangle_reinstate_it_blocks() in Thumb mode afterward |
| * in order to make for legal encodings. |
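 * For example (a sketch): for "blxne r3", the fall-through address is loaded
 * into IBL_TARGET_REG by mov-immediate instrs predicated "eq" (the inverse of
 * "ne"), so exactly one of the two paths sets the IBL target.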
| */ |
| static void |
| mangle_add_predicated_fall_through(dcontext_t *dcontext, instrlist_t *ilist, |
| instr_t *instr, instr_t *next_instr, |
| instr_t *mangle_start) |
| { |
| /* Our approach is to simply add a move-immediate of the fallthrough |
| * address under the inverted predicate. This is much simpler to |
| * implement than adding a new kind of indirect branch ("conditional |
| * indirect") and plumbing it through all the optimized emit and link |
| * code (in particular, cbr stub sharing and other complex features). |
| */ |
| dr_pred_type_t pred = instr_get_predicate(instr); |
| ptr_int_t fall_through = get_call_return_address(dcontext, ilist, instr); |
| instr_t *first, *last; |
| ASSERT(instr_is_predicated(instr)); /* caller should check */ |
| |
| /* Mark the taken mangling as predicated. We are starting after our r2 |
| * spill. It gets complex w/ interactions with mangle_stolen_reg() (b/c |
| * we aren't starting far enough back) so we bail for that. |
| * For mangle_pc_read(), we simply don't predicate the restore (b/c |
| * we aren't predicating the save). |
| */ |
| if (!instr_uses_reg(instr, dr_reg_stolen)) { |
| instr_t *prev = instr_get_next(mangle_start); |
| for (; prev != next_instr; prev = instr_get_next(prev)) { |
| if (instr_is_app(prev) || |
| !instr_is_DR_reg_spill_or_restore(dcontext, prev, NULL, NULL, NULL, NULL)) |
| instr_set_predicate(prev, pred); |
| } |
| } |
| |
| insert_mov_immed_ptrsz( |
| dcontext, |
| (ptr_int_t)PC_AS_JMP_TGT(instr_get_isa_mode(instr), (app_pc)fall_through), |
| opnd_create_reg(IBL_TARGET_REG), ilist, next_instr, &first, &last); |
| for (;; first = instr_get_next(first)) { |
| instr_set_predicate(first, instr_invert_predicate(pred)); |
| if (last == NULL || first == last) |
| break; |
| } |
| } |
| |
| static inline bool |
| app_instr_is_in_it_block(dcontext_t *dcontext, instr_t *instr) |
| { |
| ASSERT(instr_is_app(instr)); |
| return (instr_get_isa_mode(instr) == DR_ISA_ARM_THUMB && instr_is_predicated(instr)); |
| } |
| |
| #endif /* !AARCH64 */ |
| |
| instr_t * |
| mangle_direct_call(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| instr_t *next_instr, bool mangle_calls, uint flags) |
| { |
| #ifdef AARCH64 |
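    /* Same strategy as the ARM path below (a sketch): materialize the return
     * address in X30 and delete the OP_bl; the taken branch to the callee is
     * left to the block's exit cti.
     */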
| ptr_int_t target, retaddr; |
| |
| ASSERT(instr_get_opcode(instr) == OP_bl); |
| ASSERT(opnd_is_pc(instr_get_target(instr))); |
| target = (ptr_int_t)opnd_get_pc(instr_get_target(instr)); |
| retaddr = get_call_return_address(dcontext, ilist, instr); |
| insert_mov_immed_ptrsz(dcontext, retaddr, opnd_create_reg(DR_REG_X30), ilist, instr, |
| NULL, NULL); |
| instrlist_remove(ilist, instr); /* remove OP_bl */ |
| instr_destroy(dcontext, instr); |
| return next_instr; |
| #else |
| /* Strategy: replace OP_bl with 2-step mov immed into lr + OP_b */ |
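    /* E.g. (a sketch): "bl foo" becomes
     *   movw lr, #<retaddr low 16>
     *   movt lr, #<retaddr high 16>
     * with the branch to foo left to the exit cti (or, if predicated, to the
     * jump added below).
     */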
| ptr_uint_t retaddr; |
| uint opc = instr_get_opcode(instr); |
| ptr_int_t target; |
| instr_t *first, *last; |
| bool in_it = app_instr_is_in_it_block(dcontext, instr); |
| instr_t *bound_start = INSTR_CREATE_label(dcontext); |
| if (in_it) { |
| /* split instr off from its IT block for easier mangling (we reinstate later) */ |
| next_instr = mangle_remove_from_it_block(dcontext, ilist, instr); |
| } |
| PRE(ilist, instr, bound_start); |
| ASSERT(opc == OP_bl || opc == OP_blx); |
| ASSERT(opnd_is_pc(instr_get_target(instr))); |
| target = (ptr_int_t)opnd_get_pc(instr_get_target(instr)); |
| retaddr = get_call_return_address(dcontext, ilist, instr); |
| insert_mov_immed_ptrsz( |
| dcontext, (ptr_int_t)PC_AS_JMP_TGT(instr_get_isa_mode(instr), (app_pc)retaddr), |
| opnd_create_reg(DR_REG_LR), ilist, instr, &first, &last); |
| if (opc == OP_bl) { |
| /* OP_blx predication is handled below */ |
| if (instr_is_predicated(instr)) { |
| for (;; first = instr_get_next(first)) { |
| instr_set_predicate(first, instr_get_predicate(instr)); |
| if (last == NULL || first == last) |
| break; |
| } |
| /* Add exit cti for taken direction b/c we're removing the OP_bl */ |
| instrlist_preinsert( |
| ilist, instr, |
| INSTR_PRED(XINST_CREATE_jump(dcontext, opnd_create_pc((app_pc)target)), |
| instr_get_predicate(instr))); |
| } |
| } else { |
| /* Unfortunately while there is OP_blx with an immed, OP_bx requires |
| * indirection through a register. We thus need to swap modes separately, |
| * but our ISA doesn't support mixing modes in one fragment, making |
| * a local "blx next_instr" not easy. We have two potential solutions: |
| * A) Implement far linking through stub's "ldr pc, [pc + 8]" and use |
| * it for blx. We need to implement that anyway for reachability, |
| * but as it's not implemented yet, I'm going w/ B) for now. |
| * B) Pretend this is an indirect branch and use the ibl. |
| * This is slower so XXX i#1612: switch to A once we have far links. |
| */ |
| if (instr_get_isa_mode(instr) == DR_ISA_ARM_A32) |
| target = (ptr_int_t)PC_AS_JMP_TGT(DR_ISA_ARM_THUMB, (app_pc)target); |
| PRE(ilist, instr, |
| instr_create_save_to_tls(dcontext, IBL_TARGET_REG, IBL_TARGET_SLOT)); |
| insert_mov_immed_ptrsz(dcontext, target, opnd_create_reg(IBL_TARGET_REG), ilist, |
| instr, NULL, NULL); |
| if (instr_is_predicated(instr)) { |
| mangle_add_predicated_fall_through(dcontext, ilist, instr, next_instr, |
| bound_start); |
| ASSERT(in_it || instr_get_isa_mode(instr) != DR_ISA_ARM_THUMB); |
| } |
| } |
| /* remove OP_bl (final added jmp already targets the callee) or OP_blx */ |
| instrlist_remove(ilist, instr); |
| instr_destroy(dcontext, instr); |
| if (in_it) |
| mangle_reinstate_it_blocks(dcontext, ilist, bound_start, next_instr); |
| return next_instr; |
| #endif |
| } |
| |
| instr_t * |
| mangle_indirect_call(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| instr_t *next_instr, bool mangle_calls, uint flags) |
| { |
| #ifdef AARCH64 |
| ASSERT(instr_get_opcode(instr) == OP_blr); |
| PRE(ilist, instr, |
| instr_create_save_to_tls(dcontext, IBL_TARGET_REG, IBL_TARGET_SLOT)); |
| ASSERT(opnd_is_reg(instr_get_target(instr))); |
| if (opnd_same(instr_get_target(instr), opnd_create_reg(dr_reg_stolen))) { |
| /* if the target reg is dr_reg_stolen, the app value is in TLS */ |
| PRE(ilist, instr, |
| instr_create_restore_from_tls(dcontext, IBL_TARGET_REG, TLS_REG_STOLEN_SLOT)); |
| } else { |
| PRE(ilist, instr, |
| XINST_CREATE_move(dcontext, opnd_create_reg(IBL_TARGET_REG), |
| instr_get_target(instr))); |
| } |
| insert_mov_immed_ptrsz(dcontext, get_call_return_address(dcontext, ilist, instr), |
| opnd_create_reg(DR_REG_X30), ilist, next_instr, NULL, NULL); |
| instrlist_remove(ilist, instr); /* remove OP_blr */ |
| instr_destroy(dcontext, instr); |
| return next_instr; |
| #else |
| ptr_uint_t retaddr; |
| bool in_it = app_instr_is_in_it_block(dcontext, instr); |
| instr_t *bound_start = INSTR_CREATE_label(dcontext); |
| if (in_it) { |
| /* split instr off from its IT block for easier mangling (we reinstate later) */ |
| next_instr = mangle_remove_from_it_block(dcontext, ilist, instr); |
| } |
| PRE(ilist, instr, |
| instr_create_save_to_tls(dcontext, IBL_TARGET_REG, IBL_TARGET_SLOT)); |
| /* We need the spill to be unconditional so start pred processing here */ |
| PRE(ilist, instr, bound_start); |
| |
| if (!opnd_same(instr_get_target(instr), opnd_create_reg(IBL_TARGET_REG))) { |
| if (opnd_same(instr_get_target(instr), opnd_create_reg(dr_reg_stolen))) { |
| /* if the target reg is dr_reg_stolen, the app value is in TLS */ |
| PRE(ilist, instr, |
| instr_create_restore_from_tls(dcontext, IBL_TARGET_REG, |
| TLS_REG_STOLEN_SLOT)); |
| } else { |
| PRE(ilist, instr, |
| XINST_CREATE_move(dcontext, opnd_create_reg(IBL_TARGET_REG), |
| instr_get_target(instr))); |
| } |
| } |
| retaddr = get_call_return_address(dcontext, ilist, instr); |
| insert_mov_immed_ptrsz( |
| dcontext, (ptr_int_t)PC_AS_JMP_TGT(instr_get_isa_mode(instr), (app_pc)retaddr), |
| opnd_create_reg(DR_REG_LR), ilist, instr, NULL, NULL); |
| |
| if (instr_is_predicated(instr)) { |
| mangle_add_predicated_fall_through(dcontext, ilist, instr, next_instr, |
| bound_start); |
| ASSERT(in_it || instr_get_isa_mode(instr) != DR_ISA_ARM_THUMB); |
| } |
| /* remove OP_blx_ind (final added jmp already targets the callee) */ |
| instrlist_remove(ilist, instr); |
| instr_destroy(dcontext, instr); |
| if (in_it) |
| mangle_reinstate_it_blocks(dcontext, ilist, bound_start, next_instr); |
| return next_instr; |
| #endif |
| } |
| |
| void |
| mangle_return(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| instr_t *next_instr, uint flags) |
| { |
| /* The mangling is identical */ |
| mangle_indirect_jump(dcontext, ilist, instr, next_instr, flags); |
| } |
| |
| instr_t * |
| mangle_indirect_jump(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| instr_t *next_instr, uint flags) |
| { |
| #ifdef AARCH64 |
| ASSERT(instr_get_opcode(instr) == OP_br || instr_get_opcode(instr) == OP_ret); |
| PRE(ilist, instr, |
| instr_create_save_to_tls(dcontext, IBL_TARGET_REG, IBL_TARGET_SLOT)); |
| ASSERT(opnd_is_reg(instr_get_target(instr))); |
| if (opnd_same(instr_get_target(instr), opnd_create_reg(dr_reg_stolen))) { |
| /* if the target reg is dr_reg_stolen, the app value is in TLS */ |
| PRE(ilist, instr, |
| instr_create_restore_from_tls(dcontext, IBL_TARGET_REG, TLS_REG_STOLEN_SLOT)); |
| } else { |
| PRE(ilist, instr, |
| XINST_CREATE_move(dcontext, opnd_create_reg(IBL_TARGET_REG), |
| instr_get_target(instr))); |
| } |
| instrlist_remove(ilist, instr); /* remove OP_br or OP_ret */ |
| instr_destroy(dcontext, instr); |
| return next_instr; |
| #else |
| bool remove_instr = false; |
| int opc = instr_get_opcode(instr); |
| dr_isa_mode_t isa_mode = instr_get_isa_mode(instr); |
| bool in_it = app_instr_is_in_it_block(dcontext, instr); |
| instr_t *bound_start = INSTR_CREATE_label(dcontext); |
| if (in_it) { |
| /* split instr off from its IT block for easier mangling (we reinstate later) */ |
| next_instr = mangle_remove_from_it_block(dcontext, ilist, instr); |
| } |
| PRE(ilist, instr, |
| instr_create_save_to_tls(dcontext, IBL_TARGET_REG, IBL_TARGET_SLOT)); |
| /* We need the spill to be unconditional so start pred processing here */ |
| PRE(ilist, instr, bound_start); |
| /* Most gpr_list writes are handled by mangle_gpr_list_write() by extracting |
| * a single "ldr pc" instr out for mangling here, except simple instructions |
| * like "pop pc". Xref mangle_gpr_list_write() for details. |
| */ |
| if (instr_writes_gpr_list(instr)) { |
| opnd_t memop = instr_get_src(instr, 0); |
| /* must be simple cases like "pop pc" */ |
| ASSERT(opnd_is_base_disp(memop)); |
| ASSERT(opnd_get_reg(instr_get_dst(instr, 0)) == DR_REG_PC); |
| /* FIXME i#1551: on A32, ldm* can have only one reg in the reglist, |
| * i.e., "ldm r10, {pc}" is valid, so we should check dr_reg_stolen usage. |
| */ |
| ASSERT_NOT_IMPLEMENTED(!opnd_uses_reg(memop, dr_reg_stolen)); |
| opnd_set_size(&memop, OPSZ_VAR_REGLIST); |
| instr_set_src(instr, 0, memop); |
| instr_set_dst(instr, 0, opnd_create_reg(IBL_TARGET_REG)); |
| # ifdef CLIENT_INTERFACE |
| /* We target only the typical return instructions: multi-pop here */ |
| if (TEST(INSTR_CLOBBER_RETADDR, instr->flags) && opc == OP_ldmia) { |
| bool writeback = instr_num_srcs(instr) > 1; |
| if (writeback) { |
| opnd_set_disp(&memop, -sizeof(void *)); |
| opnd_set_size(&memop, OPSZ_PTR); |
| /* We do not support writing a passed-in value as it would require |
| * spilling another reg. We write the only non-retaddr-guaranteed |
| * reg we have, our stolen reg. |
| */ |
| POST(ilist, instr, |
| XINST_CREATE_store(dcontext, memop, opnd_create_reg(dr_reg_stolen))); |
| } /* else not a pop */ |
| } |
| # endif |
| } else if (opc == OP_bx || opc == OP_bxj) { |
| ASSERT(opnd_is_reg(instr_get_target(instr))); |
| if (opnd_same(instr_get_target(instr), opnd_create_reg(dr_reg_stolen))) { |
| /* if the target reg is dr_reg_stolen, the app value is in TLS */ |
| PRE(ilist, instr, |
| instr_create_restore_from_tls(dcontext, IBL_TARGET_REG, |
| TLS_REG_STOLEN_SLOT)); |
| } else { |
| PRE(ilist, instr, |
| XINST_CREATE_move(dcontext, opnd_create_reg(IBL_TARGET_REG), |
| instr_get_target(instr))); |
| } |
| /* remove the bx */ |
| remove_instr = true; |
| } else if (opc == OP_tbb || opc == OP_tbh) { |
| /* XXX: should we add dr_insert_get_mbr_branch_target() for use
| * internally and by clients? OP_tb{b,h} break our assumptions of the target |
| * simply being stored as an absolute address at the memory operand location. |
| * Instead, these are pc-relative: pc += memval*2. However, it's non-trivial |
| * to add that, as it requires duplicating all this mangling code. Really |
| * clients should use dr_insert_mbr_instrumentation(), and instr_get_target() |
| * isn't that useful for mbrs. |
| */ |
| ptr_int_t cur_pc = (ptr_int_t)decode_cur_pc( |
| instr_get_raw_bits(instr), instr_get_isa_mode(instr), opc, instr); |
| /* for cases like "tbh [pc, r10, lsl #1]" */
| if (instr_uses_reg(instr, dr_reg_stolen)) |
| mangle_stolen_reg(dcontext, ilist, instr, instr_get_next(instr), false); |
| |
| if (opc == OP_tbb) { |
| PRE(ilist, instr, |
| INSTR_CREATE_ldrb(dcontext, opnd_create_reg(IBL_TARGET_REG), |
| instr_get_src(instr, 0))); |
| } else { |
| PRE(ilist, instr, |
| INSTR_CREATE_ldrh(dcontext, opnd_create_reg(IBL_TARGET_REG), |
| instr_get_src(instr, 0))); |
| } |
| PRE(ilist, instr, |
| INSTR_CREATE_lsl(dcontext, opnd_create_reg(IBL_TARGET_REG), |
| opnd_create_reg(IBL_TARGET_REG), OPND_CREATE_INT(1))); |
| /* Rather than stealing another register and using movw+movt to put the pc
| * into it, we split the add into 4 pieces.
| * Even if the memref is pc-relative, this is still faster than sharing |
| * the pc from mangle_rel_addr() if we have mangle_rel_addr() use r2 |
| * as the scratch reg. |
| * XXX: arrange for that to happen, when we refactor the ind br vs PC |
| * and stolen reg mangling, if memref doesn't already use r2. |
| */ |
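| /* Illustrative expansion (a sketch: the registers and pc value are made-up
| * examples) for "tbh [r1, r0, lsl #1]" with cur_pc == 0x2000404a and r2
| * as IBL_TARGET_REG:
| *   ldrh r2, [r1, r0, lsl #1]
| *   lsl  r2, r2, #1
| *   add  r2, #0x20000000
| *   add  r2, #0x00000000
| *   add  r2, #0x00004000
| *   add  r2, #0x0000004b    <- low byte | 0x1 to stay in Thumb mode
| */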
| if (opc == OP_tbb) { |
| /* One byte x2 won't touch the top half, so we use a movt to add: */ |
| PRE(ilist, instr, |
| INSTR_CREATE_movt(dcontext, opnd_create_reg(IBL_TARGET_REG), |
| OPND_CREATE_INT((cur_pc & 0xffff0000) >> 16))); |
| } else { |
| PRE(ilist, instr, |
| XINST_CREATE_add(dcontext, opnd_create_reg(IBL_TARGET_REG), |
| OPND_CREATE_INT(cur_pc & 0xff000000))); |
| PRE(ilist, instr, |
| XINST_CREATE_add(dcontext, opnd_create_reg(IBL_TARGET_REG), |
| OPND_CREATE_INT(cur_pc & 0x00ff0000))); |
| } |
| PRE(ilist, instr, |
| XINST_CREATE_add(dcontext, opnd_create_reg(IBL_TARGET_REG), |
| OPND_CREATE_INT(cur_pc & 0x0000ff00))); |
| PRE(ilist, instr, |
| XINST_CREATE_add(dcontext, opnd_create_reg(IBL_TARGET_REG), |
| /* These do not switch modes so we set LSB */ |
| OPND_CREATE_INT((cur_pc & 0x000000ff) | 0x1))); |
| /* remove the instr */ |
| remove_instr = true; |
| } else if (opc == OP_rfe || opc == OP_rfedb || opc == OP_rfeda || opc == OP_rfeib || |
| opc == OP_eret) { |
| /* FIXME i#1551: NYI on ARM */ |
| ASSERT_NOT_IMPLEMENTED(false); |
| } else { |
| /* Explicitly writes just the pc */ |
| uint i; |
| bool found_pc = false;
| instr_t *immed_next = instr_get_next(instr); |
| /* XXX: can anything (non-OP_ldm) have r2 as an additional dst? */ |
| ASSERT_NOT_IMPLEMENTED( |
| !instr_writes_to_reg(instr, IBL_TARGET_REG, DR_QUERY_INCLUDE_ALL)); |
| for (i = 0; i < instr_num_dsts(instr); i++) { |
| if (opnd_is_reg(instr_get_dst(instr, i)) && |
| opnd_get_reg(instr_get_dst(instr, i)) == DR_REG_PC) { |
| found_pc = true; |
| instr_set_dst(instr, i, opnd_create_reg(IBL_TARGET_REG)); |
| break; |
| } |
| } |
| ASSERT(found_pc); |
| if (isa_mode == DR_ISA_ARM_THUMB && |
| (instr_get_opcode(instr) == OP_mov || instr_get_opcode(instr) == OP_add)) { |
| /* Some Thumb write-to-PC instructions (OP_add and OP_mov) are simple |
| * non-mode-changing branches, so we set LSB to 1. |
| */ |
| opnd_t src = opnd_create_reg(IBL_TARGET_REG); |
| if (instr_get_opcode(instr) == OP_mov && !instr_is_predicated(instr)) { |
| /* Optimization: we can replace the mov */ |
| src = instr_get_src(instr, 0); |
| remove_instr = true; |
| } |
| if (instr_get_opcode(instr) == OP_add) { |
| /* We need to add shift immeds: easiest to create a new add (i#1919) */ |
| PRE(ilist, instr, |
| INSTR_CREATE_add(dcontext, instr_get_dst(instr, 0), |
| instr_get_src(instr, 0), instr_get_src(instr, 1))); |
| remove_instr = true; |
| } |
| /* We want this before any mangle_rel_addr mangling */ |
| POST(ilist, instr, |
| INSTR_CREATE_orr(dcontext, opnd_create_reg(IBL_TARGET_REG), src, |
| OPND_CREATE_INT(1))); |
| } |
| if (instr_uses_reg(instr, dr_reg_stolen)) { |
| /* Stolen register mangling must happen after orr instr |
| * inserted above but before any mangle_rel_addr mangling. |
| */ |
| mangle_stolen_reg(dcontext, ilist, instr, immed_next, remove_instr); |
| } |
| # ifdef CLIENT_INTERFACE |
| /* We target only the typical return instructions: single pop here */ |
| if (TEST(INSTR_CLOBBER_RETADDR, instr->flags) && opc == OP_ldr) { |
| bool writeback = instr_num_srcs(instr) > 1; |
| if (writeback && opnd_is_immed_int(instr_get_src(instr, 1))) { |
| opnd_t memop = instr_get_src(instr, 0); |
| opnd_set_disp(&memop, -opnd_get_immed_int(instr_get_src(instr, 1))); |
| /* See above: we just write our stolen reg value */ |
| POST(ilist, instr, |
| XINST_CREATE_store(dcontext, memop, opnd_create_reg(dr_reg_stolen))); |
| } /* else not a pop */ |
| } |
| # endif |
| } |
| if (instr_is_predicated(instr)) { |
| mangle_add_predicated_fall_through(dcontext, ilist, instr, next_instr, |
| bound_start); |
| ASSERT(in_it || isa_mode != DR_ISA_ARM_THUMB); |
| } |
| if (remove_instr) { |
| instrlist_remove(ilist, instr); |
| instr_destroy(dcontext, instr); |
| } |
| if (in_it) |
| mangle_reinstate_it_blocks(dcontext, ilist, bound_start, next_instr); |
| return next_instr; |
| #endif |
| } |
| |
| /* Local single-instr-window scratch reg picker. Only considers r0-r5, so the caller
| * must split up any GPR reg list first. Assumes we only care about instrs that read
| * or write regs outside of r0-r5, so we'll only fail on instrs that can access 7
| * GPRs, and again the caller should split those up.
| *
| * For some use cases (e.g., mangling the stolen reg), the scratch reg will be
| * used across the app instr, so we cannot pick a dead reg.
| *
| * Returns REG_NULL if we fail to find a scratch reg.
| */
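| /* The fallback ladder implemented below, in order:
| * 1. reuse the reg from a prior scratch-reg restore, if still usable (ARM only);
| * 2. an unused reg among SCRATCH_REG0..SCRATCH_REG_LAST;
| * 3. if dead_reg_ok, a reg in that range that the instr does not read
| *    (no restore needed: flagged via should_restore);
| * 4. last resort: a reg above SCRATCH_REG_LAST paired with a free low slot.
| */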
| static reg_id_t |
| pick_scratch_reg(dcontext_t *dcontext, instr_t *instr, reg_id_t do_not_pick_a, |
| reg_id_t do_not_pick_b, reg_id_t do_not_pick_c, bool dead_reg_ok, |
| ushort *scratch_slot OUT, bool *should_restore OUT) |
| { |
| reg_id_t reg; |
| ushort slot = 0; |
| if (should_restore != NULL) |
| *should_restore = true; |
| |
| #ifndef AARCH64 /* FIXME i#1569: not yet optimized */ |
| if (find_prior_scratch_reg_restore(dcontext, instr, ®) != NULL && |
| reg != REG_NULL && !instr_uses_reg(instr, reg) && |
| !reg_overlap(reg, do_not_pick_a) && !reg_overlap(reg, do_not_pick_b) && |
| !reg_overlap(reg, do_not_pick_c) && |
| /* Ensure no conflict in scratch regs for PC or stolen reg |
| * mangling vs ind br mangling. We can't just check for mbr b/c |
| * of OP_blx. |
| */ |
| (!instr_is_cti(instr) || reg != IBL_TARGET_REG)) { |
| ASSERT(reg >= SCRATCH_REG0 && reg <= SCRATCH_REG_LAST); |
| slot = TLS_REG0_SLOT + sizeof(reg_t) * (reg - SCRATCH_REG0); |
| DOLOG(4, LOG_INTERP, { |
| dcontext_t *dcontext = get_thread_private_dcontext(); |
| LOG(THREAD, LOG_INTERP, 4, "use last scratch reg %s\n", reg_names[reg]); |
| }); |
| } else |
| #endif |
| reg = REG_NULL; |
| |
| if (reg == REG_NULL) { |
| for (reg = SCRATCH_REG0, slot = TLS_REG0_SLOT; reg <= SCRATCH_REG_LAST; |
| reg++, slot += sizeof(reg_t)) { |
| if (!instr_uses_reg(instr, reg) && !reg_overlap(reg, do_not_pick_a) && |
| !reg_overlap(reg, do_not_pick_b) && !reg_overlap(reg, do_not_pick_c) && |
| /* do not pick IBL_TARGET_REG if instr is a cti */
| (!instr_is_cti(instr) || reg != IBL_TARGET_REG)) |
| break; |
| } |
| } |
| /* We can only try to pick a dead register if the scratch reg usage
| * allows it (i.e., the reg is not needed across the app instr).
| */
| if (reg > SCRATCH_REG_LAST && dead_reg_ok) { |
| /* Likely OP_ldm. We'll have to pick a dead reg (non-ideal b/c a fault |
| * could come in: i#400). |
| */ |
| for (reg = SCRATCH_REG0, slot = TLS_REG0_SLOT; reg <= SCRATCH_REG_LAST; |
| reg++, slot += sizeof(reg_t)) { |
| if (!instr_reads_from_reg(instr, reg, DR_QUERY_INCLUDE_ALL) && |
| !reg_overlap(reg, do_not_pick_a) && !reg_overlap(reg, do_not_pick_b) && |
| !reg_overlap(reg, do_not_pick_c) && |
| /* Ensure no conflict vs ind br mangling */ |
| (!instr_is_cti(instr) || reg != IBL_TARGET_REG)) |
| break; |
| } |
| if (reg <= SCRATCH_REG_LAST && should_restore != NULL) |
| *should_restore = false; |
| } |
| if (reg > SCRATCH_REG_LAST) { |
| /* As a last resort, use a higher reg with a lower slot. This may seem |
| * confusing but the translation spill walk code handles it. |
| */ |
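| /* E.g., we might end up pairing r6 (the first reg above SCRATCH_REG_LAST)
| * with r0's TLS slot if that slot has no conflicts.
| */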
| for (reg = SCRATCH_REG0, slot = TLS_REG0_SLOT; reg <= SCRATCH_REG_LAST; |
| reg++, slot += sizeof(reg_t)) { |
| if (!reg_overlap(reg, do_not_pick_a) && !reg_overlap(reg, do_not_pick_b) && |
| !reg_overlap(reg, do_not_pick_c) && |
| /* do not pick IBL_TARGET_REG if instr is a cti */
| (!instr_is_cti(instr) || reg != IBL_TARGET_REG)) |
| break; |
| } |
| /* We have a slot. Now pick a reg. */ |
| for (reg = SCRATCH_REG_LAST + 1; reg <= DR_REG_STOP_GPR; reg++) { |
| if (!instr_uses_reg(instr, reg) && !reg_overlap(reg, do_not_pick_a) && |
| !reg_overlap(reg, do_not_pick_b) && !reg_overlap(reg, do_not_pick_c) && |
| /* do not pick IBL_TARGET_REG if instr is a cti */
| (!instr_is_cti(instr) || reg != IBL_TARGET_REG)) |
| break; |
| } |
| } |
| /* Only OP_stm could read all 6 of our scratch regs and also read or write
| * the PC or stolen reg (OP_smlal{b,t}{b,t} can read 4 GPRs but not a 5th),
| * and it's not allowed to have PC as a base reg (it's "unpredictable" at |
| * least). For stolen reg as base, we should split it up before calling here. |
| */ |
| if (reg > DR_REG_STOP_GPR) |
| reg = REG_NULL; |
| if (scratch_slot != NULL) |
| *scratch_slot = slot; |
| return reg; |
| } |
| |
| /* Should return NULL if it destroys "instr". */ |
| instr_t * |
| mangle_rel_addr(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| instr_t *next_instr) |
| { |
| #ifdef AARCH64 |
| uint opc = instr_get_opcode(instr); |
| opnd_t dst = instr_get_dst(instr, 0); |
| opnd_t src = instr_get_src(instr, 0); |
| app_pc tgt; |
| ASSERT(opc == OP_adr || opc == OP_adrp || opc == OP_ldr || opc == OP_ldrsw); |
| ASSERT(instr_has_rel_addr_reference(instr)); |
| instr_get_rel_addr_target(instr, &tgt); |
| ASSERT(opnd_is_reg(dst)); |
| ASSERT(opnd_is_rel_addr(src)); |
| ASSERT(opnd_get_addr(src) == tgt); |
| |
| if (instr_uses_reg(instr, dr_reg_stolen)) { |
| dst = opnd_create_reg(reg_resize_to_opsz(DR_REG_X0, opnd_get_size(dst))); |
| PRE(ilist, next_instr, |
| instr_create_save_to_tls(dcontext, DR_REG_X0, TLS_REG0_SLOT)); |
| } |
| |
| if ((opc == OP_ldr || opc == OP_ldrsw) && reg_is_gpr(opnd_get_reg(dst))) { |
| reg_id_t xreg = reg_to_pointer_sized(opnd_get_reg(dst)); |
| insert_mov_immed_ptrsz(dcontext, (ptr_int_t)tgt, opnd_create_reg(xreg), ilist, |
| next_instr, NULL, NULL); |
| PRE(ilist, next_instr, |
| instr_create_1dst_1src( |
| dcontext, opc, dst, |
| opnd_create_base_disp(xreg, REG_NULL, 0, 0, opnd_get_size(src)))); |
| } else if (opc == OP_ldr) { |
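| /* Non-GPR (e.g., SIMD/FP) destination: we cannot materialize the target
| * address in the destination itself, so we borrow x0 around the load.
| */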
| PRE(ilist, instr, instr_create_save_to_tls(dcontext, DR_REG_X0, TLS_REG0_SLOT)); |
| insert_mov_immed_ptrsz(dcontext, (ptr_int_t)tgt, opnd_create_reg(DR_REG_X0), |
| ilist, next_instr, NULL, NULL); |
| PRE(ilist, next_instr, |
| XINST_CREATE_load( |
| dcontext, dst, |
| opnd_create_base_disp(DR_REG_X0, REG_NULL, 0, 0, opnd_get_size(dst)))); |
| PRE(ilist, next_instr, |
| instr_create_restore_from_tls(dcontext, DR_REG_X0, TLS_REG0_SLOT)); |
| } else { |
| /* OP_adr, OP_adrp */ |
| insert_mov_immed_ptrsz(dcontext, (ptr_int_t)tgt, dst, ilist, next_instr, NULL, |
| NULL); |
| } |
| |
| if (instr_uses_reg(instr, dr_reg_stolen)) { |
| PRE(ilist, next_instr, |
| instr_create_save_to_tls(dcontext, DR_REG_X0, TLS_REG_STOLEN_SLOT)); |
| PRE(ilist, next_instr, |
| instr_create_restore_from_tls(dcontext, DR_REG_X0, TLS_REG0_SLOT)); |
| } |
| |
| instrlist_remove(ilist, instr); |
| instr_destroy(dcontext, instr); |
| return NULL; |
| #else |
| /* Compute the value of r15==pc for orig app instr */ |
| ptr_int_t r15 = |
| (ptr_int_t)decode_cur_pc(instr_get_raw_bits(instr), instr_get_isa_mode(instr), |
| instr_get_opcode(instr), instr); |
| opnd_t mem_op; |
| ushort slot; |
| bool should_restore; |
| reg_id_t reg = pick_scratch_reg(dcontext, instr, DR_REG_NULL, DR_REG_NULL, |
| DR_REG_NULL, true, &slot, &should_restore); |
| opnd_t new_op; |
| dr_shift_type_t shift_type; |
| uint shift_amt, disp; |
| bool store = instr_writes_memory(instr); |
| bool in_it = app_instr_is_in_it_block(dcontext, instr); |
| instr_t *bound_start = INSTR_CREATE_label(dcontext); |
| if (in_it) { |
| /* split instr off from its IT block for easier mangling (we reinstate later) */ |
| next_instr = mangle_remove_from_it_block(dcontext, ilist, instr); |
| } |
| PRE(ilist, instr, bound_start); |
| |
| ASSERT(instr_has_rel_addr_reference(instr)); |
| /* Manual says "unpredictable" if PC is the base of ldm/stm */
| ASSERT(!instr_reads_gpr_list(instr) && !instr_writes_gpr_list(instr)); |
| ASSERT(reg != REG_NULL); |
| if (store) { |
| mem_op = instr_get_dst(instr, 0); |
| } else { |
| mem_op = instr_get_src(instr, 0); |
| } |
| ASSERT(opnd_is_base_disp(mem_op)); |
| ASSERT(opnd_get_base(mem_op) == DR_REG_PC); |
| |
| disp = opnd_get_disp(mem_op); |
| /* For Thumb, there is a special-cased subtract from PC with a 12-bit immed that |
| * has no analogue with a non-PC base. |
| */ |
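| /* E.g., a Thumb "ldr r0, [pc, #-0x400]" is handled by folding the 0x400
| * into the pc value we materialize below, leaving a plain [reg] memref.
| */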
| if (instr_get_isa_mode(instr) == DR_ISA_ARM_THUMB && |
| TEST(DR_OPND_NEGATED, opnd_get_flags(mem_op)) && disp >= 256) { |
| /* Apply the disp now */ |
| r15 -= disp; |
| disp = 0; |
| } |
| |
| insert_save_to_tls_if_necessary(dcontext, ilist, instr, reg, slot); |
| insert_mov_immed_ptrsz(dcontext, r15, opnd_create_reg(reg), ilist, instr, NULL, NULL); |
| |
| shift_type = opnd_get_index_shift(mem_op, &shift_amt); |
| new_op = |
| opnd_create_base_disp_arm(reg, opnd_get_index(mem_op), shift_type, shift_amt, |
| disp, opnd_get_flags(mem_op), opnd_get_size(mem_op)); |
| if (store) { |
| instr_set_dst(instr, 0, new_op); |
| } else { |
| instr_set_src(instr, 0, new_op); |
| } |
| |
| if (should_restore) |
| PRE(ilist, next_instr, instr_create_restore_from_tls(dcontext, reg, slot)); |
| |
| if (in_it) { |
| /* XXX: we could mark our mangling as predicated in some cases, |
| * like mangle_add_predicated_fall_through() does. |
| */ |
| mangle_reinstate_it_blocks(dcontext, ilist, bound_start, next_instr); |
| } |
| return next_instr; |
| #endif |
| } |
| |
| #ifndef AARCH64 |
| |
| /* mangle a simple pc read; pc reads in a gpr_list are handled in mangle_gpr_list_read */
| static void |
| mangle_pc_read(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| instr_t *next_instr) |
| { |
| ushort slot; |
| bool should_restore; |
| reg_id_t reg = pick_scratch_reg(dcontext, instr, DR_REG_NULL, DR_REG_NULL, |
| DR_REG_NULL, true, &slot, &should_restore); |
| ptr_int_t app_r15 = |
| (ptr_int_t)decode_cur_pc(instr_get_raw_bits(instr), instr_get_isa_mode(instr), |
| instr_get_opcode(instr), instr); |
| int i; |
| |
| ASSERT(reg != REG_NULL); |
| ASSERT(!instr_is_meta(instr) && |
| instr_reads_from_reg(instr, DR_REG_PC, DR_QUERY_INCLUDE_ALL)); |
| |
| insert_save_to_tls_if_necessary(dcontext, ilist, instr, reg, slot); |
| insert_mov_immed_ptrsz(dcontext, app_r15, opnd_create_reg(reg), ilist, instr, NULL, |
| NULL); |
| for (i = 0; i < instr_num_srcs(instr); i++) { |
| if (opnd_uses_reg(instr_get_src(instr, i), DR_REG_PC)) { |
| /* A memref should have been mangled already in mangle_rel_addr */ |
| opnd_t orig = instr_get_src(instr, i); |
| ASSERT(opnd_is_reg(orig)); |
| instr_set_src( |
| instr, i, |
| opnd_create_reg_ex(reg, opnd_get_size(orig), opnd_get_flags(orig))); |
| } |
| } |
| if (should_restore) |
| PRE(ilist, next_instr, instr_create_restore_from_tls(dcontext, reg, slot)); |
| } |
| |
| #endif /* !AARCH64 */ |
| |
| /* save tls_base from dr_reg_stolen to reg and load the app value into dr_reg_stolen */
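| /* Illustrative expansion (a sketch, assuming r10 is dr_reg_stolen and r1 is
| * the passed-in scratch reg):
| *   str r1, [r10, #r1_slot]       <- spill scratch, if not already spilled
| *   orr r1, r10, r10              <- the XINST_CREATE_move below: r1 := TLS base
| *   ldr r10, [r10, #stolen_slot]  <- app value into r10, if it may be needed
| */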
| static void |
| restore_app_value_to_stolen_reg(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| reg_id_t reg, ushort slot) |
| { |
| insert_save_to_tls_if_necessary(dcontext, ilist, instr, reg, slot); |
| /* This precise opcode (OP_orr) is checked for in instr_is_stolen_reg_move(). */ |
| PRE(ilist, instr, |
| XINST_CREATE_move(dcontext, opnd_create_reg(reg), |
| opnd_create_reg(dr_reg_stolen))); |
| /* Load the app value if dr_reg_stolen might be read, or if it might not
| * always be written (e.g., under predicated execution): we must hold the
| * correct app value so that we write back the right thing afterward.
| */
| if (instr_reads_from_reg(instr, dr_reg_stolen, DR_QUERY_DEFAULT) || |
| !instr_writes_to_exact_reg(instr, dr_reg_stolen, DR_QUERY_DEFAULT)) { |
| PRE(ilist, instr, |
| instr_create_restore_from_tls(dcontext, dr_reg_stolen, TLS_REG_STOLEN_SLOT)); |
| } else { |
| DOLOG(4, LOG_INTERP, { |
| LOG(THREAD, LOG_INTERP, 4, "skip restore stolen reg app value for: "); |
| instr_disassemble(dcontext, instr, THREAD); |
| LOG(THREAD, LOG_INTERP, 4, "\n"); |
| }); |
| } |
| } |
| |
| /* store the app value from dr_reg_stolen to its TLS slot if the instr may have
| * written it, and restore tls_base from reg back to dr_reg_stolen
| */ |
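| /* Illustrative expansion (same register assumptions as the sketch above):
| *   str r10, [r1, #stolen_slot]  <- save app value, only if it may be written
| *   orr r10, r1, r1              <- TLS base back into r10
| */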
| static void |
| restore_tls_base_to_stolen_reg(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| instr_t *next_instr, reg_id_t reg, ushort slot) |
| { |
| /* store app val back if it might be written */ |
| if (instr_writes_to_reg(instr, dr_reg_stolen, DR_QUERY_INCLUDE_COND_DSTS)) { |
| PRE(ilist, next_instr, |
| XINST_CREATE_store(dcontext, |
| opnd_create_base_disp(reg, REG_NULL, 0, |
| os_tls_offset(TLS_REG_STOLEN_SLOT), |
| OPSZ_PTR), |
| opnd_create_reg(dr_reg_stolen))); |
| } else { |
| DOLOG(4, LOG_INTERP, { |
| LOG(THREAD, LOG_INTERP, 4, "skip save stolen reg app value for: "); |
| instr_disassemble(dcontext, instr, THREAD); |
| LOG(THREAD, LOG_INTERP, 4, "\n"); |
| }); |
| } |
| /* restore stolen reg from spill reg */ |
| /* This precise opcode (OP_orr) is checked for in instr_is_stolen_reg_move(). */ |
| PRE(ilist, next_instr, |
| XINST_CREATE_move(dcontext, opnd_create_reg(dr_reg_stolen), |
| opnd_create_reg(reg))); |
| } |
| |
| /* XXX: merge with or refactor out old STEAL_REGISTER x86 code? */ |
| /* Mangle simple dr_reg_stolen access. |
| * dr_reg_stolen in gpr_list is handled in mangle_gpr_list_{read/write}. |
| * |
| * Because this routine switches the register that holds DR's TLS base,
| * it should be called after all other mangling routines that perform |
| * reg save/restore. |
| */ |
| static void |
| mangle_stolen_reg(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| instr_t *next_instr, bool instr_to_be_removed) |
| { |
| ushort slot; |
| bool should_restore; |
| reg_id_t tmp; |
| |
| /* Our stolen reg model is to expose it to the client. We assume that any
| * meta instrs using it are using it as TLS. |
| */ |
| ASSERT(!instr_is_meta(instr) && instr_uses_reg(instr, dr_reg_stolen)); |
| |
| #ifndef AARCH64 /* FIXME i#1569: recognise "move" on AArch64 */ |
| /* optimization, convert simple mov to ldr/str: |
| * - "mov r0 -> r10" ==> "str r0 -> [r10_slot]" |
| * - "mov r10 -> r0" ==> "ldr [r10_slot] -> r0" |
| */ |
| if (instr_get_opcode(instr) == OP_mov && opnd_is_reg(instr_get_src(instr, 0))) { |
| opnd_t opnd; |
| ASSERT(instr_num_srcs(instr) == 1 && instr_num_dsts(instr) == 1); |
| ASSERT(opnd_is_reg(instr_get_dst(instr, 0))); |
| /* mov rx -> rx, do nothing */ |
| if (opnd_same(instr_get_src(instr, 0), instr_get_dst(instr, 0))) |
| return; |
| /* this optimization changes the original instr, so it is only applied |
| * if instr_to_be_removed is false |
| */ |
| if (!instr_to_be_removed) { |
| opnd = opnd_create_tls_slot(os_tls_offset(TLS_REG_STOLEN_SLOT)); |
| if (opnd_get_reg(instr_get_src(instr, 0)) == dr_reg_stolen) { |
| /* mov r10 -> rx, convert to a ldr */ |
| instr_set_opcode(instr, OP_ldr); |
| instr_set_src(instr, 0, opnd); |
| return; |
| } else { |
| ASSERT(opnd_get_reg(instr_get_dst(instr, 0)) == dr_reg_stolen); |
| /* mov rx -> r10, convert to a str */ |
| instr_set_opcode(instr, OP_str); |
| instr_set_dst(instr, 0, opnd); |
| return; |
| } |
| ASSERT_NOT_REACHED(); |
| } |
| } |
| #endif |
| |
| /* move stolen reg value into tmp reg for app instr execution */ |
| tmp = pick_scratch_reg(dcontext, instr, DR_REG_NULL, DR_REG_NULL, DR_REG_NULL, false, |
| &slot, &should_restore); |
| ASSERT(tmp != REG_NULL); |
| restore_app_value_to_stolen_reg(dcontext, ilist, instr, tmp, slot); |
| |
| /* -- app instr executes here -- */ |
| |
| /* restore tls_base back to dr_reg_stolen */ |
| restore_tls_base_to_stolen_reg(dcontext, ilist, instr, next_instr, tmp, slot); |
| /* restore tmp if necessary */ |
| if (should_restore) |
| PRE(ilist, next_instr, instr_create_restore_from_tls(dcontext, tmp, slot)); |
| } |
| |
| /* replace a thread register read instruction with a TLS load instr */
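| /* E.g., on ARM an OP_mrc read of the app's TLS register becomes roughly
| * "ldr r0, [r10, #app_tls_base_slot]", where r10 (dr_reg_stolen) holds DR's
| * TLS base; the slot name here is illustrative.
| */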
| instr_t * |
| mangle_reads_thread_register(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| instr_t *next_instr) |
| { |
| #ifdef AARCH64 |
| reg_id_t reg = opnd_get_reg(instr_get_dst(instr, 0)); |
| ASSERT(instr->opcode == OP_mrs); |
| if (reg != dr_reg_stolen) { |
| PRE(ilist, instr, |
| instr_create_restore_from_tls(dcontext, reg, |
| os_get_app_tls_base_offset(TLS_REG_LIB))); |
| } else { |
| PRE(ilist, instr, instr_create_save_to_tls(dcontext, DR_REG_X0, TLS_REG0_SLOT)); |
| PRE(ilist, instr, |
| instr_create_restore_from_tls(dcontext, DR_REG_X0, |
| os_get_app_tls_base_offset(TLS_REG_LIB))); |
| PRE(ilist, instr, |
| instr_create_save_to_tls(dcontext, DR_REG_X0, TLS_REG_STOLEN_SLOT)); |
| PRE(ilist, instr, |
| instr_create_restore_from_tls(dcontext, DR_REG_X0, TLS_REG0_SLOT)); |
| } |
| instrlist_remove(ilist, instr); |
| instr_destroy(dcontext, instr); |
| return next_instr; |
| #else |
| opnd_t opnd; |
| reg_id_t reg; |
| bool in_it = app_instr_is_in_it_block(dcontext, instr); |
| instr_t *bound_start = INSTR_CREATE_label(dcontext); |
| if (in_it) { |
| /* split instr off from its IT block for easier mangling (we reinstate later) */ |
| next_instr = mangle_remove_from_it_block(dcontext, ilist, instr); |
| } |
| PRE(ilist, instr, bound_start); |
| ASSERT(!instr_is_meta(instr) && instr_reads_thread_register(instr)); |
| reg = opnd_get_reg(instr_get_dst(instr, 0)); |
| ASSERT(reg_is_gpr(reg) && opnd_get_size(instr_get_dst(instr, 0)) == OPSZ_PTR); |
| /* convert mrc to load */ |
| opnd = opnd_create_sized_tls_slot( |
| os_tls_offset(os_get_app_tls_base_offset(TLS_REG_LIB)), OPSZ_PTR); |
| instr_remove_srcs(dcontext, instr, 1, instr_num_srcs(instr)); |
| instr_set_src(instr, 0, opnd); |
| instr_set_opcode(instr, OP_ldr); |
| ASSERT(reg != DR_REG_PC); |
| /* special case: dst reg is dr_reg_stolen */ |
| if (reg == dr_reg_stolen) { |
| instr_t *immed_nexti; |
| /* we do not mangle r10 in [r10, disp], but we need to save r10 after execution,
| * so we cannot use mangle_stolen_reg. |
| */ |
| insert_save_to_tls_if_necessary(dcontext, ilist, instr, SCRATCH_REG0, |
| TLS_REG0_SLOT); |
| PRE(ilist, instr, |
| INSTR_CREATE_mov(dcontext, opnd_create_reg(SCRATCH_REG0), |
| opnd_create_reg(dr_reg_stolen))); |
| |
| /* -- "ldr r10, [r10, disp]" executes here -- */ |
| |
| immed_nexti = instr_get_next(instr); |
| restore_tls_base_to_stolen_reg(dcontext, ilist, instr, immed_nexti, SCRATCH_REG0, |
| TLS_REG0_SLOT); |
| PRE(ilist, immed_nexti, |
| instr_create_restore_from_tls(dcontext, SCRATCH_REG0, TLS_REG0_SLOT)); |
| } |
| if (in_it) |
| mangle_reinstate_it_blocks(dcontext, ilist, bound_start, next_instr); |
| return next_instr; |
| #endif |
| } |
| |
| #ifdef AARCH64 |
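| /* Replace a thread register write (OP_msr) with a TLS store to the app's
| * TLS base slot, mirroring mangle_reads_thread_register() above.
| */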
| instr_t * |
| mangle_writes_thread_register(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| instr_t *next_instr) |
| { |
| reg_id_t reg = opnd_get_reg(instr_get_src(instr, 0)); |
| ASSERT(instr->opcode == OP_msr); |
| if (reg != dr_reg_stolen) { |
| PRE(ilist, instr, |
| instr_create_save_to_tls(dcontext, reg, |
| os_get_app_tls_base_offset(TLS_REG_LIB))); |
| } else { |
| PRE(ilist, instr, instr_create_save_to_tls(dcontext, DR_REG_X0, TLS_REG0_SLOT)); |
| PRE(ilist, instr, |
| instr_create_restore_from_tls(dcontext, DR_REG_X0, TLS_REG_STOLEN_SLOT)); |
| PRE(ilist, instr, |
| instr_create_save_to_tls(dcontext, DR_REG_X0, |
| os_get_app_tls_base_offset(TLS_REG_LIB))); |
| PRE(ilist, instr, |
| instr_create_restore_from_tls(dcontext, DR_REG_X0, TLS_REG0_SLOT)); |
| } |
| instrlist_remove(ilist, instr); |
| instr_destroy(dcontext, instr); |
| return next_instr; |
| } |
| #endif |
| |
| #ifndef AARCH64 |
| |
| static void |
| store_reg_to_memlist(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| instr_t *next_instr, |
| reg_id_t base_reg, /* reg holding memlist base */ |
| ushort app_val_slot, /* slot holding app value */ |
| reg_id_t tmp_reg, /* scratch reg */ |
| reg_id_t fix_reg, /* reg to be fixed up */ |
| uint fix_reg_idx) |
| { |
| bool writeback = instr_num_dsts(instr) > 1; |
| uint num_srcs = instr_num_srcs(instr); |
| int offs; |
| instr_t *store; |
| |
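| /* Compute fix_reg's slot displacement relative to the base reg's value after
| * the stm executes. Worked example: for "stmia r0!, {r1, pc}" with pre-stm
| * r0 == X, the stm leaves r0 == X+8 and pc's slot at X+4, i.e.,
| * offs == -((num_srcs - 1 - fix_reg_idx) * sizeof(reg_t)) == -4.
| */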
| switch (instr_get_opcode(instr)) { |
| case OP_stmia: |
| if (writeback) |
| offs = -((num_srcs - 1 /*writeback*/ - fix_reg_idx) * sizeof(reg_t)); |
| else |
| offs = fix_reg_idx * sizeof(reg_t); |
| break; |
| case OP_stmda: |
| if (writeback) |
| offs = (fix_reg_idx + 1) * sizeof(reg_t); |
| else |
| offs = -((num_srcs - fix_reg_idx - 1) * sizeof(reg_t)); |
| break; |
| case OP_stmdb: |
| if (writeback) |
| offs = fix_reg_idx * sizeof(reg_t); |
| else |
| offs = -((num_srcs - fix_reg_idx) * sizeof(reg_t)); |
| break; |
| case OP_stmib: |
| if (writeback) |
| offs = -((num_srcs - 1 /*writeback*/ - fix_reg_idx - 1) * sizeof(reg_t)); |
| else |
| offs = (fix_reg_idx + 1) * sizeof(reg_t); |
| break; |
| default: offs = 0; ASSERT_NOT_REACHED(); |
| } |
| |
| /* load proper value into spill reg */ |
| if (fix_reg == DR_REG_PC) { |
| ptr_int_t app_r15 = |
| (ptr_int_t)decode_cur_pc(instr_get_raw_bits(instr), instr_get_isa_mode(instr), |
| instr_get_opcode(instr), instr); |
| insert_mov_immed_ptrsz(dcontext, app_r15, opnd_create_reg(tmp_reg), ilist, |
| next_instr, NULL, NULL); |
| } else { |
| /* load from app_val_slot */ |
| PRE(ilist, next_instr, |
| instr_create_restore_from_tls(dcontext, tmp_reg, app_val_slot)); |
| } |
| |
| /* store to proper location */ |
| store = XINST_CREATE_store( |
| dcontext, opnd_create_base_disp(base_reg, REG_NULL, 0, offs, OPSZ_PTR), |
| opnd_create_reg(tmp_reg)); |
| /* we must use the same predicate to avoid crashing here when original didn't run */ |
| instr_set_predicate(store, instr_get_predicate(instr)); |
| /* app instr, not meta */ |
| instr_set_translation(store, instr_get_translation(instr)); |
| instrlist_preinsert(ilist, next_instr, store); |
| } |
| |
| /* mangle a dr_reg_stolen or pc read in a reglist store (i.e., stm).
| * Approach: fix up the memory slot w/ the app value after the store.
| */ |
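| /* E.g., for "stmia r0, {r1, r10, pc}" (r10 stolen): the stm executes with
| * r10 holding DR's TLS base and pc holding a code-cache address, so afterward
| * we overwrite r10's slot with the app value from TLS and pc's slot with the
| * computed app pc.
| */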
| static void |
| mangle_gpr_list_read(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| instr_t *next_instr) |
| { |
| reg_id_t spill_regs[2] = { DR_REG_R0, DR_REG_R1 }; |
| reg_id_t spill_slots[2] = { TLS_REG0_SLOT, TLS_REG1_SLOT }; |
| /* regs that need fix up in the memory slots */ |
| reg_id_t fix_regs[2] = { DR_REG_PC, dr_reg_stolen }; |
| bool reg_found[2] = { false, false }; |
| uint reg_pos[2]; /* position of those fix_regs in reglist */ |
| uint i, j, num_srcs = instr_num_srcs(instr); |
| bool writeback = instr_num_dsts(instr) > 1; |
| bool stolen_reg_is_base = false; |
| opnd_t memop = instr_get_dst(instr, 0); |
| |
| ASSERT(dr_reg_stolen != spill_regs[0] && dr_reg_stolen != spill_regs[1]); |
| |
| /* check base reg: it cannot be PC, so it could only be dr_reg_stolen */
| if (opnd_uses_reg(memop, dr_reg_stolen)) { |
| stolen_reg_is_base = true; |
| restore_app_value_to_stolen_reg(dcontext, ilist, instr, spill_regs[0], |
| spill_slots[0]); |
| /* We do not need to fix up the memory slot for dr_reg_stolen since it holds
| * the app value now, but we may need to fix up the slot for spill_regs[0].
| */ |
| fix_regs[1] = spill_regs[0]; |
| } |
| |
| /* -- app instr executes here -- */ |
| |
| /* restore dr_reg_stolen if used as base */ |
| if (stolen_reg_is_base) { |
| ASSERT(fix_regs[1] == spill_regs[0]); |
| ASSERT(opnd_uses_reg(memop, dr_reg_stolen)); |
| /* restore dr_reg_stolen from spill_regs[0] */ |
| restore_tls_base_to_stolen_reg(dcontext, ilist, instr, |
| /* XXX: we must restore tls base right after instr |
| * for other TLS usage, so we use instr_get_next |
| * instead of next_instr. |
| */ |
| instr_get_next(instr), spill_regs[0], |
| spill_slots[0]); |
| /* do not restore spill_regs[0] as we may use it as a scratch reg later */
| } |
| |
| /* fix up memory slot w/ app value after the store */ |
| for (i = 0; i < (writeback ? (num_srcs - 1) : num_srcs); i++) { |
| reg_id_t reg; |
| ASSERT(opnd_is_reg(instr_get_src(instr, i))); |
| reg = opnd_get_reg(instr_get_src(instr, i)); |
| for (j = 0; j < 2; j++) { |
| if (reg == fix_regs[j]) { |
| reg_found[j] = true; |
| reg_pos[j] = i; |
| } |
| } |
| } |
| |
| if (reg_found[0] || reg_found[1]) { |
| ushort app_val_slot; /* slot holding app value */ |
| reg_id_t base_reg; |
| reg_id_t scratch = spill_regs[1]; |
| if (stolen_reg_is_base) { |
| /* dr_reg_stolen is used as the base in the app, but it is holding |
| * TLS base, so we now put dr_reg_stolen app value into spill_regs[0] |
| * to use it as the base instead. |
| */ |
| ASSERT(fix_regs[1] == spill_regs[0]); |
| app_val_slot = spill_slots[0]; |
| base_reg = spill_regs[0]; |
| PRE(ilist, next_instr, |
| instr_create_restore_from_tls(dcontext, spill_regs[0], |
| TLS_REG_STOLEN_SLOT)); |
| } else { |
| ASSERT(fix_regs[1] == dr_reg_stolen); |
| app_val_slot = TLS_REG_STOLEN_SLOT; |
| base_reg = opnd_get_base(memop); |
| if (opnd_uses_reg(memop, scratch)) { |
| /* We know !stolen_reg_is_base so we can use r0 as scratch instead |
| * and not have any conflicts. We keep same TLS slot. |
| */ |
| scratch = spill_regs[0]; |
| } |
| } |
| ASSERT(!opnd_uses_reg(memop, scratch)); |
| |
| /* save spill reg */ |
| insert_save_to_tls_if_necessary(dcontext, ilist, next_instr, scratch, |
| spill_slots[1]); |
| |
| /* fixup the slot in memlist */ |
| for (i = 0; i < 2; i++) { |
| if (reg_found[i]) { |
| store_reg_to_memlist(dcontext, ilist, instr, next_instr, base_reg, |
| app_val_slot, scratch, fix_regs[i], reg_pos[i]); |
| } |
| } |
| |
| /* restore spill reg */ |
| PRE(ilist, next_instr, |
| instr_create_restore_from_tls(dcontext, scratch, spill_slots[1])); |
| } |
| |
| if (stolen_reg_is_base) { |
| ASSERT(fix_regs[1] == spill_regs[0]); |
| PRE(ilist, next_instr, |
| instr_create_restore_from_tls(dcontext, spill_regs[0], spill_slots[0])); |
| } |
| } |
| |
| /* We normalize a ldm{ia,ib,da,db} instruction to a sequence of instructions: |
| * 1. adjust base |
| * 2. ldr r0 [base] # optional split for getting a scratch reg |
| * 3. ldmia |
| * 4. adjust base |
| * 5. ldr pc [base, disp] |
| */ |
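| /* Worked example (assuming no scratch-reg split is needed):
| *   ldmdb r0!, {r1-r3, pc}
| * is normalized to:
| *   sub   r0, #16        <- pre_ldm_adjust: point base at the lowest slot
| *   ldmia r0!, {r1-r3}   <- pc removed from the reglist
| *   sub   r0, #12        <- post_ldm_adjust: re-establish ldmdb's writeback
| *   ldr   pc, [r0, #12]  <- ldr_pc: mangled later as an indirect branch
| */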
| static void |
| normalize_ldm_instr(dcontext_t *dcontext, instr_t *instr, /* ldm */ |
| instr_t **pre_ldm_adjust, instr_t **pre_ldm_ldr, |
| instr_t **post_ldm_adjust, instr_t **ldr_pc) |
| { |
| int opcode = instr_get_opcode(instr); |
| reg_id_t base = opnd_get_base(instr_get_src(instr, 0)); |
| bool writeback = instr_num_srcs(instr) > 1; |
| bool write_pc = instr_writes_to_reg(instr, DR_REG_PC, DR_QUERY_INCLUDE_ALL); |
| bool use_pop_pc = false; |
| uint num_dsts = instr_num_dsts(instr); |
| int memsz = sizeof(reg_t) * (writeback ? (num_dsts - 1) : num_dsts); |
| int adjust_pre = 0, adjust_post = 0, ldr_pc_disp = 0; |
| dr_pred_type_t pred = instr_get_predicate(instr); |
| app_pc pc = get_app_instr_xl8(instr); |
| |
| /* FIXME i#1551: NYI for cases like "ldm r10, {r10, pc}": if the base reg
| * is clobbered, "ldr pc [base, disp]" will use the wrong base value.
| * It seems the only solution is to load the target value first and store
| * it into some TLS slot for later "ldr pc".
| */ |
| ASSERT_NOT_IMPLEMENTED(!(write_pc && !writeback && |
| /* base reg is in the reglist */ |
| instr_writes_to_reg(instr, base, DR_QUERY_INCLUDE_ALL))); |
| |
| ASSERT(pre_ldm_adjust != NULL && pre_ldm_ldr != NULL && post_ldm_adjust != NULL && |
| ldr_pc != NULL); |
| *pre_ldm_adjust = NULL; |
| *pre_ldm_ldr = NULL; |
| *post_ldm_adjust = NULL; |
| *ldr_pc = NULL; |
| |
| if (opnd_get_reg(instr_get_dst(instr, 0)) == DR_REG_PC) { |
| /* special case like "pop pc" in T32.16, do nothing */ |
| ASSERT(write_pc && memsz == sizeof(reg_t)); |
| return; |
| } |
| |
| /* two examples to clarify the code below:
| * - ldm{*} r0{!}, {r1-r4} ==> ldmia r0{!}, {r1-r4} |
| * - ldm{*} r0{!}, {r1-r3,pc} ==> ldmia r0{!}, {r1-r3,pc} |
| */ |
| switch (opcode) { |
| case OP_ldmia: |
| /* ldmia r0, {r1-r4}: r0: X->X, read [X, X+0x10) |
| * ldmia r0!, {r1-r4}: r0: X->X+0x10, read [X, X+0x10) |
| * ldmia r0, {r1-r3,pc}: r0: X->X, read [X, X+0xc), [X+0xc, X+0x10) |
| * ldmia r0!, {r1-r3,pc}: r0: X->X+0x10, read [X, X+0xc), [X+0xc, X+0x10) |
| */ |
| adjust_pre = 0; |
| if (write_pc) { |
| /* we take pc out of the reglist, so we need a post-ldm adjust if w/ writeback */
| if (writeback) { |
| /* use "pop pc" instead of "ldr pc" to avoid beyond TOS access */ |
| if (base == DR_REG_SP) { |
| use_pop_pc = true; |
| adjust_post = 0; |
| ldr_pc_disp = 0; |
| } else { |
| adjust_post = sizeof(reg_t); |
| ldr_pc_disp = -sizeof(reg_t); |
| } |
| } else { |
| adjust_post = 0; |
| ldr_pc_disp = memsz - sizeof(reg_t); |
| } |
| } else { |
| adjust_post = 0; |
| } |
| break; |
| case OP_ldmda: |
| /* ldmda r0, {r1-r4}: r0: X->X, read [X-0xc, X+0x4) |
| * ldmda r0!, {r1-r4}: r0: X->X-0x10, read [X-0xc, X+0x4) |
| * ldmda r0, {r1-r3,pc}: r0: X->X, read [X-0xc, X), [X, X+0x4) |
| * ldmda r0!, {r1-r3,pc}: r0: X->X-0x10, read [X-0xc, X), [X, X+0x4) |
| */ |
| adjust_pre = -memsz + sizeof(reg_t); |
| if (write_pc) { |
| if (writeback) { |
| adjust_post = -memsz; |
| ldr_pc_disp = memsz + sizeof(reg_t); |
| } else { |
| /* XXX: optimize, add writeback to skip post ldm adjust */ |
| adjust_post = -adjust_pre; |
| ldr_pc_disp = 0; |
| } |
| } else { |
| if (writeback) { |
| adjust_post = -memsz - sizeof(reg_t); |
| } else { |
| adjust_post = -adjust_pre; |
| } |
| } |
| break; |
| case OP_ldmdb: |
| /* ldmdb r0, {r1-r4}: r0: X->X, read [X-0x10, X) |
| * ldmdb r0!, {r1-r4}: r0: X->X-0x10, read [X-0x10, X) |
| * ldmdb r0, {r1-r3,pc}: r0: X->X, read [X-0x10, X-0x4), [X-0x4, X) |
| * ldmdb r0!, {r1-r3,pc}: r0: X->X-0x10, read [X-0x10, X-0x4), [X-0x4, X) |
| */ |
| adjust_pre = -memsz; |
| if (write_pc) { |
| if (writeback) { |
| adjust_post = -(memsz - sizeof(reg_t)); |
| ldr_pc_disp = memsz - sizeof(reg_t); |
| } else { |
| adjust_post = -adjust_pre; |
| ldr_pc_disp = -sizeof(reg_t); |
| } |
| } else { |
| if (writeback) { |
| /* XXX: optimize, remove writeback to avoid post ldm adjust */ |
| adjust_post = adjust_pre; |
| } else { |
| /* XXX: optimize, add writeback to avoid post ldm adjust */ |
| adjust_post = -adjust_pre; |
| } |
| } |
| break; |
| case OP_ldmib: |
| /* ldmib r0, {r1-r4}: r0: X->X, read [X+4, X+0x14) |
| * ldmib r0!, {r1-r4}: r0: X->X+0x10, read [X+4, X+0x14) |
| * ldmib r0, {r1-r3,pc}: r0: X->X, read [X+4, X+0x10), [X+0x10, X+0x14) |
| * ldmib r0!, {r1-r3,pc}: r0: X->X+0x10, read [X+4, X+0x10), [X+0x10, X+0x14) |
| */ |
| adjust_pre = sizeof(reg_t); |
| if (write_pc) { |
| if (writeback) { |
| adjust_post = 0; |
| ldr_pc_disp = 0; |
| } else { |
| adjust_post = -adjust_pre; |
| ldr_pc_disp = memsz; |
| } |
| } else { |
| if (writeback) |
| adjust_post = -sizeof(reg_t); |
| else |
| adjust_post = -adjust_pre; |
| } |
| break; |
| default: ASSERT_NOT_REACHED(); |
| } |
| |
| if (instr_uses_reg(instr, dr_reg_stolen) && |
| pick_scratch_reg(dcontext, instr, DR_REG_NULL, DR_REG_NULL, DR_REG_NULL, false, |
| NULL, NULL) == REG_NULL) { |
| /* We need to split the ldm.
| * We need a scratch reg from r0-r3, so by splitting the bottom reg we're |
| * guaranteed to get one. And since cti uses r2 it works out there. |
| */ |
| adjust_pre += sizeof(reg_t); |
| /* adjust the base back if the base won't be over-written, e.g.:
| * ldm (%r10)[16byte] -> %r0 %r1 %r2 %r3 |
| */ |
| if (!instr_writes_to_reg(instr, base, DR_QUERY_INCLUDE_ALL)) |
| adjust_post -= sizeof(reg_t); |
| /* pre_ldm_adjust makes sure that the base reg points to the start address of |
| * the ldmia memory, so we know the slot to be loaded is at [base, -4].
| */ |
| *pre_ldm_ldr = XINST_CREATE_load(dcontext, instr_get_dst(instr, 0), |
| OPND_CREATE_MEMPTR(base, -sizeof(reg_t))); |
| /* We remove the reg from reglist later after removing pc from reglist, |
| * so it won't mess up the index when removing pc. |
| */ |
| instr_set_predicate(*pre_ldm_ldr, pred); |
| instr_set_translation(*pre_ldm_ldr, pc); |
| } |
| |
| if (adjust_pre != 0) { |
| *pre_ldm_adjust = adjust_pre > 0 |
| ? XINST_CREATE_add(dcontext, opnd_create_reg(base), |
| OPND_CREATE_INT(adjust_pre)) |
| : XINST_CREATE_sub(dcontext, opnd_create_reg(base), |
| OPND_CREATE_INT(-adjust_pre)); |
| instr_set_predicate(*pre_ldm_adjust, pred); |
| instr_set_translation(*pre_ldm_adjust, pc); |
| } |
| |
| if (write_pc) { |
| instr_remove_dsts(dcontext, instr, writeback ? num_dsts - 2 : num_dsts - 1, |
| writeback ? num_dsts - 1 : num_dsts); |
| } |
| if (*pre_ldm_ldr != NULL) |
| instr_remove_dsts(dcontext, instr, 0, 1); |
| |
| /* check how many registers are left in the reglist */
| ASSERT(instr_num_dsts(instr) != (writeback ? 1 : 0)); |
| if (instr_num_dsts(instr) == (writeback ? 2 : 1)) { |
| /* only one reg is left in the reglist, convert it to ldr */ |
| instr_set_opcode(instr, OP_ldr); |
| instr_set_src(instr, 0, OPND_CREATE_MEMPTR(base, 0)); |
| if (writeback) { |
| adjust_post += sizeof(reg_t); |
| instr_remove_srcs(dcontext, instr, 1, 2); |
| instr_remove_dsts(dcontext, instr, 1, 2); |
| } |
| } else { |
| instr_set_opcode(instr, OP_ldmia); |
| instr_set_src(instr, 0, OPND_CREATE_MEMLIST(base)); |
| } |
| |
| /* post ldm base register adjustment */ |
| if (!writeback && instr_writes_to_reg(instr, base, DR_QUERY_INCLUDE_ALL)) { |
| /* if the base reg is in the reglist, we do not need to post adjust */ |
| adjust_post = 0; |
| } |
| if (adjust_post != 0) { |
| *post_ldm_adjust = adjust_post > 0 |
| ? XINST_CREATE_add(dcontext, opnd_create_reg(base), |
| OPND_CREATE_INT(adjust_post)) |
| : XINST_CREATE_sub(dcontext, opnd_create_reg(base), |
| OPND_CREATE_INT(-adjust_post)); |
| instr_set_predicate(*post_ldm_adjust, pred); |
| instr_set_translation(*post_ldm_adjust, pc); |
| } |
| |
| /* post ldm load-pc */ |
| if (write_pc) { |
| if (use_pop_pc) { |
| ASSERT(ldr_pc_disp == 0 && base == DR_REG_SP && writeback); |
| /* we use pop_list to generate T32.16 (2-byte) code in Thumb mode */
| *ldr_pc = INSTR_CREATE_pop_list(dcontext, 1, opnd_create_reg(DR_REG_PC)); |
| } else { |
| *ldr_pc = XINST_CREATE_load(dcontext, opnd_create_reg(DR_REG_PC), |
| OPND_CREATE_MEMPTR(base, ldr_pc_disp)); |
| } |
| instr_set_predicate(*ldr_pc, pred); |
| instr_set_translation(*ldr_pc, pc); |
| if (TEST(INSTR_CLOBBER_RETADDR, instr->flags)) |
| (*ldr_pc)->flags |= INSTR_CLOBBER_RETADDR; |
| } |
| } |
| |
| /* Mangling reglist write is complex: ldm{ia,ib,da,db} w/ and w/o writeback. |
| * One possible solution is to split the ldm into multiple ldm instructions. |
| * However it has several challenges, for example:
| * - we need additional base reg adjust instr for ldm w/o writeback |
| * as ldm does not have disp for the memlist, |
| * - we need different execution order of split-ldms for ldmia and ldmdb, |
| * - ldmib/ldmda add additional complexity, |
| * - we still need a "ldr pc" if it writes to pc |
| * - etc. |
| * |
| * Another solution is to convert them into a sequence of ldrs with base reg
| * adjustments, which may cause large runtime overhead. |
| * |
| * Our approach is to convert any gpr_list write instruction into five parts:
| * 1. base reg adjustment |
| * 2. ldr r0 [base] # optional split for getting a scratch reg |
| * 3. ldmia base, {reglist} |
| * 4. base reg adjustment |
| * 5. ldr pc, [base, offset] |
| * and mangle each separately. |
| */ |
| static instr_t * |
| mangle_gpr_list_write(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| instr_t *next_instr) |
| { |
| instr_t *pre_ldm_adjust, *pre_ldm_ldr, *post_ldm_adjust, *ldr_pc; |
| |
| ASSERT(!instr_is_meta(instr) && instr_writes_gpr_list(instr)); |
| |
| /* convert ldm{*} instr to a sequence of instructions */ |
| normalize_ldm_instr(dcontext, instr, &pre_ldm_adjust, &pre_ldm_ldr, &post_ldm_adjust, |
| &ldr_pc); |
| |
| /* pc cannot be used as the base in ldm, so now we only care about dr_reg_stolen */
| if (pre_ldm_adjust != NULL) { |
| instrlist_preinsert(ilist, instr, pre_ldm_adjust); /* non-meta */ |
| if (instr_uses_reg(pre_ldm_adjust, dr_reg_stolen)) { |
| mangle_stolen_reg(dcontext, ilist, pre_ldm_adjust, |
| /* dr_reg_stolen must be restored right after */ |
| instr_get_next(pre_ldm_adjust), false); |
| } |
| } |
| if (pre_ldm_ldr != NULL) { |
| /* special case: for "ldm r0, {r0-rx}", the separate "ldr r0, [r0]" clobbers base r0 */
| if (opnd_get_reg(instr_get_dst(pre_ldm_ldr, 0)) == SCRATCH_REG0 && |
| opnd_get_base(instr_get_src(pre_ldm_ldr, 0)) == SCRATCH_REG0) { |
| instr_t *mov; |
| /* save r1 for a possible context restore on a signal */
| insert_save_to_tls_if_necessary(dcontext, ilist, instr, SCRATCH_REG1, |
| TLS_REG1_SLOT); |
| /* mov r0 => r1, */ |
| mov = XINST_CREATE_move(dcontext, opnd_create_reg(SCRATCH_REG1), |
| opnd_create_reg(SCRATCH_REG0)); |
| instr_set_predicate(mov, instr_get_predicate(instr)); |
| PRE(ilist, instr, mov); |
| /* We only get here if instr is "ldm r0, {r0-rx}";
| * otherwise we would be able to pick a scratch reg without the split.
| * Thus the first dst reg must be r1 after split and the base is r0. |
| * Now we change "ldm r0, {r1-rx}" to "ldm r1, {r1-rx}". |
| */ |
| ASSERT(opnd_get_reg(instr_get_dst(instr, 0)) == SCRATCH_REG1 && |
| opnd_get_base(instr_get_src(instr, 0)) == SCRATCH_REG0); |
| instr_set_src(instr, 0, OPND_CREATE_MEMLIST(SCRATCH_REG1)); |
| } |
| |
| instrlist_preinsert(ilist, instr, pre_ldm_ldr); /* non-meta */ |
| |
| if (instr_uses_reg(pre_ldm_ldr, dr_reg_stolen)) { |
| mangle_stolen_reg(dcontext, ilist, pre_ldm_ldr, |
| /* dr_reg_stolen must be restored right after */ |
| instr_get_next(pre_ldm_ldr), false); |
| } |
| } |
| |
| if (instr_uses_reg(instr, dr_reg_stolen)) { |
| /* dr_reg_stolen must be restored right after instr */ |
| mangle_stolen_reg(dcontext, ilist, instr, instr_get_next(instr), false); |
| } |
| |
| if (post_ldm_adjust != NULL) { |
| instrlist_preinsert(ilist, next_instr, post_ldm_adjust); |
| if (instr_uses_reg(post_ldm_adjust, dr_reg_stolen)) { |
| mangle_stolen_reg(dcontext, ilist, post_ldm_adjust, |
| /* dr_reg_stolen must be restored right after */ |
| instr_get_next(post_ldm_adjust), false); |
| } |
| } |
| |
| if (ldr_pc != NULL) { |
| /* we leave ldr_pc to mangle_indirect_jump */ |
| instrlist_preinsert(ilist, next_instr, ldr_pc); |
| next_instr = ldr_pc; |
| } |
| return next_instr; |
| } |
| |
| #endif /* !AARCH64 */ |
| |
| #ifdef AARCH64 |
| /* We mangle a conditional branch that uses the stolen register like this: |
| * |
| * cbz x28, target # x28 is stolen register |
| * => |
| * str x0, [x28] # spill x0 |
| * ldr x0, [x28, #32] # x28 in memory loaded to x0 |
| * cbnz x0, fall |
| * ldr x0, [x28] # restore x0 (original branch taken) |
| * b target |
| * fall: |
| * ldr x0, [x28] # restore x0 (original branch not taken) |
| * |
| * The CBNZ will need special handling when we decode from the cache for |
| * traces (i#1668). |
| */ |
| static void |
| mangle_cbr_stolen_reg(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| instr_t *next_instr) |
| { |
| instr_t *fall = INSTR_CREATE_label(dcontext); |
| int opcode = instr_get_opcode(instr); |
| reg_id_t reg = DR_REG_X0; |
| ushort slot = TLS_REG0_SLOT; |
| opnd_t opnd; |
| |
| PRE(ilist, instr, instr_create_save_to_tls(dcontext, reg, slot)); |
| PRE(ilist, instr, instr_create_restore_from_tls(dcontext, reg, TLS_REG_STOLEN_SLOT)); |
| switch (opcode) { |
| case OP_cbnz: |
| case OP_cbz: |
| opnd = instr_get_src(instr, 1); |
| opnd = opnd_create_reg(reg_resize_to_opsz(reg, opnd_get_size(opnd))); |
| PRE(ilist, instr, |
| instr_create_0dst_2src(dcontext, (opcode == OP_cbz ? OP_cbnz : OP_cbz), |
| opnd_create_instr(fall), opnd)); |
| break; |
| case OP_tbnz: |
| case OP_tbz: |
| PRE(ilist, instr, |
| instr_create_0dst_3src(dcontext, (opcode == OP_tbz ? OP_tbnz : OP_tbz), |
| opnd_create_instr(fall), opnd_create_reg(reg), |
| instr_get_src(instr, 2))); |
| break; |
| default: ASSERT_NOT_REACHED(); |
| } |
| PRE(ilist, instr, instr_create_restore_from_tls(dcontext, reg, slot)); |
| |
| /* Replace original instruction with unconditional branch. */ |
| opnd = instr_get_src(instr, 0); |
| instr_reset(dcontext, instr); |
| instr_set_opcode(instr, OP_b); |
| instr_set_num_opnds(dcontext, instr, 0, 1); |
| instr_set_src(instr, 0, opnd); |
| instr_set_translation(instr, instrlist_get_translation_target(ilist)); |
| |
| PRE(ilist, next_instr, fall); |
| PRE(ilist, next_instr, instr_create_restore_from_tls(dcontext, reg, slot)); |
| } |
| #endif /* AARCH64 */ |
| |
| /* On ARM, we need to mangle app instrs that access the pc or dr_reg_stolen.
| * We use this centralized mangling routine to handle these complex cases with
| * more efficient mangling code. |
| */ |
| instr_t * |
| mangle_special_registers(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr, |
| instr_t *next_instr) |
| { |
| #ifdef AARCH64 |
| if (!instr_uses_reg(instr, dr_reg_stolen)) |
| return next_instr; |
| if (instr_is_cbr(instr)) |
| mangle_cbr_stolen_reg(dcontext, ilist, instr, instr_get_next(instr)); |
| else if (!instr_is_mbr(instr)) |
| mangle_stolen_reg(dcontext, ilist, instr, instr_get_next(instr), false); |
| return next_instr; |
| #else |
| bool finished = false; |
| bool in_it = |
| instr_get_isa_mode(instr) == DR_ISA_ARM_THUMB && instr_is_predicated(instr); |
| instr_t *bound_start = NULL, *bound_end = next_instr; |
| if (in_it) { |
| /* split instr off from its IT block for easier mangling (we reinstate later) */ |
| next_instr = mangle_remove_from_it_block(dcontext, ilist, instr); |
| /* We do NOT want the next_instr from mangle_gpr_list_write(), which can |
| * point at the split-off OP_ldr of pc; we need to go past that.
| */ |
| bound_end = next_instr; |
| bound_start = INSTR_CREATE_label(dcontext); |
| PRE(ilist, instr, bound_start); |
| } |
| |
| /* FIXME i#1551: for indirect branch mangling, we first mangle the instr here |
| * for possible pc read and dr_reg_stolen read/write, |
| * and leave pc write mangling later in mangle_indirect_jump, which is |
| * error-prone and inefficient. |
| * We should split the mangling and only mangle non-ind-branch instructions |
| * here and leave mbr instruction mangling to mangle_indirect_jump. |
| */ |
| /* special handling of reglist reads */
| if (instr_reads_gpr_list(instr)) { |
| mangle_gpr_list_read(dcontext, ilist, instr, next_instr); |
| finished = true; |
| } |
| |
| /* special handling of reglist writes */
| if (!finished && instr_writes_gpr_list(instr)) { |
| next_instr = mangle_gpr_list_write(dcontext, ilist, instr, next_instr); |
| finished = true; |
| } |
| |