| /* ********************************************************** |
| * Copyright (c) 2011-2013 Google, Inc. All rights reserved. |
| * Copyright (c) 2003-2010 VMware, Inc. All rights reserved. |
| * **********************************************************/ |
| |
| /* |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * * Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimer. |
| * |
| * * Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * * Neither the name of VMware, Inc. nor the names of its contributors may be |
| * used to endorse or promote products derived from this software without |
| * specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE |
| * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH |
| * DAMAGE. |
| */ |
| |
| /* Copyright (c) 2003-2007 Determina Corp. */ |
| /* Copyright (c) 2003 Massachusetts Institute of Technology */ |
| |
| /* |
| * retcheck.c |
| * Routines for the RETURN_AFTER_CALL and CHECK_RETURNS_SSE2 security features. |
| * FIXME: Experimental. |
| */ |
| |
| #include "../globals.h" |
| #include "arch.h" |
| #include "instr.h" |
| #include "instr_create.h" |
| #include "instrlist.h" |
| #include "decode.h" |
| |
| #include "../link.h" /* for frag tag */ |
| #include "../fragment.h" |
| #include "../rct.h" |
| #include "instrument.h" /* for dr_insert_clean_call */ |
| |
| #ifdef CHECK_RETURNS_SSE2 |
| /* |
| * retcheck.c |
| * Routines for the CHECK_RETURNS_SSE2 security feature. |
| * |
| * UNFINISHED: |
| * There are two methods: one uses a stack pointer, the other a |
| * constant top of stack.  Both can be optimized by using a |
| * shared routine to reduce code bloat.  Need to evaluate an |
| * optimized form of both and decide which is better! |
| * Without shared code, the shift version is faster on gcc, |
| * crafty, and vortex, but the table version is actually faster on the others! |
| * |
| * Crashes on release build on some programs |
| * Stack ptr dies on eon & swim |
| * |
| * Need to provide asm code for win32 (currently #error) |
| */ |
| |
| #include <string.h> /* for memcpy */ |
| |
| /* we have two ways of keeping our stack in the xmm registers: |
| * use one of them as a stack pointer, or have a constant top of |
| * stack and always shift the registers. |
| */ |
| #define SSE2_USE_STACK_POINTER 0 |
| |
| /* keep mprotected stack in local or global heap? */ |
| #define USE_LOCAL_MPROT_STACK 0 |
| |
| #if SSE2_USE_STACK_POINTER /* stack pointer and jump table method */ |
| # include "../fragment.h" |
| # include "../link.h" |
| #endif |
| |
| /* make code more readable by shortening long lines */ |
| #define POST instrlist_postinsert |
| #define PRE instrlist_preinsert |
| |
| |
| /* UNFINISHED: |
| * start of code to have a shared routine for big table of sse2 instrs, |
| * to reduce code bloat. |
| * there is also code in arch.c and arch.h, under the same define |
| * (CHECK_RETURNS_SSE2_EMIT) |
| */ |
| #ifdef CHECK_RETURNS_SSE2_EMIT |
| /* in arch.c */ |
| cache_pc get_pextrw_entry(dcontext_t *dcontext); |
| cache_pc get_pinsrw_entry(dcontext_t *dcontext); |
| |
| byte * |
| emit_pextrw(dcontext_t *dcontext, byte *pc) |
| { |
| int i; |
| instrlist_t ilist; |
| /* label-like nop that the table entries' jmps target */ |
| instr_t *end = INSTR_CREATE_nop(dcontext); |
| |
| /* initialize the ilist */ |
| instrlist_init(&ilist); |
| |
| for (i=0; i<62; i++) { |
| instrlist_append(&ilist, |
| INSTR_CREATE_pinsrw(dcontext, opnd_create_reg(REG_START_XMM + (i / 8)), |
| OPND_CREATE_MEM32(REG_ESP, 0), |
| OPND_CREATE_INT8(i % 8))); |
| instrlist_append(&ilist, INSTR_CREATE_jmp(dcontext, opnd_create_instr(end))); |
| instrlist_append(&ilist, INSTR_CREATE_nop(dcontext)); |
| } |
| /* entry 62 */ |
| instrlist_append(&ilist, |
| INSTR_CREATE_pinsrw(dcontext, opnd_create_reg(REG_START_XMM + (62 / 8)), |
| OPND_CREATE_MEM32(REG_ESP, 0), |
| OPND_CREATE_INT8(62 % 8))); |
| instrlist_append(&ilist, end); |
| /* insert the too-deep handler just before the end label, as in the inline version */ |
| dr_insert_clean_call(dcontext, &ilist, end, (app_pc)check_return_too_deep, |
| false/*!fp*/, 1, OPND_CREATE_INTPTR(dcontext)); |
| |
| |
| /* now encode the instructions */ |
| pc = instrlist_encode(dcontext, &ilist, pc, true /* jmps target instrs in the table */); |
| ASSERT(pc != NULL); |
| |
| /* free the instrlist_t elements */ |
| instrlist_clear(dcontext, &ilist); |
| |
| return pc; |
| } |
| #endif /* CHECK_RETURNS_SSE2_EMIT */ |
| |
| |
| #if SSE2_USE_STACK_POINTER /* stack pointer and jump table method */ |
| /* ################################################################################# */ |
| |
| /* instr should be the instr AFTER the call instr */ |
| void |
| check_return_handle_call(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr) |
| { |
| /* ON CALL, STORE RETURN ADDRESS: |
| push ra // normal push of ra |
| save ecx |
| pextrw xmm7,7 -> ecx |
| lea (,ecx,4) -> ecx // ecx = index*4; next lea adds ecx + ecx*2 => index*12 |
| lea next_addr(ecx,ecx,2) -> ecx |
| jmp ecx // pinsrw,jmp = 6+5 = 11 bytes, pad to 12 |
| 0: pinsrw (esp),0 -> xmm0; jmp end; nop |
| 1: pinsrw (esp),1 -> xmm0; jmp end; nop |
| 8: pinsrw (esp),0 -> xmm1; jmp end; nop |
| 62: pinsrw (esp),6 -> xmm7 |
| <clean call to check_return_too_deep> |
| // move 0..31 -> memory, mprotect the memory |
| // then slide 32..63 down |
| // set xmm7:7 to 30, let next instr inc it to get 31 |
| end: |
| pextrw xmm7,7 -> ecx |
| lea 1(ecx) -> ecx // inc ecx |
| pinsrw ecx,7 -> xmm7 |
| restore ecx |
| */ |
| int i; |
| instr_t *end = |
| INSTR_CREATE_pextrw(dcontext, opnd_create_reg(REG_ECX), |
| opnd_create_reg(REG_XMM7), |
| OPND_CREATE_INT8(7)); |
| PRE(ilist, instr, |
| instr_create_save_to_dcontext(dcontext, REG_ECX, XCX_OFFSET)); |
| PRE(ilist, instr, |
| INSTR_CREATE_pextrw(dcontext, opnd_create_reg(REG_ECX), |
| opnd_create_reg(REG_XMM7), |
| OPND_CREATE_INT8(7))); |
| /* to get base+ecx*12, we do "ecx=ecx*4, ecx=base + ecx + ecx*2" */ |
| PRE(ilist, instr, |
| INSTR_CREATE_lea(dcontext, opnd_create_reg(REG_ECX), |
| opnd_create_base_disp(REG_NULL, REG_ECX, 4, 0, OPSZ_lea))); |
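| /* 0xaaaaaaaa is a placeholder displacement: finalize_return_check() patches in |
| * the pc just after the jmp*, i.e., the start of the dispatch table */ |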
| PRE(ilist, instr, |
| INSTR_CREATE_lea(dcontext, opnd_create_reg(REG_ECX), |
| opnd_create_base_disp(REG_ECX, REG_ECX, 2, 0xaaaaaaaa, OPSZ_lea))); |
| #if DISABLE_FOR_ANALYSIS |
| PRE(ilist, instr, |
| INSTR_CREATE_jmp(dcontext, opnd_create_instr(end))); |
| #else |
| PRE(ilist, instr, |
| INSTR_CREATE_jmp_ind(dcontext, opnd_create_reg(REG_ECX))); |
| #endif |
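| /* entries 0..61 of the dispatch table: pinsrw + jmp + nop, padded to 12 bytes each |
| * (entry 62 follows the loop; see pseudo-code above) */ |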
| for (i=0; i<62; i++) { |
| PRE(ilist, instr, |
| INSTR_CREATE_pinsrw(dcontext, opnd_create_reg(REG_START_XMM + (i / 8)), |
| OPND_CREATE_MEM32(REG_ESP, 0), |
| OPND_CREATE_INT8(i % 8))); |
| PRE(ilist, instr, INSTR_CREATE_jmp(dcontext, opnd_create_instr(end))); |
| PRE(ilist, instr, INSTR_CREATE_nop(dcontext)); |
| } |
| /* entry 62 */ |
| PRE(ilist, instr, |
| INSTR_CREATE_pinsrw(dcontext, opnd_create_reg(REG_START_XMM + (62 / 8)), |
| OPND_CREATE_MEM32(REG_ESP, 0), |
| OPND_CREATE_INT8(62 % 8))); |
| dr_insert_clean_call(dcontext, ilist, instr, (app_pc)check_return_too_deep, |
| false/*!fp*/, 1, OPND_CREATE_INTPTR(dcontext)); |
| PRE(ilist, instr, end); |
| PRE(ilist, instr, |
| INSTR_CREATE_lea(dcontext, opnd_create_reg(REG_ECX), |
| opnd_create_base_disp(REG_ECX, REG_NULL, 0, 1, OPSZ_lea))); |
| #if !DISABLE_FOR_ANALYSIS |
| PRE(ilist, instr, |
| INSTR_CREATE_pinsrw(dcontext, opnd_create_reg(REG_XMM7), |
| opnd_create_reg(REG_ECX), |
| OPND_CREATE_INT8(7))); |
| #endif |
| PRE(ilist, instr, |
| instr_create_restore_from_dcontext(dcontext, REG_ECX, XCX_OFFSET)); |
| } |
| |
| void |
| check_return_handle_return(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr) |
| { |
| /* ON RETURN, CHECK RETURN ADDRESS: |
| pop ra -> ecx // normal pop |
| save edx |
| mov ecx, edx |
| pextrw xmm7,7 -> ecx |
| jecxz at_zero |
| lea -1(ecx) -> ecx // dec ecx |
| jmp non_zero |
| at_zero: |
| mov 31, ecx |
| <clean call to check_return_too_shallow> // restore 0..31 from memory |
| non_zero: |
| pinsrw ecx,7 -> xmm7 // store it back |
| lea (,ecx,2) -> ecx // ecx = ecx * 2 |
| lea next_addr(ecx,ecx,4) -> ecx // ecx = ecx+ecx*4 = old_ecx*10 |
| jmp ecx // pextrw,jmp = 5+5 = 10 bytes |
| 0: pextrw xmm0,0 -> ecx; jmp end |
| 1: pextrw xmm0,1 -> ecx; jmp end |
| 8: pextrw xmm1,0 -> ecx; jmp end |
| 62: pextrw xmm7,6 -> ecx; |
| end: |
| movzx dx -> edx // clear top 16 bits, for cmp w/ stored bottom 16 bits |
| not %ecx |
| lea 1(%ecx,%edx,1),%ecx // "not ecx + 1" => -ecx, to cmp w/ edx |
| jecxz ra_not_mangled |
| call ra_mangled |
| ra_not_mangled: |
| restore edx |
| // FIXME: can't count on below esp not being clobbered! (could get signal->handler!) |
| mov -4(esp),ecx // restore return address |
| */ |
| int i; |
| instr_t *ra_not_mangled = |
| instr_create_restore_from_dcontext(dcontext, REG_EDX, XDX_OFFSET); |
| instr_t *end = |
| INSTR_CREATE_movzx(dcontext, opnd_create_reg(REG_EDX), opnd_create_reg(REG_DX)); |
| instr_t *at_zero = |
| INSTR_CREATE_mov_imm(dcontext, opnd_create_reg(REG_ECX), OPND_CREATE_INT32(31)); |
| instr_t *non_zero = |
| INSTR_CREATE_pinsrw(dcontext, opnd_create_reg(REG_XMM7), |
| opnd_create_reg(REG_ECX), |
| OPND_CREATE_INT8(7)); |
| PRE(ilist, instr, |
| instr_create_save_to_dcontext(dcontext, REG_EDX, XDX_OFFSET)); |
| PRE(ilist, instr, |
| INSTR_CREATE_mov_ld(dcontext, opnd_create_reg(REG_EDX), opnd_create_reg(REG_ECX))); |
| PRE(ilist, instr, |
| INSTR_CREATE_pextrw(dcontext, opnd_create_reg(REG_ECX), |
| opnd_create_reg(REG_XMM7), |
| OPND_CREATE_INT8(7))); |
| #if DISABLE_FOR_ANALYSIS |
| PRE(ilist, instr, |
| INSTR_CREATE_jmp(dcontext, opnd_create_instr(ra_not_mangled))); |
| #endif |
| PRE(ilist, instr, |
| INSTR_CREATE_jecxz(dcontext, opnd_create_instr(at_zero))); |
| PRE(ilist, instr, |
| INSTR_CREATE_lea(dcontext, opnd_create_reg(REG_ECX), |
| opnd_create_base_disp(REG_ECX, REG_NULL, 0, -1, OPSZ_lea))); |
| PRE(ilist, instr, |
| INSTR_CREATE_jmp(dcontext, opnd_create_instr(non_zero))); |
| PRE(ilist, instr, at_zero); |
| dr_insert_clean_call(dcontext, ilist, instr, (app_pc)check_return_too_shallow, |
| false/*!fp*/, 1, OPND_CREATE_INTPTR(dcontext)); |
| PRE(ilist, instr, non_zero); |
| /* to get base+ecx*10, we do "ecx=ecx*2, ecx=base + ecx + ecx*4" */ |
| PRE(ilist, instr, |
| INSTR_CREATE_lea(dcontext, opnd_create_reg(REG_ECX), |
| opnd_create_base_disp(REG_NULL, REG_ECX, 2, 0, OPSZ_lea))); |
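| /* placeholder displacement 0xaaaaaaaa is patched to the table start by finalize_return_check() */ |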
| PRE(ilist, instr, |
| INSTR_CREATE_lea(dcontext, opnd_create_reg(REG_ECX), |
| opnd_create_base_disp(REG_ECX, REG_ECX, 4, 0xaaaaaaaa, OPSZ_lea))); |
| PRE(ilist, instr, |
| INSTR_CREATE_jmp_ind(dcontext, opnd_create_reg(REG_ECX))); |
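| /* emit the dispatch table: pextrw + jmp = 10 bytes per entry (see pseudo-code above) */ |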
| for (i=0; i<63; i++) { |
| PRE(ilist, instr, |
| INSTR_CREATE_pextrw(dcontext, opnd_create_reg(REG_ECX), |
| opnd_create_reg(REG_START_XMM + (i / 8)), |
| OPND_CREATE_INT8(i % 8))); |
| PRE(ilist, instr, |
| INSTR_CREATE_jmp(dcontext, opnd_create_instr(end))); |
| } |
| PRE(ilist, instr, end); |
| PRE(ilist, instr, INSTR_CREATE_not(dcontext, opnd_create_reg(REG_ECX))); |
| PRE(ilist, instr, |
| INSTR_CREATE_lea(dcontext, opnd_create_reg(REG_ECX), |
| opnd_create_base_disp(REG_ECX, REG_EDX, 1, 1, OPSZ_lea))); |
| PRE(ilist, instr, |
| INSTR_CREATE_jecxz(dcontext, opnd_create_instr(ra_not_mangled))); |
| dr_insert_clean_call(dcontext, ilist, instr, (app_pc)check_return_ra_mangled, |
| false/*!fp*/, 1, OPND_CREATE_INTPTR(dcontext)); |
| PRE(ilist, instr, ra_not_mangled); |
| PRE(ilist, instr, |
| INSTR_CREATE_mov_ld(dcontext, opnd_create_reg(REG_ECX), OPND_CREATE_MEM32(REG_ESP, -4))); |
| } |
| |
| /* touches up jmp* for table (needs address of start of table) */ |
| void |
| finalize_return_check(dcontext_t *dcontext, fragment_t *f) |
| { |
| byte *start_pc = (byte *) FCACHE_ENTRY_PC(f); |
| byte *end_pc = fragment_body_end_pc(dcontext, f); |
| byte *pc, *prev_pc; |
| int leas_next = 0; |
| instr_t instr; |
| instr_init(dcontext, &instr); |
| LOG(THREAD, LOG_ALL, 3, "finalize_return_check\n"); |
| |
| SELF_PROTECT_CACHE(dcontext, f, WRITABLE); |
| |
| /* must fix up indirect jmp */ |
| pc = start_pc; |
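| /* scan our own emitted code: a pextrw/pinsrw on xmm7 slot 7 (leas_next=1) should be |
| * followed by the scaling lea (leas_next=2) and then the lea whose displacement we |
| * patch to point at the dispatch table */ |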
| do { |
| prev_pc = pc; |
| instr_reset(dcontext, &instr); |
| pc = decode(dcontext, pc, &instr); |
| ASSERT(instr_valid(&instr)); /* our own code! */ |
| if (leas_next == 2) { |
| loginst(dcontext, 3, &instr, "\tlea 2"); |
| if (instr_get_opcode(&instr) == OP_lea) { |
| opnd_t op = instr_get_src(&instr, 0); |
| int scale = opnd_get_scale(op); |
| DEBUG_DECLARE(byte *nxt_pc;) |
| /* put in pc of instr after jmp: jmp is 2 bytes long */ |
| instr_set_src(&instr, 0, |
| opnd_create_base_disp(REG_ECX, REG_ECX, scale, |
| (int)(pc+2), OPSZ_lea)); |
| DEBUG_DECLARE(nxt_pc = ) instr_encode(dcontext, &instr, prev_pc); |
| ASSERT(nxt_pc != NULL); |
| } |
| leas_next = 0; |
| } |
| if (leas_next == 1) { |
| loginst(dcontext, 3, &instr, "\tlea 1"); |
| if (instr_get_opcode(&instr) == OP_lea) |
| leas_next = 2; |
| else |
| leas_next = 0; |
| } |
| /* we don't allow program to use sse, so pextrw/pinsrw are all ours */ |
| if (leas_next == 0 && |
| instr_get_opcode(&instr) == OP_pextrw && |
| opnd_is_reg(instr_get_src(&instr, 0)) && |
| opnd_get_reg(instr_get_src(&instr, 0)) == REG_XMM7 && |
| opnd_is_immed_int(instr_get_src(&instr, 1)) && |
| opnd_get_immed_int(instr_get_src(&instr, 1)) == 7) { |
| loginst(dcontext, 3, &instr, "\tfound pextrw"); |
| leas_next = 1; |
| } |
| else if (leas_next == 0 && |
| instr_get_opcode(&instr) == OP_pinsrw && |
| opnd_is_reg(instr_get_dst(&instr, 0)) && |
| opnd_get_reg(instr_get_dst(&instr, 0)) == REG_XMM7 && |
| opnd_is_immed_int(instr_get_src(&instr, 1)) && |
| opnd_get_immed_int(instr_get_src(&instr, 1)) == 7) { |
| loginst(dcontext, 3, &instr, "\tfound pinsrw"); |
| leas_next = 1; |
| } |
| } while (pc < end_pc); |
| instr_free(dcontext, &instr); |
| |
| SELF_PROTECT_CACHE(dcontext, f, READONLY); |
| } |
| |
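| /* spill block for 32 return-address slots (bottom 16 bits each), chained off dcontext->call_stack */ |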
| typedef struct _call_stack_32 { |
| byte retaddr[32][2]; |
| struct _call_stack_32 *next; |
| } call_stack_32_t; |
| |
| /* move 0..31 -> memory, mprotect the memory |
| * then slide 32..63 down |
| * set xmm7:7 to 30, let next instr inc it to get 31 |
| */ |
| void |
| check_return_too_deep(dcontext_t *dcontext, |
| volatile int errno, volatile reg_t eflags, |
| volatile reg_t reg_edi, volatile reg_t reg_esi, |
| volatile reg_t reg_ebp, volatile reg_t reg_esp, |
| volatile reg_t reg_ebx, volatile reg_t reg_edx, |
| volatile reg_t reg_ecx, volatile reg_t reg_eax) |
| { |
| byte xmm[8][16]; /* each sse2 is 128 bits = 16 bytes */ |
| call_stack_32_t *stack; |
| |
| ENTERING_DR(); |
| #if USE_LOCAL_MPROT_STACK |
| SELF_PROTECT_LOCAL(dcontext, WRITABLE); |
| #endif |
| |
| #if USE_LOCAL_MPROT_STACK |
| stack = heap_alloc(dcontext, sizeof(call_stack_32_t)); |
| #else |
| stack = global_heap_alloc(sizeof(call_stack_32_t) HEAPACCT(ACCT_OTHER)); |
| #endif |
| stack->next = dcontext->call_stack; |
| dcontext->call_stack = stack; |
| |
| LOG(THREAD, LOG_ALL, 3, "check_return_too_deep\n"); |
| |
| /* move from registers into memory where we can work with it */ |
| /* FIXME: align xmm so can use movdqa! */ |
| #ifdef UNIX |
| asm("movdqu %%xmm0, %0" : "=m"(xmm[0])); |
| asm("movdqu %%xmm1, %0" : "=m"(xmm[1])); |
| asm("movdqu %%xmm2, %0" : "=m"(xmm[2])); |
| asm("movdqu %%xmm3, %0" : "=m"(xmm[3])); |
| asm("movdqu %%xmm4, %0" : "=m"(xmm[4])); |
| asm("movdqu %%xmm5, %0" : "=m"(xmm[5])); |
| asm("movdqu %%xmm6, %0" : "=m"(xmm[6])); |
| asm("movdqu %%xmm7, %0" : "=m"(xmm[7])); |
| #else |
| #error NYI |
| #endif |
| |
| LOG(THREAD, LOG_ALL, 3, "\tjust copied registers\n"); |
| |
| /* we want 0..31 into our stack, that's the first 64 bytes */ |
| memcpy(stack->retaddr, xmm[0], 64); |
| |
| #ifdef DEBUG |
| if (stats->loglevel >= 3) { |
| int i,j; |
| LOG(THREAD, LOG_ALL, 3, "Copied into stored stack:\n"); |
| for (i=0; i<4; i++) { |
| for (j=0; j<8; j++) { |
| LOG(THREAD, LOG_ALL, 3, "\t%d %d 0x%02x%02x", i, j, |
| stack->retaddr[i*8+j][0], stack->retaddr[i*8+j][1]); |
| if (j % 4 == 3) |
| LOG(THREAD, LOG_ALL, 3, "\n"); |
| } |
| } |
| LOG(THREAD, LOG_ALL, 3, "Before shifting:\n"); |
| for (i=0; i<8; i++) { |
| for (j=0; j<8; j++) { |
| LOG(THREAD, LOG_ALL, 3, "\t%d %d 0x%02x%02x", i, j, xmm[i][j*2], xmm[i][j*2+1]); |
| if (j % 4 == 3) |
| LOG(THREAD, LOG_ALL, 3, "\n"); |
| } |
| } |
| } |
| #endif |
| |
| /* now slide 32..63 down */ |
| memcpy(xmm[0], xmm[4], 64); |
| |
| /* move back into registers */ |
| #ifdef UNIX |
| asm("movdqu %0, %%xmm0" : : "m"(xmm[0][0])); |
| asm("movdqu %0, %%xmm1" : : "m"(xmm[1][0])); |
| asm("movdqu %0, %%xmm2" : : "m"(xmm[2][0])); |
| asm("movdqu %0, %%xmm3" : : "m"(xmm[3][0])); |
| asm("movl $30, %eax"); |
| asm("pinsrw $7,%eax,%xmm7"); |
| #else |
| #error NYI |
| #endif |
| |
| dcontext->call_depth++; |
| |
| LOG(THREAD, LOG_ALL, 3, "\tdone, call depth is now %d\n", dcontext->call_depth); |
| |
| #if USE_LOCAL_MPROT_STACK |
| SELF_PROTECT_LOCAL(dcontext, READONLY); |
| #endif |
| EXITING_DR(); |
| } |
| |
| void |
| check_return_too_shallow(dcontext_t *dcontext, |
| volatile int errno, volatile reg_t eflags, |
| volatile reg_t reg_edi, volatile reg_t reg_esi, |
| volatile reg_t reg_ebp, volatile reg_t reg_esp, |
| volatile reg_t reg_ebx, volatile reg_t reg_edx, |
| volatile reg_t reg_ecx, volatile reg_t reg_eax) |
| { |
| ENTERING_DR(); |
| #if USE_LOCAL_MPROT_STACK |
| SELF_PROTECT_LOCAL(dcontext, WRITABLE); |
| #endif |
| |
| LOG(THREAD, LOG_ALL, 3, "check_return_too_shallow\n"); |
| if (dcontext->call_depth == 0) { |
| LOG(THREAD, LOG_ALL, 3, "\tbottomed out of dynamo, ignoring\n"); |
| reg_ecx = 0; /* undo the set to 31 prior to this call */ |
| /* FIXME: would like to avoid rest of checks...but then have to put |
| * clean-call-cleanup at bottom...instead we have a hack where we put |
| * in a ret addr that will match, namely the real ret addr, sitting in edx |
| */ |
| #ifdef UNIX |
| asm("movl %0, %%eax" : : "m"(reg_edx)); |
| asm("pinsrw $0,%eax,%xmm0"); |
| #else |
| #error NYI |
| #endif |
| LOG(THREAD, LOG_ALL, 3, "\tset xmm0:0 to "PFX"\n", reg_edx); |
| } else { |
| /* restore 0..31 from memory */ |
| call_stack_32_t *stack = dcontext->call_stack; |
| ASSERT(stack != NULL); |
| /* move back into registers */ |
| #ifdef UNIX |
| asm("movl %0, %%eax" : : "m"(stack->retaddr)); |
| asm("movdqu (%eax), %xmm0"); |
| asm("movdqu 0x10(%eax), %xmm1"); |
| asm("movdqu 0x20(%eax), %xmm2"); |
| asm("movdqu 0x30(%eax), %xmm3"); |
| #else |
| #error NYI |
| #endif |
| #ifdef DEBUG |
| if (stats->loglevel >= 3) { |
| int i,j; |
| LOG(THREAD, LOG_ALL, 3, "Restored:\n"); |
| for (i=0; i<4; i++) { |
| for (j=0; j<8; j++) { |
| LOG(THREAD, LOG_ALL, 3, "\t%d %d 0x%02x%02x", i, j, |
| stack->retaddr[i*8+j][0], stack->retaddr[i*8+j][1]); |
| if (j % 4 == 3) |
| LOG(THREAD, LOG_ALL, 3, "\n"); |
| } |
| } |
| } |
| #endif |
| stack = stack->next; |
| #if USE_LOCAL_MPROT_STACK |
| heap_free(dcontext, dcontext->call_stack, sizeof(call_stack_32_t)); |
| #else |
| global_heap_free(dcontext->call_stack, sizeof(call_stack_32_t) HEAPACCT(ACCT_OTHER)); |
| #endif |
| dcontext->call_stack = stack; |
| dcontext->call_depth--; |
| LOG(THREAD, LOG_ALL, 3, "\tdone, call depth is now %d\n", dcontext->call_depth); |
| } |
| |
| #if USE_LOCAL_MPROT_STACK |
| SELF_PROTECT_LOCAL(dcontext, READONLY); |
| #endif |
| EXITING_DR(); |
| } |
| |
| void |
| check_return_ra_mangled(dcontext_t *dcontext, |
| volatile int errno, volatile reg_t eflags, |
| volatile reg_t reg_edi, volatile reg_t reg_esi, |
| volatile reg_t reg_ebp, volatile reg_t reg_esp, |
| volatile reg_t reg_ebx, volatile reg_t reg_edx, |
| volatile reg_t reg_ecx, volatile reg_t reg_eax) |
| { |
| /* ecx had addr, then we did ecx' = edx-ecx, so old ecx = edx - ecx' */ |
| int stored_addr = reg_edx - reg_ecx; |
| |
| ENTERING_DR(); |
| SELF_PROTECT_LOCAL(dcontext, WRITABLE); |
| |
| #ifdef DEBUG |
| if (stats->loglevel >= 3 && (stats->logmask & LOG_ALL) != 0) { |
| int idx; |
| # ifdef UNIX |
| asm("pextrw $7,%xmm7,%eax"); |
| asm("movl %%eax, %0" : "=m"(idx)); |
| # else |
| # error NYI |
| # endif |
| LOG(THREAD, LOG_ALL, 3, |
| "check_return_ra_mangled: stored="PFX" vs real="PFX", idx=%d\n", |
| stored_addr, reg_edx, idx); |
| } |
| #endif |
| |
| SYSLOG_INTERNAL_ERROR("ERROR: return address was mangled (bottom 16 bits: " |
| "0x%04x => 0x%04x)", (reg_edx & 0x0000ffff), stored_addr); |
| ASSERT_NOT_REACHED(); |
| |
| SELF_PROTECT_LOCAL(dcontext, READONLY); |
| EXITING_DR(); |
| } |
| |
| #else /* !SSE2_USE_STACK_POINTER */ |
| /* ################################################################################# */ |
| /* instr should be the instr AFTER the call instr */ |
| void |
| check_return_handle_call(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr) |
| { |
| /* ON CALL, STORE RETURN ADDRESS: |
| push ra // normal push of ra |
| save ecx |
| pextrw xmm7,7 -> ecx |
| lea -63(ecx) -> ecx |
| jecxz overflow |
| jmp non_overflow |
| overflow: |
| <clean call to check_return_too_deep> |
| // move 31..62 -> memory, mprotect the memory |
| // set xmm7:7 to 32 by setting ecx = 32-64 |
| non_overflow: |
| pslldq xmm7,2 # shift left one word |
| lea 64(ecx) -> ecx # restore, plus increment, the index |
| pinsrw ecx,7 -> xmm7 # put index in its slot |
| pextrw xmm6,7 -> ecx # move top of 6 to bottom of 7 |
| pinsrw ecx,0 -> xmm7 # |
| pslldq xmm6,2 # now shift 6 left one word |
| pextrw xmm5,7 -> ecx # move top of 5 to bottom of 6 |
| pinsrw ecx,0 -> xmm6 # |
| pslldq xmm5,2 # now shift 5 left one word |
| pextrw xmm4,7 -> ecx # move top of 4 to bottom of 5 |
| pinsrw ecx,0 -> xmm5 # |
| pslldq xmm4,2 # now shift 4 left one word |
| pextrw xmm3,7 -> ecx # move top of 3 to bottom of 4 |
| pinsrw ecx,0 -> xmm4 # |
| pslldq xmm3,2 # now shift 3 left one word |
| pextrw xmm2,7 -> ecx # move top of 2 to bottom of 3 |
| pinsrw ecx,0 -> xmm3 # |
| pslldq xmm2,2 # now shift 2 left one word |
| pextrw xmm1,7 -> ecx # move top of 1 to bottom of 2 |
| pinsrw ecx,0 -> xmm2 # |
| pslldq xmm1,2 # now shift 1 left one word |
| pextrw xmm0,7 -> ecx # move top of 0 to bottom of 1 |
| pinsrw ecx,0 -> xmm1 # |
| pslldq xmm0,2 # now shift 0 left one word |
| pinsrw (esp),0 -> xmm0 # now store new return address |
| end: |
| restore ecx |
| */ |
| int i; |
| instr_t *end = |
| instr_create_restore_from_dcontext(dcontext, REG_ECX, XCX_OFFSET); |
| instr_t *overflow = INSTR_CREATE_nop(dcontext); |
| instr_t *non_overflow = |
| INSTR_CREATE_pslldq(dcontext, opnd_create_reg(REG_XMM7), OPND_CREATE_INT8(2)); |
| PRE(ilist, instr, |
| instr_create_save_to_dcontext(dcontext, REG_ECX, XCX_OFFSET)); |
| PRE(ilist, instr, |
| INSTR_CREATE_pextrw(dcontext, opnd_create_reg(REG_ECX), |
| opnd_create_reg(REG_XMM7), OPND_CREATE_INT8(7))); |
| PRE(ilist, instr, |
| INSTR_CREATE_lea(dcontext, opnd_create_reg(REG_ECX), |
| opnd_create_base_disp(REG_ECX, REG_NULL, 0, -63, OPSZ_lea))); |
| PRE(ilist, instr, |
| INSTR_CREATE_jecxz(dcontext, opnd_create_instr(overflow))); |
| PRE(ilist, instr, |
| INSTR_CREATE_jmp(dcontext, opnd_create_instr(non_overflow))); |
| PRE(ilist, instr, overflow); |
| dr_insert_clean_call(dcontext, ilist, instr, (app_pc)check_return_too_deep, |
| false/*!fp*/, 1, OPND_CREATE_INTPTR(dcontext)); |
| PRE(ilist, instr, non_overflow); |
| PRE(ilist, instr, |
| INSTR_CREATE_lea(dcontext, opnd_create_reg(REG_ECX), |
| opnd_create_base_disp(REG_ECX, REG_NULL, 0, 64, OPSZ_lea))); |
| PRE(ilist, instr, |
| INSTR_CREATE_pinsrw(dcontext, opnd_create_reg(REG_XMM7), |
| opnd_create_reg(REG_ECX), OPND_CREATE_INT8(7))); |
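| /* ripple up: move the top word of each lower xmm into the bottom of the next higher one, |
| * then shift the lower register left one word (see pseudo-code above) */ |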
| for (i=6; i>=0; i--) { |
| PRE(ilist, instr, |
| INSTR_CREATE_pextrw(dcontext, opnd_create_reg(REG_ECX), |
| opnd_create_reg(REG_START_XMM + i), OPND_CREATE_INT8(7))); |
| PRE(ilist, instr, |
| INSTR_CREATE_pinsrw(dcontext, opnd_create_reg(REG_START_XMM + i + 1), |
| opnd_create_reg(REG_ECX), OPND_CREATE_INT8(0))); |
| PRE(ilist, instr, |
| INSTR_CREATE_pslldq(dcontext, opnd_create_reg(REG_START_XMM + i), |
| OPND_CREATE_INT8(2))); |
| } |
| PRE(ilist, instr, |
| INSTR_CREATE_pinsrw(dcontext, opnd_create_reg(REG_XMM0), |
| OPND_CREATE_MEM32(REG_ESP, 0), OPND_CREATE_INT8(0))); |
| PRE(ilist, instr, end); |
| } |
| |
| #ifdef DEBUG |
| # if 0 /* not used */ |
| static void |
| check_debug_regs(dcontext_t *dcontext, |
| volatile int errno, volatile reg_t eflags, |
| volatile reg_t reg_edi, volatile reg_t reg_esi, |
| volatile reg_t reg_ebp, volatile reg_t reg_esp, |
| volatile reg_t reg_ebx, volatile reg_t reg_edx, |
| volatile reg_t reg_ecx, volatile reg_t reg_eax) |
| { |
| LOG(THREAD, LOG_ALL, 3, "check_debug2: eax="PFX" ecx="PFX" edx="PFX" ebx="PFX"\n" |
| "esp="PFX" ebp="PFX" esi="PFX" edi="PFX"\n", |
| reg_eax, reg_ecx, reg_edx, reg_ebx, reg_esp, reg_ebp, reg_esi, reg_edi); |
| } |
| # endif |
| |
| static void |
| check_debug(dcontext_t *dcontext, |
| volatile int errno, volatile reg_t eflags, |
| volatile reg_t reg_edi, volatile reg_t reg_esi, |
| volatile reg_t reg_ebp, volatile reg_t reg_esp, |
| volatile reg_t reg_ebx, volatile reg_t reg_edx, |
| volatile reg_t reg_ecx, volatile reg_t reg_eax) |
| { |
| ENTERING_DR(); |
| SELF_PROTECT_LOCAL(dcontext, WRITABLE); |
| if (stats->loglevel >= 3) { |
| int i, j; |
| byte xmm[8][16]; /* each sse2 is 128 bits = 16 bytes */ |
| /* move from registers into memory where we can work with it */ |
| #ifdef UNIX |
| asm("movdqu %%xmm0, %0" : "=m"(xmm[0])); |
| asm("movdqu %%xmm1, %0" : "=m"(xmm[1])); |
| asm("movdqu %%xmm2, %0" : "=m"(xmm[2])); |
| asm("movdqu %%xmm3, %0" : "=m"(xmm[3])); |
| asm("movdqu %%xmm4, %0" : "=m"(xmm[4])); |
| asm("movdqu %%xmm5, %0" : "=m"(xmm[5])); |
| asm("movdqu %%xmm6, %0" : "=m"(xmm[6])); |
| asm("movdqu %%xmm7, %0" : "=m"(xmm[7])); |
| #else |
| #error NYI |
| #endif |
| LOG(THREAD, LOG_ALL, 3, "on our stack (in edx is "PFX"):\n", reg_edx); |
| for (i=0; i<8; i++) { |
| for (j=0; j<8; j++) { |
| LOG(THREAD, LOG_ALL, 3, "\t%d %d 0x%02x%02x", |
| i, j, xmm[i][j*2+1], xmm[i][j*2]); |
| if (j % 4 == 3) |
| LOG(THREAD, LOG_ALL, 3, "\n"); |
| } |
| } |
| } |
| SELF_PROTECT_LOCAL(dcontext, READONLY); |
| EXITING_DR(); |
| } |
| #endif /* DEBUG */ |
| |
| void |
| check_return_handle_return(dcontext_t *dcontext, instrlist_t *ilist, instr_t *instr) |
| { |
| /* ON RETURN, CHECK RETURN ADDRESS: |
| pop ra -> ecx // normal pop |
| save edx |
| mov ecx, edx |
| save ebx |
| pextrw xmm7,7 -> ecx |
| jecxz at_zero |
| lea -1(ecx) -> ecx # dec ecx |
| pinsrw ecx,7 -> xmm7 # store index |
| jmp non_zero |
| at_zero: |
| <clean call to check_return_too_shallow> |
| // restore from memory to 0..31 |
| // copy xmm0:0 into ebx |
| // shift 1..31 down into 0..30 |
| // set xmm7:7 to 31 |
| jmp end |
| non_zero: |
| pextrw xmm0,0 -> ebx |
| psrldq xmm0,2 # shift 0 right one word |
| pextrw xmm1,0 -> ecx # move bottom of 1 to top of 0 |
| pinsrw ecx,7 -> xmm0 |
| psrldq xmm1,2 |
| pextrw xmm2,0 -> ecx # move bottom of 2 to top of 1 |
| pinsrw ecx,7 -> xmm1 |
| psrldq xmm2,2 |
| pextrw xmm3,0 -> ecx # move bottom of 3 to top of 2 |
| pinsrw ecx,7 -> xmm2 |
| psrldq xmm3,2 |
| pextrw xmm4,0 -> ecx # move bottom of 4 to top of 3 |
| pinsrw ecx,7 -> xmm3 |
| psrldq xmm4,2 |
| pextrw xmm5,0 -> ecx # move bottom of 5 to top of 4 |
| pinsrw ecx,7 -> xmm4 |
| psrldq xmm5,2 |
| pextrw xmm6,0 -> ecx # move bottom of 6 to top of 5 |
| pinsrw ecx,7 -> xmm5 |
| psrldq xmm6,2 |
| pextrw xmm7,0 -> ecx # move bottom of 7 to top of 6 |
| pinsrw ecx,7 -> xmm6 |
| psrldq xmm7,2 |
| pextrw xmm7,6 -> ecx # shift index back to top slot |
| pinsrw ecx,7 -> xmm7 |
| end: |
| mov edx,ecx |
| movzx cx -> ecx // clear top 16 bits, for cmp w/ stored bottom 16 bits |
| not %ebx |
| lea 1(%ebx,%ecx,1),%ecx // "not ebx + 1" => -ebx, to cmp w/ ecx |
| jecxz ra_not_mangled |
| call ra_mangled |
| ra_not_mangled: |
| restore ebx |
| mov edx, ecx // restore ret addr |
| restore edx |
| */ |
| int i; |
| instr_t *ra_not_mangled = |
| instr_create_restore_from_dcontext(dcontext, REG_EBX, XBX_OFFSET); |
| instr_t *end = |
| INSTR_CREATE_mov_ld(dcontext, opnd_create_reg(REG_ECX), opnd_create_reg(REG_EDX)); |
| instr_t *at_zero = INSTR_CREATE_nop(dcontext); |
| instr_t *non_zero = |
| INSTR_CREATE_pextrw(dcontext, opnd_create_reg(REG_EBX), |
| opnd_create_reg(REG_XMM0), OPND_CREATE_INT8(0)); |
| PRE(ilist, instr, |
| instr_create_save_to_dcontext(dcontext, REG_EDX, XDX_OFFSET)); |
| PRE(ilist, instr, |
| INSTR_CREATE_mov_ld(dcontext, opnd_create_reg(REG_EDX), opnd_create_reg(REG_ECX))); |
| PRE(ilist, instr, |
| instr_create_save_to_dcontext(dcontext, REG_EBX, XBX_OFFSET)); |
| |
| #ifdef DEBUG |
| if (stats->loglevel >= 4) { |
| dr_insert_clean_call(dcontext, ilist, instr, (app_pc)check_debug, |
| false/*!fp*/, 1, OPND_CREATE_INTPTR(dcontext)); |
| } |
| #endif |
| |
| PRE(ilist, instr, |
| INSTR_CREATE_pextrw(dcontext, opnd_create_reg(REG_ECX), |
| opnd_create_reg(REG_XMM7), OPND_CREATE_INT8(7))); |
| PRE(ilist, instr, |
| INSTR_CREATE_mov_ld(dcontext, opnd_create_reg(REG_EBX), opnd_create_reg(REG_ECX))); |
| PRE(ilist, instr, |
| INSTR_CREATE_jecxz(dcontext, opnd_create_instr(at_zero))); |
| PRE(ilist, instr, |
| INSTR_CREATE_lea(dcontext, opnd_create_reg(REG_ECX), |
| opnd_create_base_disp(REG_ECX, REG_NULL, 0, -1, OPSZ_lea))); |
| PRE(ilist, instr, |
| INSTR_CREATE_pinsrw(dcontext, opnd_create_reg(REG_XMM7), |
| opnd_create_reg(REG_ECX), OPND_CREATE_INT8(7))); |
| PRE(ilist, instr, |
| INSTR_CREATE_jmp(dcontext, opnd_create_instr(non_zero))); |
| PRE(ilist, instr, at_zero); |
| dr_insert_clean_call(dcontext, ilist, instr, (app_pc)check_return_too_shallow, |
| false/*!fp*/, 1, OPND_CREATE_INTPTR(dcontext)); |
| PRE(ilist, instr, |
| INSTR_CREATE_jmp(dcontext, opnd_create_instr(end))); |
| PRE(ilist, instr, non_zero); |
| PRE(ilist, instr, |
| INSTR_CREATE_psrldq(dcontext, opnd_create_reg(REG_XMM0), OPND_CREATE_INT8(2))); |
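| /* ripple down: the bottom word of xmm[i] moves to the top of xmm[i-1], then xmm[i] |
| * shifts right one word (see pseudo-code above) */ |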
| for (i=1; i<=7; i++) { |
| PRE(ilist, instr, |
| INSTR_CREATE_pextrw(dcontext, opnd_create_reg(REG_ECX), |
| opnd_create_reg(REG_START_XMM + i), OPND_CREATE_INT8(0))); |
| PRE(ilist, instr, |
| INSTR_CREATE_pinsrw(dcontext, opnd_create_reg(REG_START_XMM + i - 1), |
| opnd_create_reg(REG_ECX), OPND_CREATE_INT8(7))); |
| PRE(ilist, instr, |
| INSTR_CREATE_psrldq(dcontext, opnd_create_reg(REG_START_XMM + i), |
| OPND_CREATE_INT8(2))); |
| } |
| PRE(ilist, instr, |
| INSTR_CREATE_pextrw(dcontext, opnd_create_reg(REG_ECX), |
| opnd_create_reg(REG_XMM7), OPND_CREATE_INT8(6))); |
| PRE(ilist, instr, |
| INSTR_CREATE_pinsrw(dcontext, opnd_create_reg(REG_XMM7), |
| opnd_create_reg(REG_ECX), OPND_CREATE_INT8(7))); |
| PRE(ilist, instr, end); |
| PRE(ilist, instr, |
| INSTR_CREATE_movzx(dcontext, opnd_create_reg(REG_ECX), opnd_create_reg(REG_CX))); |
| PRE(ilist, instr, INSTR_CREATE_not(dcontext, opnd_create_reg(REG_EBX))); |
| PRE(ilist, instr, |
| INSTR_CREATE_lea(dcontext, opnd_create_reg(REG_ECX), |
| opnd_create_base_disp(REG_EBX, REG_ECX, 1, 1, OPSZ_lea))); |
| PRE(ilist, instr, |
| INSTR_CREATE_jecxz(dcontext, opnd_create_instr(ra_not_mangled))); |
| dr_insert_clean_call(dcontext, ilist, instr, (app_pc)check_return_ra_mangled, |
| false/*!fp*/, 1, OPND_CREATE_INTPTR(dcontext)); |
| PRE(ilist, instr, ra_not_mangled); |
| PRE(ilist, instr, |
| INSTR_CREATE_mov_ld(dcontext, opnd_create_reg(REG_ECX), opnd_create_reg(REG_EDX))); |
| PRE(ilist, instr, |
| instr_create_restore_from_dcontext(dcontext, REG_EDX, XDX_OFFSET)); |
| } |
| |
| /* touches up jmp* for table (needs address of start of table) */ |
| void |
| finalize_return_check(dcontext_t *dcontext, fragment_t *f) |
| { |
| } |
| |
| typedef struct _call_stack_32 { |
| byte retaddr[32][2]; |
| struct _call_stack_32 *next; |
| } call_stack_32_t; |
| |
| /* move 0..31 -> memory, mprotect the memory |
| * then slide 32..63 down |
| * set xmm7:7 to 30, let next instr inc it to get 31 |
| */ |
| void |
| check_return_too_deep(dcontext_t *dcontext, |
| volatile int errno, volatile reg_t eflags, |
| volatile reg_t reg_edi, volatile reg_t reg_esi, |
| volatile reg_t reg_ebp, volatile reg_t reg_esp, |
| volatile reg_t reg_ebx, volatile reg_t reg_edx, |
| volatile reg_t reg_ecx, volatile reg_t reg_eax) |
| { |
| byte xmm[8][16]; /* each sse2 is 128 bits = 16 bytes */ |
| call_stack_32_t *stack; |
| |
| ENTERING_DR(); |
| #if USE_LOCAL_MPROT_STACK |
| SELF_PROTECT_LOCAL(dcontext, WRITABLE); |
| #endif |
| |
| #if USE_LOCAL_MPROT_STACK |
| stack = heap_alloc(dcontext, sizeof(call_stack_32_t)); |
| #else |
| stack = global_heap_alloc(sizeof(call_stack_32_t) HEAPACCT(ACCT_OTHER)); |
| #endif |
| stack->next = dcontext->call_stack; |
| dcontext->call_stack = stack; |
| |
| LOG(THREAD, LOG_ALL, 3, "check_return_too_deep\n"); |
| |
| /* move from registers into memory where we can work with it */ |
| /* FIXME: align xmm so can use movdqa! */ |
| #ifdef UNIX |
| asm("movdqu %%xmm0, %0" : "=m"(xmm[0])); |
| asm("movdqu %%xmm1, %0" : "=m"(xmm[1])); |
| asm("movdqu %%xmm2, %0" : "=m"(xmm[2])); |
| asm("movdqu %%xmm3, %0" : "=m"(xmm[3])); |
| asm("movdqu %%xmm4, %0" : "=m"(xmm[4])); |
| asm("movdqu %%xmm5, %0" : "=m"(xmm[5])); |
| asm("movdqu %%xmm6, %0" : "=m"(xmm[6])); |
| asm("movdqu %%xmm7, %0" : "=m"(xmm[7])); |
| #else |
| #error NYI |
| #endif |
| |
| LOG(THREAD, LOG_ALL, 3, "\tjust copied registers\n"); |
| |
| /* we want slots 31..62 copied into our stack: the 64 bytes just below the index word */ |
| memcpy(stack->retaddr, &xmm[3][14], 64); |
| |
| #ifdef DEBUG |
| if (stats->loglevel >= 3) { |
| int i,j; |
| LOG(THREAD, LOG_ALL, 3, "Copied into stored stack:\n"); |
| for (i=0; i<4; i++) { |
| for (j=0; j<8; j++) { |
| LOG(THREAD, LOG_ALL, 3, "\t%d %d 0x%02x%02x", i, j, |
| stack->retaddr[i*8+j][1], stack->retaddr[i*8+j][0]); |
| if (j % 4 == 3) |
| LOG(THREAD, LOG_ALL, 3, "\n"); |
| } |
| } |
| LOG(THREAD, LOG_ALL, 3, "Before shifting:\n"); |
| for (i=0; i<8; i++) { |
| for (j=0; j<8; j++) { |
| LOG(THREAD, LOG_ALL, 3, "\t%d %d 0x%02x%02x", i, j, xmm[i][j*2+1], xmm[i][j*2]); |
| if (j % 4 == 3) |
| LOG(THREAD, LOG_ALL, 3, "\n"); |
| } |
| } |
| } |
| #endif |
| |
| #if !DISABLE_FOR_ANALYSIS |
| /* move back into registers */ |
| #ifdef UNIX |
| asm("movdqu %0, %%xmm0" : : "m"(xmm[0][0])); |
| asm("movdqu %0, %%xmm1" : : "m"(xmm[1][0])); |
| asm("movdqu %0, %%xmm2" : : "m"(xmm[2][0])); |
| asm("movdqu %0, %%xmm3" : : "m"(xmm[3][0])); |
| asm("movl $30, %eax"); |
| asm("pinsrw $7,%eax,%xmm7"); |
| #else |
| #error NYI |
| #endif |
| #endif |
| |
| /* set to 32...but will have 64 added to it, so sub that now */ |
| reg_ecx = 32 - 64; |
| |
| dcontext->call_depth++; |
| |
| LOG(THREAD, LOG_ALL, 3, "\tdone, call depth is now %d\n", dcontext->call_depth); |
| |
| #if USE_LOCAL_MPROT_STACK |
| SELF_PROTECT_LOCAL(dcontext, READONLY); |
| #endif |
| EXITING_DR(); |
| } |
| |
| void |
| check_return_too_shallow(dcontext_t *dcontext, |
| volatile int errno, volatile reg_t eflags, |
| volatile reg_t reg_edi, volatile reg_t reg_esi, |
| volatile reg_t reg_ebp, volatile reg_t reg_esp, |
| volatile reg_t reg_ebx, volatile reg_t reg_edx, |
| volatile reg_t reg_ecx, volatile reg_t reg_eax) |
| { |
| ENTERING_DR(); |
| #if USE_LOCAL_MPROT_STACK |
| SELF_PROTECT_LOCAL(dcontext, WRITABLE); |
| #endif |
| |
| LOG(THREAD, LOG_ALL, 3, "check_return_too_shallow\n"); |
| if (dcontext->call_depth == 0) { |
| LOG(THREAD, LOG_ALL, 3, "\tbottomed out of dynamo, ignoring\n"); |
| #ifdef UNIX |
| asm("movl $0, %eax"); |
| asm("pinsrw $7,%eax,%xmm7"); |
| #else |
| #error NYI |
| #endif |
| /* we set ebx so that check will succeed */ |
| reg_ebx = (reg_edx & 0x0000ffff); |
| } else { |
| /* restore 0..31 from memory */ |
| call_stack_32_t *stack = dcontext->call_stack; |
| ASSERT(stack != NULL); |
| |
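| /* slot 0 of the saved block is the entry consumed by this return: reconstruct its |
| * bottom 16 bits (stored little-endian) into ebx for the inlined comparison; |
| * slots 1..31 are reloaded into the xmm stack below */ |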
| reg_ebx = (stack->retaddr[0][1] << 8) | (stack->retaddr[0][0]); |
| LOG(THREAD, LOG_ALL, 3, "\tsetting reg_ebx to stored retaddr "PFX"\n", reg_ebx); |
| |
| /* move back into registers */ |
| #ifdef UNIX |
| /* gcc 4.0 doesn't like: "m"(stack->retaddr) */ |
| void *retaddr = stack->retaddr; |
| asm("movl %0, %%eax" : : "m"(retaddr)); |
| /* off by one to get 1..31 into slots 0..30 */ |
| asm("movdqu 0x02(%eax), %xmm0"); |
| asm("movdqu 0x12(%eax), %xmm1"); |
| asm("movdqu 0x22(%eax), %xmm2"); |
| asm("movdqu 0x32(%eax), %xmm3"); |
| asm("movl $31, %eax"); |
| asm("pinsrw $7,%eax,%xmm7"); |
| #else |
| #error NYI |
| #endif |
| #ifdef DEBUG |
| if (stats->loglevel >= 3) { |
| int i,j; |
| LOG(THREAD, LOG_ALL, 3, "Restored:\n"); |
| for (i=0; i<4; i++) { |
| for (j=0; j<8; j++) { |
| LOG(THREAD, LOG_ALL, 3, "\t%d %d 0x%02x%02x", i, j, |
| stack->retaddr[i*8+j][1], stack->retaddr[i*8+j][0]); |
| if (j % 4 == 3) |
| LOG(THREAD, LOG_ALL, 3, "\n"); |
| } |
| } |
| } |
| #endif |
| stack = stack->next; |
| #if USE_LOCAL_MPROT_STACK |
| heap_free(dcontext, dcontext->call_stack, sizeof(call_stack_32_t)); |
| #else |
| global_heap_free(dcontext->call_stack, sizeof(call_stack_32_t) HEAPACCT(ACCT_OTHER)); |
| #endif |
| dcontext->call_stack = stack; |
| dcontext->call_depth--; |
| LOG(THREAD, LOG_ALL, 3, "\tdone, call depth is now %d\n", dcontext->call_depth); |
| } |
| |
| #if USE_LOCAL_MPROT_STACK |
| SELF_PROTECT_LOCAL(dcontext, READONLY); |
| #endif |
| EXITING_DR(); |
| } |
| |
| void |
| check_return_ra_mangled(dcontext_t *dcontext, |
| volatile int errno, volatile reg_t eflags, |
| volatile reg_t reg_edi, volatile reg_t reg_esi, |
| volatile reg_t reg_ebp, volatile reg_t reg_esp, |
| volatile reg_t reg_ebx, volatile reg_t reg_edx, |
| volatile reg_t reg_ecx, volatile reg_t reg_eax) |
| { |
| /* ebx had addr, then we did ebx = ~ebx */ |
| int stored_addr = ~reg_ebx; |
| |
| ENTERING_DR(); |
| SELF_PROTECT_LOCAL(dcontext, WRITABLE); |
| |
| #ifdef DEBUG |
| if (stats->loglevel >= 3) { |
| int idx, i, j; |
| byte xmm[8][16]; /* each sse2 is 128 bits = 16 bytes */ |
| /* move from registers into memory where we can work with it */ |
| #ifdef UNIX |
| asm("movdqu %%xmm0, %0" : "=m"(xmm[0])); |
| asm("movdqu %%xmm1, %0" : "=m"(xmm[1])); |
| asm("movdqu %%xmm2, %0" : "=m"(xmm[2])); |
| asm("movdqu %%xmm3, %0" : "=m"(xmm[3])); |
| asm("movdqu %%xmm4, %0" : "=m"(xmm[4])); |
| asm("movdqu %%xmm5, %0" : "=m"(xmm[5])); |
| asm("movdqu %%xmm6, %0" : "=m"(xmm[6])); |
| asm("movdqu %%xmm7, %0" : "=m"(xmm[7])); |
| #else |
| #error NYI |
| #endif |
| LOG(THREAD, LOG_ALL, 3, "on our stack:\n"); |
| for (i=0; i<8; i++) { |
| for (j=0; j<8; j++) { |
| LOG(THREAD, LOG_ALL, 3, "\t%d %d 0x%02x%02x", |
| i, j, xmm[i][j*2+1], xmm[i][j*2]); |
| if (j % 4 == 3) |
| LOG(THREAD, LOG_ALL, 3, "\n"); |
| } |
| } |
| |
| # ifdef UNIX |
| asm("pextrw $7,%xmm7,%eax"); |
| asm("movl %%eax, %0" : "=m"(idx)); |
| # else |
| # error NYI |
| # endif |
| LOG(THREAD, LOG_ALL, 3, "check_return_ra_mangled: stored="PFX" vs real="PFX", idx=%d\n", |
| stored_addr, reg_edx, idx); |
| } |
| #endif |
| SYSLOG_INTERNAL_ERROR("ERROR: return address was mangled (bottom 16 bits: 0x%04x => 0x%04x)", |
| (reg_edx & 0x0000ffff), stored_addr); |
| ASSERT_NOT_REACHED(); |
| |
| SELF_PROTECT_LOCAL(dcontext, READONLY); |
| EXITING_DR(); |
| } |
| |
| #endif /* !SSE2_USE_STACK_POINTER */ |
| /*################################################################################*/ |
| |
| |
| #endif /* CHECK_RETURNS_SSE2 */ |
| |
| #ifdef RETURN_AFTER_CALL |
| /* Return instructions are allowed to target only instructions immediately |
|  * following a call instruction that has already been executed. |
|  */ |
| |
| static void |
| add_call_site(dcontext_t *dcontext, app_pc target_pc, bool direct) |
| { |
| /* TODO: should be part of vm_area_t to allow flushing */ |
| fragment_add_after_call(dcontext, target_pc); |
| } |
| |
| /* return 0 if not found */ |
| static int |
| find_call_site(dcontext_t *dcontext, app_pc target_pc) |
| { |
| if (fragment_after_call_lookup(dcontext, target_pc) != NULL) |
| return 1; |
| else |
| return 0; /* not found */ |
| } |
| |
| /* check only the table */ |
| bool |
| is_observed_call_site(dcontext_t *dcontext, app_pc retaddr) |
| { |
| return (find_call_site(dcontext, retaddr) != 0); |
| } |
| |
| static int INLINE_ONCE |
| start_enforcing(dcontext_t *dcontext, app_pc target_pc) |
| { |
| static int start_enforcing = 0; /* FIXME: should be thread local. this will handle vfork */ |
| int at_bottom; |
| |
| LOG(THREAD, LOG_INTERP, 3, "RCT: start_enforcing = %d\n", start_enforcing); |
| |
| if (start_enforcing) |
| return 1; |
| |
| at_bottom = at_initial_stack_bottom(dcontext, target_pc); |
| if (!at_bottom) { |
| LOG(THREAD, LOG_INTERP, 1, "RCT: no bottom - start enforcing now\n"); |
| SELF_UNPROTECT_DATASEC(DATASEC_RARELY_PROT); |
| start_enforcing = 1; |
| SELF_PROTECT_DATASEC(DATASEC_RARELY_PROT); |
| return 1; |
| } |
| |
| /* FIXME: we reach the stack bottom on Windows quite late at |
| fragment_t 2768, tag 0x77f9fb67 <ntdll.dll~KiUserApcDispatcher+0x7> |
| can we do better? |
| All other threads running at that time will ignore attacks. |
| FIXME: therefore start_enforcing should be thread local |
| */ |
| |
| if (at_bottom == 1) { |
| LOG(THREAD, LOG_INTERP, 1, "RCT: seen bottom - start enforcing after this \n"); |
| SELF_UNPROTECT_DATASEC(DATASEC_RARELY_PROT); |
| start_enforcing = 1; |
| SELF_PROTECT_DATASEC(DATASEC_RARELY_PROT); |
| return 0; /* let this last one through */ |
| } |
| |
| return 0; /* do not enforce yet */ |
| } |
| |
| void |
| add_return_target(dcontext_t *dcontext, app_pc instr_pc, instr_t *instr) |
| { |
| bool direct = instr_is_call_direct(instr); |
| app_pc after_call_pc = instr_pc + instr_length(dcontext, instr); |
| /* CHECK: is this always faster than decode_next_pc(dcontext, instr_pc)? */ |
| add_call_site(dcontext, after_call_pc, direct); |
| STATS_INC(ret_after_call_added); |
| |
| DOLOG(1, LOG_INTERP, { |
| if (direct) { |
| LOG(THREAD, LOG_INTERP, 3, |
| "RCT: call at "PFX"\tafter_call="PFX"\ttarget="PFX"\n", |
| instr_pc, after_call_pc, opnd_get_pc(instr_get_target(instr))); |
| } else { |
| /* of course, while building a basic block we can't tell the indirect call target */ |
| LOG(THREAD, LOG_INTERP, 3, "RCT: ind call at "PFX"\tafter_call="PFX"\n", |
| instr_pc, after_call_pc); |
| } |
| }); |
| } |
| |
| #ifdef DIRECT_CALL_CHECK |
| #warning not yet implemented |
| /* Further restrict returns to existing code so that only indirect after-call sites are |
| freely targetable, since direct calls have known return targets. Usually compilers generate |
| only a single RET instruction, but if we cannot count on that (e.g., assembly hacks), |
| then this check will also have false positives. |
| */ |
| /* This reverse check of (call 1->1 return) can be implemented relatively efficiently: |
| we have to have _all_ return lookups actually check if the stored tag is of a |
| direct call (which should be the common case so check can be made on miss path). |
| If target is indeed a direct call then they compare themselves with the stored value, |
| [unless first call in which case the valid value is yet unknown] |
| |
| Note that we have a many-to-one relationship of (calls *->1 return) |
| and also a 1-to-many for (ind call 1->* returns). |
| */ |
| |
| unsigned first_ret_from[MAX_CALL_CNT]; /* the first registered return */ |
| |
| enum { |
| RETURN_FROM_EXPECTED_CALLEE = 1, /* all good */ |
| RETURN_FOR_FIRST_TIME = 2, /* probably good, |
| as long as no one corrupted it before first use. |
| unfortunately, for attacks on uncommon paths |
| this protection doesn't add much |
| */ |
| RETURN_UNKNOWN_CALLEE = -1 |
| }; |
| |
| /* return >0 if ok */ |
| int |
| reverse_check_ret_source(dcontext_t *dcontext, app_pc target_pc, app_pc source_pc) |
| { |
| uint call_site_ndx = find_call_site(dcontext, target_pc); |
| ASSERT_NOT_TESTED(); |
| ASSERT(call_site_ndx < MAX_CALL_CNT); |
| if (first_ret_from[call_site_ndx] == source_pc) |
| return RETURN_FROM_EXPECTED_CALLEE; /* all good */ |
| if (!first_ret_from[call_site_ndx]) { /* never returned to */ |
| /* assigning first callee */ |
| first_ret_from[call_site_ndx] = source_pc; |
| return RETURN_FOR_FIRST_TIME; |
| } else { |
| /* direct call returned to from a different address than last time */ |
| return 0; /* mismatch - possible RA corruption */ |
| } |
| } |
| #endif /* DIRECT_CALL_CHECK */ |
| |
| static bool |
| at_iret_exception(dcontext_t *dcontext, app_pc target_pc, app_pc source_pc) |
| { |
| #if defined(X64) && defined(WINDOWS) |
| /* Check if this is ntdll64!RtlRestoreContext's iret. While my instance |
| * of ntdll64 has RtlRestoreContext as straight-line code, it could |
| * easily be split up in the future, so we only check for an iret |
| * being in ntdll itself. |
| */ |
| bool res = false; |
| instrlist_t *ilist = build_app_bb_ilist(dcontext, source_pc, INVALID_FILE); |
| instr_t *iret = instrlist_last(ilist); |
| |
| if (get_module_base(source_pc) == get_ntdll_base()) { |
| /* We could check that this bb starts w/ fxrstor but rather than be |
| * too fragile I'm allowing any iret inside ntdll */ |
| if (iret != NULL && instr_get_opcode(iret) == OP_iret) { |
| SYSLOG_INTERNAL_WARNING_ONCE("RCT: iret matched @"PFX, source_pc); |
| res = true; |
| } |
| } |
| instrlist_clear_and_destroy(dcontext, ilist); |
| /* case 9398: build_app_bb_ilist modified last decode page, so restore here */ |
| set_thread_decode_page_start(dcontext, (app_pc) PAGE_START(target_pc)); |
| return res; |
| #else |
| return false; |
| #endif |
| } |
| |
| /* similar to vbjmp, though here we have a push of a register */ |
| static bool |
| at_pushregret_exception(dcontext_t *dcontext, app_pc target_pc, app_pc source_pc) |
| { |
| /* Check if this a "push reg; ret" seen in mscoree (case 7317): |
| * push reg |
| * ret |
| */ |
| bool res = false; |
| |
| instrlist_t *ilist = build_app_bb_ilist(dcontext, source_pc, INVALID_FILE); |
| instr_t *iret = instrlist_last(ilist); |
| instr_t *ipush = (iret != NULL) ? instr_get_prev(iret) : NULL; |
| |
| if (ipush != NULL && instr_get_opcode(ipush) == OP_push |
| && opnd_is_reg(instr_get_src(ipush, 0)) |
| && instr_is_return(iret) && instr_num_srcs(iret) == 2 /* no ret immed */) { |
| /* sanity check: is reg value the ret target? */ |
| reg_id_t reg = opnd_get_reg(instr_get_src(ipush, 0)); |
| reg_t val = reg_get_value_priv(reg, get_mcontext(dcontext)); |
| LOG(GLOBAL, LOG_INTERP, 3, |
| "RCT: at_pushregret_exception: push %d reg == "PFX"; ret\n", |
| reg, val); |
| if ((app_pc)val == target_pc) { |
| SYSLOG_INTERNAL_WARNING_ONCE("RCT: push reg/ret matched @"PFX, target_pc); |
| res = true; |
| } |
| } |
| instrlist_clear_and_destroy(dcontext, ilist); |
| /* case 9398: build_app_bb_ilist modified last decode page, so restore here */ |
| set_thread_decode_page_start(dcontext, (app_pc) PAGE_START(target_pc)); |
| return res; |
| } |
| |
| static bool |
| at_vbjmp_exception(dcontext_t *dcontext, app_pc target_pc, app_pc source_pc) |
| { |
| /* Check if this is a VB-generated push/ret where the push immediate is target_pc |
| * (this is seen in winword (case 670)): |
| * push target-address |
| * ret |
| */ |
| bool res = false; |
| |
| instrlist_t *ilist = build_app_bb_ilist(dcontext, source_pc, INVALID_FILE); |
| instr_t *iret = instrlist_last(ilist); |
| instr_t *ipush = iret ? instr_get_prev(iret) : NULL; |
| |
| /* FIXME: need to restrict to only two instructions */ |
| if (ipush && instr_get_opcode(ipush) == OP_push_imm && instr_is_return(iret) && |
| opnd_get_size(instr_get_src(ipush, 0)) == OPSZ_4) { |
| ptr_uint_t immed = (ptr_uint_t) opnd_get_immed_int(instr_get_src(ipush, 0)); |
| IF_X64(ASSERT_TRUNCATE(immed, uint, |
| opnd_get_immed_int(instr_get_src(ipush, 0)))); |
| LOG(GLOBAL, LOG_INTERP, 3, |
| "RCT: at_vbjmp_exception: testing target "PFX" for push $"PFX |
| "; ret pattern\n", target_pc, immed); |
| if ((app_pc)immed == target_pc) { |
| SYSLOG_INTERNAL_WARNING_ONCE("RCT: push/ret matched @"PFX, target_pc); |
| res = true; |
| } |
| } |
| instrlist_clear_and_destroy(dcontext, ilist); |
| /* case 9398: build_app_bb_ilist modified last decode page, so restore here */ |
| set_thread_decode_page_start(dcontext, (app_pc) PAGE_START(target_pc)); |
| return res; |
| } |
| |
| static bool |
| at_vbpop_exception(dcontext_t *dcontext, app_pc target_pc, app_pc source_pc) |
| { |
| /* Check if this is a VB-generated sequence where the RETurn just goes to the next instruction. |
| * (this is seen in FMStocks_Bus.dll (case 1718)) |
| * The functions called seem generic enough to allow for other instances of this pattern. |
| * All we're checking for now is (source_pc + 1) == target_pc. |
| 110045E0 call ebx ; __vbaStrMove |
| 110045E2 push offset loc_1100462A |
| 110045E7 jmp short loc_11004620 |
| |
| 11004620 loc_11004620: ; CODE XREF: sub_11004510+D7j |
| 11004620 lea ecx, [ebp+var_20] |
| 11004623 call ds:__vbaFreeStr |
| 11004629 retn |
| 1100462A loc_1100462A: ; DATA XREF: sub_11004510+D2o |
| 1100462A mov ecx, [ebp-14h] |
| */ |
| /* FIXME: make this part of at_vbjmp_exception() */ |
| /* FIXME: also see security-common/vbjmp-rac-test.c and why |
| we may end up having to treat specially a "push $code; jmp " |
| for a slightly more general handling of this. |
| */ |
| |
| /* We assume that the RET instruction is a single one and is in |
| its own basic block, so we expect it to be at source_pc. |
| FIXME: If it doesn't work this way, we'll have |
| to build a basic block like at_vbjmp_exception() does. */ |
| /* FIXME: What if the source_pc is a trace, then we'd need to find |
| the exiting branch and make sure it matches? */ |
| if ((source_pc + 1) == target_pc) { |
| LOG(THREAD, LOG_INTERP, 2, "RCT: at_vbpop_exception; matched ret "PFX" to next "PFX" pattern\n", |
| source_pc, target_pc); |
| SYSLOG_INTERNAL_WARNING_ONCE("RCT: ret/next matched @"PFX, |
| source_pc, target_pc); |
| return true; |
| } |
| return false; |
| } |
| |
| static bool |
| at_mso_rct_exception(dcontext_t *dcontext, app_pc target_pc) |
| { |
| /* winlogon.exe (case 1214) and mso.dll (case 1158) in Office 10 |
| (from Winstone 2002) appear to have very weird code that, for |
| many function calls, modifies the return address on the stack so |
| that it skips several bytes to reach the real instruction. |
| |
| The purpose of that code is not yet grokked. I have not |
| identified whether it is supposed to be an exception handling |
| mechanism or another language construct; in any case, it breaks the ABI. |
| |
| There are ~500 places in winlogon.exe that have this pattern: |
| jmp *(fptr) |
| pushfd |
| pushad |
| push args dwords |
| call <some func> ; there are only 10 responsible for the 500 call sites in winlogon |
| ac: ; after call instruction yet return targets realac |
| sub esp, 0x400+ function of args |
| popad |
| popfd |
| <a few instrs usually pop's or push'es, or add esp> |
| realac: ; actual return target |
| add esp, 4*args |
| lea ebx, &after |
| mov [fptr], ebx |
| popad |
| popfd |
| after: |
| |
| 37 places in mso.dll dll also match this pattern: |
| |
| 30bf58f9 e87f80f1ff call MSO+0xd97d (30b0d97d) ; two locations here |
| -> this is the pushed real after call address |
| I am not sure if this is supposed to be data or instructions for something |
| 30bf58fe 81ec14040000 sub esp,?0x414? ; immediate varies |
| [30bf5904 83c408 add esp,0x8 ]; this "instruction" not always here... |
| 30bf5907 61 popad |
| 30bf5908 9d popfd |
| 30bf5909 5d pop ebp |
| -> this is the address where the above call returns to |
| 30bf590a 83c428 add esp,?0x28? ; varies |
| |
| The way the callees are passed the offset to the realac is |
| unclear - after several unexplainable hoops of adding known |
| constants to the return address on the stack they end up there. |
| |
| What we do when a RAC fails: |
| |
| I prefer not building a basic block for suspect attacker |
| controlled data - keep in mind we do this check before we check |
| code origins, therefore we'll match raw bytes. |
| |
| is_readable_without_exception(target_pc, 17) |
| |
| For now I'll go with this pattern match on the target_pc: |
| ; this cleans up the arguments to the call so it shouldn't be huge (I've seen 52) |
| 83 c4 at the target_pc, add esp, 0xbyte |
| ; the address they are loading in here is at the end of the code block! |
| 8d 1d [target_pc+1] at target_pc+3 lea ebx, target_pc+17 |
| ; In case we start doing something about indirect jumps - we can keep |
| ; the information about the indirect jump targeting target_pc+17 |
| ; We don't match the next line's pattern |
| 89 1d at target_pc+9 mov [?addr32], ebx |
| 61 at target_pc+15 popad |
| 9d at target_pc+16 popfd |
| target_pc+17: |
| |
| We should then check that there is a valid after call site in |
| the 32 bytes preceding the target_pc (I've seen 16 but giving some margin). |
| This will make it a little stricter in order to keep it |
| independent of code origins - so that attackers can't point us |
| to random code with the above prefix. |
| |
| */ |
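| /* the pattern constants below are compared via little-endian ushort/uint loads, |
| * e.g. 0xc483 matches the raw bytes 83 c4 ("add esp, imm8") */ |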
| enum {MSO_PATTERN_SIZE = 17, |
| MSO_PATTERN_ADD_ESP = 0xc483, |
| MSO_PATTERN_LEA_EBX_OFFSET = 3, |
| MSO_PATTERN_LEA_EBX = 0x1d8d, |
| MSO_PATTERN_LEA_EBX_DISP_OFFSET= 2 + MSO_PATTERN_LEA_EBX_OFFSET, |
| MSO_PATTERN_POPAD_POPFD_OFFSET = 15, |
| MSO_PATTERN_POPAD_POPFD = 0x9d61, |
| MSO_PATTERN_MAX_AC_OFFSET = 32}; |
| |
| if (!is_readable_without_exception(target_pc, MSO_PATTERN_SIZE)) |
| return false; |
| |
| LOG(GLOBAL, LOG_INTERP, 3, "RCT: at_mso_rct_exception("PFX")\n", target_pc); |
| |
| #ifdef X64 |
| /* let's wait until we hit this so we know what the new pattern |
| * looks like */ |
| return false; |
| #endif |
| |
| if ((*(uint*)(target_pc + MSO_PATTERN_LEA_EBX_DISP_OFFSET) |
| == (uint)(ptr_uint_t)(target_pc + MSO_PATTERN_SIZE)) && |
| (*(ushort*)(target_pc + MSO_PATTERN_LEA_EBX_OFFSET) |
| == MSO_PATTERN_LEA_EBX) && |
| (*(ushort*)(target_pc + MSO_PATTERN_POPAD_POPFD_OFFSET) |
| == MSO_PATTERN_POPAD_POPFD) && |
| *(ushort*)(target_pc) == MSO_PATTERN_ADD_ESP) { |
| uint fromac; |
| |
| LOG(GLOBAL, LOG_INTERP, 2, "RCT: at_mso_rct_exception("PFX"): pattern matched, " |
| "testing if after call\n", target_pc); |
| |
| for (fromac = 0; fromac < MSO_PATTERN_MAX_AC_OFFSET; fromac++) { |
| if (find_call_site(dcontext, target_pc - fromac)) { |
| SYSLOG_INTERNAL_WARNING_ONCE("RCT: mso rct matched @"PFX, target_pc); |
| |
| LOG(GLOBAL, LOG_INTERP, 2, "RCT: at_mso_rct_exception("PFX"): " |
| "pattern matched %d after real after call site", target_pc, fromac); |
| |
| /* CHECK: in case we see many of these exceptions at the same target |
| then we should add this target_pc as a valid after_call_site |
| so we don't have to match it in the future |
| */ |
| return true; |
| } |
| } |
| } |
| |
| return false; |
| } |
| |
| |
| /* licdll.dll (case 1690) Licensing agent |
| that is used by Automatic updates has several RCT violations. |
| |
| I have no idea why they are breaking the API again - the only |
| possible explanation is some sort of obfuscation. I |
| wouldn't be surprised if debugging this changes its behaviour. |
| |
| |
| licdll.dll from XP SP1 |
| 55a6877d 8d1de9813b60 lea ebx,[603b81e9] ; EBX=603b81e9 |
| 55a68783 83ec1c sub esp,0x1c |
| 55a68786 891c24 mov [esp],ebx ; [ESP] = 0x603b81e9 |
| ... |
| 55a687a5 812c2428fa940a sub dword ptr [esp],0xa94fa28 |
| ; [ESP] = 0x603b81e9 - 0xa94fa28 = 0x55a687c1! |
| ... |
| 55a687bc e90a230000 jmp licdll!Ordinal221+0xaacb (55a6aacb) |
| |
| [1] BAD TARGET |
| 55a687c1 8b542424 mov edx,[esp+0x24] |
| 55a687c5 8b0c24 mov ecx,[esp] |
| 55a687c8 891424 mov [esp],edx |
| 55a687cb 894c2424 mov [esp+0x24],ecx |
| 55a687cf 9d popfd |
| 55a687d0 61 popad |
| 55a687d1 c3 ret |
| [2] BAD SOURCE - the ret of this same fragment is then targeting a piece of DGC |
| on two freshly created pages. |
| |
| Exactly the same code appears at 0x55a65446 in the same dll on xp. |
| in the win2003 version of it 0x62FB5478 and 0x62FB87AE have the same fragment. |
| |
| FIXME: Of the three different SUB [esp] offsets 0A94FA28h, 0C98F744h, and 0EEF3E64h, |
| the latter two exhibit different potential target patterns. Need to test those. |
| |
| What we do when a RAC fails, see comments in |
| at_mso_rct_exception() on why we match raw bytes: |
| |
| Note that this same fragment is both a source and a target, so |
| it may be worthwhile matching it as close as possible. |
| |
| 1) FIXME: check if source fragment is in module licdll.dll |
| |
| 2) In case the target is a future executable then we don't look at |
| the target but instead look at the source in the next step. |
| |
| 3) is_readable_without_exception(target_pc, 17) |
| |
| 4) Pattern match: |
| 8b 54 24 24 pc mov edx,[esp+0x24] |
| 24 pc+13 mov [esp+0x24],ecx ; last byte of 89 4c 24 24 |
| 9d pc+14 popfd |
| 61 pc+15 popad |
| c3 pc+16 ret |
| pc+17: |
| */ |
| |
| static bool |
| licdll_pattern_match(dcontext_t *dcontext, app_pc pattern_pc) |
| { |
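| /* Raw little-endian byte constants: 0x2424548b read as a uint matches |
| * 8b 54 24 24 ("mov edx,[esp+0x24]") at pc, and 0xc3619d24 matches |
| * 24 9d 61 c3 at pc+13: the last byte of "mov [esp+0x24],ecx" followed |
| * by popfd, popad, ret (see the pattern listing above). |
| */ |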
| enum {LICDLL_PATTERN_SIZE = 17, |
| LICDLL_PATTERN_MOV_EDX_ESP_24 = 0x2424548b, |
| LICDLL_PATTERN_24_POPFD_OFFSET = 13, |
| LICDLL_PATTERN_24_POPFD_POPAD_RET = 0xc3619d24 |
| }; |
| |
| if (!is_readable_without_exception(pattern_pc, LICDLL_PATTERN_SIZE)) |
| return false; |
| |
| LOG(THREAD, LOG_INTERP, 2, "RCT: licdll_pattern_match("PFX")\n", pattern_pc); |
| |
| #ifdef X64 |
| /* let's wait until we hit this so we know what the new pattern |
| * looks like */ |
| return false; |
| #endif |
| |
| if ((*(uint*)(pattern_pc + LICDLL_PATTERN_24_POPFD_OFFSET) |
| == LICDLL_PATTERN_24_POPFD_POPAD_RET) && |
| (*(uint*)(pattern_pc) |
| == LICDLL_PATTERN_MOV_EDX_ESP_24)) { |
| |
| LOG(THREAD, LOG_INTERP, 1, |
| "RCT: at_licdll_rct_exception("PFX"): pattern matched\n", pattern_pc); |
| |
| return true; |
| } |
| return false; |
| } |
| |
| static bool |
| at_licdll_rct_exception(dcontext_t *dcontext, app_pc target_pc, app_pc source_pc) |
| { |
| |
| /* 1) FIXME: check if source fragment is in module licdll.dll |
| we could do that with get_module_short_name(source_pc), |
| but it looks like both licdll and dpcdll need this */ |
| |
| /* 2) FIXME: In case the target is a future executable then we |
| don't look at the target but instead look at the source in the next step. */ |
| |
| /* CHECK: in case we see many of these exceptions at the same target |
| then we should add this target_pc as a valid after_call_site |
| so we don't have to match it in the future |
| */ |
| |
| if (licdll_pattern_match(dcontext, target_pc)) { |
| SYSLOG_INTERNAL_WARNING_ONCE("RCT: licdll rct matched target @"PFX, |
| target_pc); |
| return true; |
| } |
| /* case 9398: set to source for our check |
| * FIXME: we could read off the end of the page! Should use TRY or safe_read. |
| */ |
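| /* ret_after_call_check() set the decode page to the target page before |
| * the target check above; assert that, then switch it to the source page |
| * for the source read below. |
| */ |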
| ASSERT(check_in_last_thread_vm_area(dcontext, (app_pc) PAGE_START(target_pc))); |
| set_thread_decode_page_start(dcontext, (app_pc) PAGE_START(source_pc)); |
| /* the same piece of code is then RETurning into some DGC */ |
| if (licdll_pattern_match(dcontext, source_pc)) { |
| SYSLOG_INTERNAL_WARNING_ONCE("RCT: licdll rct matched source @"PFX, |
| source_pc); |
| /* We assume any match will abort future app derefs so we don't need |
| * to restore the last decode page */ |
| return true; |
| } |
| /* case 9398: now restore (if return true we assume no more derefs) */ |
| set_thread_decode_page_start(dcontext, (app_pc) PAGE_START(target_pc)); |
| |
| return false; |
| } |
| |
| /* return after call check |
| called by dispatch after inlined return lookup routine has failed */ |
| /* FIXME: return value is ignored */ |
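| /* Return values used below: 1 = target validated or exempted, |
| * 2 = enforcement has not started yet, 3 = target in an unloaded region |
| * (allow without adding an exemption), -1 = violation. |
| */ |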
| int |
| ret_after_call_check(dcontext_t *dcontext, app_pc target_addr, app_pc src_addr) |
| { |
| /* FIXME If we change shared_syscalls to use the ret table (instead of the jmp |
| * table), we need to fix up the use of instr_addr further down, since it could |
| * store a nonsensical value and cause reverse_check_ret_source() to return a |
| * failure code. |
| */ |
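| /* instr_addr is referenced only by debug-build LOG statements and the |
| * DIRECT_CALL_CHECK code at the bottom, so it is not declared otherwise. |
| */ |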
| #if defined(DEBUG) || defined(DIRECT_CALL_CHECK) |
| cache_pc instr_addr = EXIT_CTI_PC(dcontext->last_fragment, dcontext->last_exit); |
| #endif |
| |
| LOG(THREAD, LOG_INTERP, 3, "RCT: return \taddr = "PFX"\ttarget = "PFX"\n", |
| instr_addr, target_addr); |
| |
| STATS_INC(ret_after_call_validations); |
| |
| /* FIXME: currently this is only a partial check, |
| a trace lookup will not exit the fcache for a check like this |
| to fully provide the return-after-call guarantee. |
| |
| [Note that there is an ibl even in basic blocks and currently |
| those simply look for any trace; the next step is to restrict |
| the return hashtable to only valid "after call" targets.] |
| |
| Even so, false positives with this simpler check |
| would already be something to worry about. |
| */ |
| |
| /* TODO: write a unit test that forms a trace and then modifies |
| the return address to show this needs to be done from within */ |
| |
| /* Case 9398: handle unreadable races from derefs in checks below. |
| * Any checks that read src must set back to target. |
| * FIXME: better to use TRY, or safe_read for each? if use TRY |
| * then have to make sure to call bb_build_abort() if necessary, |
| * since TRY fault takes precedence over decode fault. |
| * FIXME: we could read off the end of the page! This is just a quick fix, |
| * not foolproof. |
| */ |
| set_thread_decode_page_start(dcontext, (app_pc) PAGE_START(target_addr)); |
| |
| if (!find_call_site(dcontext, target_addr)) { |
| LOG(THREAD, LOG_INTERP, 1, "RCT: bad return target: "PFX"\n", |
| target_addr); |
| if (!start_enforcing(dcontext, target_addr)) { |
| /* FIXME to be fixed whenever we figure out how to start first */ |
| LOG(THREAD, LOG_INTERP, 1, "RCT: haven't started yet --ok\n"); |
| STATS_INC(ret_after_call_before_start); |
| /* do not add exemption */ |
| return 2; |
| } |
| |
| /* Now come the known cases of unjustified ugliness from Microsoft apps. |
| For regression testing purposes we test for them on all platforms */ |
| |
| /* FIXME: see case 285 for a better method of obtaining source_pc, |
| which for all uses here is assumed to be a bb tag, |
| and will likely break if a trace containing these bb's is built. |
| Also see case 1858 about storing into RAC table validated targets. |
| */ |
| if (DYNAMO_OPTION(vbpop_rct) && |
| at_vbpop_exception(dcontext, target_addr, src_addr)) { |
| LOG(THREAD, LOG_INTERP, 1, "RCT: known exception on VB pop --ok\n"); |
| STATS_INC(ret_after_call_known_exceptions); |
| goto exempted; |
| } |
| |
| if (DYNAMO_OPTION(vbjmp_allowed) && |
| at_vbjmp_exception(dcontext, target_addr, src_addr)) { |
| LOG(THREAD, LOG_INTERP, 1, "RCT: known exception on VB jmp --ok\n"); |
| STATS_INC(ret_after_call_known_exceptions); |
| goto exempted; |
| } |
| |
| if (DYNAMO_OPTION(mso_rct) && |
| at_mso_rct_exception(dcontext, target_addr)) { |
| LOG(THREAD, LOG_INTERP, 1, "RCT: known exception on mso ret --ok\n"); |
| STATS_INC(ret_after_call_known_exceptions); |
| goto exempted; |
| } |
| |
| if (DYNAMO_OPTION(licdll_rct) && |
| at_licdll_rct_exception(dcontext, target_addr, src_addr)) { |
| LOG(THREAD, LOG_INTERP, 1, "RCT: known exception on licdll ret --ok\n"); |
| STATS_INC(ret_after_call_known_exceptions); |
| goto exempted; |
| } |
| |
| if (DYNAMO_OPTION(pushregret_rct) && |
| at_pushregret_exception(dcontext, target_addr, src_addr)) { |
| LOG(THREAD, LOG_INTERP, 1, "RCT: known exception on push reg; ret --ok\n"); |
| STATS_INC(ret_after_call_known_exceptions); |
| STATS_INC(ret_after_call_pushregret); |
| /* FIXME: we don't want to cache the target of this pattern |
| * as the usage we've seen is once-only. But the targets have |
| * also been DGC-only, which is currently not cached anyway. |
| */ |
| goto exempted; |
| } |
| |
| if (DYNAMO_OPTION(iret_rct) && |
| at_iret_exception(dcontext, target_addr, src_addr)) { |
| LOG(THREAD, LOG_INTERP, 1, "RCT: known exception on iret --ok\n"); |
| goto exempted; |
| } |
| |
| /* additional handling for known OS specific exceptions is in |
| unix/signal.c (for ld) and |
| win32/callback.c (for exempt modules, Win2003 fibers, and SEH) |
| */ |
| if (at_known_exception(dcontext, target_addr, src_addr)) { |
| LOG(THREAD, LOG_INTERP, 1, "RCT: known exception --ok\n"); |
| STATS_INC(ret_after_call_known_exceptions); |
| goto exempted; |
| } |
| |
| LOG(THREAD, LOG_INTERP, 1, |
| "RCT: BAD[%d] real problem target="PFX" src fragment="PFX"\n", |
| GLOBAL_STAT(ret_after_call_violations), target_addr, src_addr); |
| STATS_INC(ret_after_call_violations); |
| |
| if (DYNAMO_OPTION(unloaded_target_exception) && |
| is_unreadable_or_currently_unloaded_region(target_addr)) { |
| /* we know we either had unload in progress, or we're |
| * beyond unload, but unlike other violations we want to |
| * know the difference between unreadable due to unload, |
| * vs other unreadable ranges |
| */ |
| /* if it is currently tracked as unloaded we'll just ignore */ |
| /* case 9364 - we may prefer to kill a thread when targeting |
| * unreadable memory that hasn't been unloaded |
| * Alternatively, if throwing an exception is always OK, |
| * we could exempt in all cases when we reach this. |
| */ |
| /* We assume that we'll throw an unreadable exception for |
| * both unloaded and unreadable memory later. (Note that |
| * we flush the fragments after we flush the RAC during |
| * process_mmap(), so there is a small chance that we'll |
| * in fact completely allow execution - which is OK since |
| * still a possible APP race.) FIXME: it may be |
| * preferable to throw our own exception here: if DLLs |
| * stay in an inconsistent state for much longer while being |
| * unloaded under us than they do natively, then any execution |
| * during the unload would be bad. |
| */ |
| /* if we are unreadable, we could be _after_ unload */ |
| if (is_in_last_unloaded_region(target_addr)) { |
| DODEBUG({ |
| if (!is_readable_without_exception(target_addr, 4)) { |
| /* if currently unreadable and in last unloaded module |
| * we'd let this through and assume that we'll throw |
| * an exception to the app |
| */ |
| LOG(THREAD, LOG_RCT, 1, |
| "RCT: DLL unload in progress, "PFX" --ok\n", target_addr); |
| STATS_INC(num_unloaded_race_during); |
| } else { |
| LOG(THREAD, LOG_RCT, 1, |
| "RCT: target in already unloaded DLL, "PFX" --ok\n", |
| target_addr); |
| STATS_INC(num_unloaded_race_after); |
| } |
| }); |
| /* case 6008: should apply this exemption to unreadable memory in |
| * all unloaded DLLs, not only the last one |
| */ |
| |
| /* do not add exemption */ |
| return 3; /* allow, don't throw .C */ |
| } else { |
| /* we probably were just unreadable, bad app or possibly attack, |
| * leaving to rct_ret_unreadable further down |
| */ |
| /* FIXME: case 6008: there is also the possibility of a |
| * race: we may have been in the middle of an unload at the |
| * time we checked, but since we only keep the last unmap, |
| * another one could have taken place, so we would get |
| * here even if we wanted to exempt. |
| */ |
| /* fall through */ |
| ASSERT_NOT_TESTED(); |
| } |
| } |
| |
| /* ASLR: check if the target is in a wouldbe region; if so report as failure */ |
| if (aslr_is_possible_attack(target_addr)) { |
| LOG(THREAD, LOG_RCT, 1, |
| "RCT: ASLR: wouldbe a preferred DLL, "PFX" --BAD\n", target_addr); |
| /* fall through and report */ |
| ASSERT_NOT_TESTED(); |
| /* FIXME: case 7017 ASLR_NORMALIZE_ID handling */ |
| STATS_INC(aslr_rct_ret_wouldbe); |
| } |
| |
| /* special handling of unreadable memory targets - most likely |
| * corrupted app, but could also be an unsuccessful attack |
| */ |
| if (TEST(OPTION_ENABLED, DYNAMO_OPTION(rct_ret_unreadable))) { |
| if (!is_readable_without_exception(target_addr, 4)) { |
| SYSLOG_INTERNAL_WARNING_ONCE("return target "PFX" unreadable", |
| target_addr); |
| |
| /* We will eventually throw an exception unless |
| * security violation handles this differently. |
| * e.g. if OPTION_NO_REPORT|OPTION_BLOCK we may kill a thread |
| */ |
| /* the current defaults will let all of this through */ |
| /* FIXME: for now only OPTION_NO_REPORT is supported |
| * by security_violation() and that's all we currently need */ |
| if (security_violation(dcontext, target_addr, RETURN_TARGET_VIOLATION, |
| DYNAMO_OPTION(rct_ret_unreadable)) == |
| RETURN_TARGET_VIOLATION) { |
| /* do not cache unreadable memory target */ |
| return -1; |
| } else { |
| /* do not cache unreadable memory target */ |
| return 1; |
| } |
| } |
| } |
| |
| SYSLOG_INTERNAL_WARNING_ONCE("return target "PFX" with no known caller", |
| target_addr); |
| /* does not return in protect mode */ |
| if (security_violation(dcontext, target_addr, RETURN_TARGET_VIOLATION, |
| OPTION_BLOCK|OPTION_REPORT) == RETURN_TARGET_VIOLATION) { |
| /* running in detect mode */ |
| ASSERT(DYNAMO_OPTION(detect_mode) |
| /* case 9712: client security callback can modify the action. |
| * FIXME: if a client changes the action to ACTION_CONTINUE, |
| * this address will be exempted and we won't complain again. |
| * In the future we may need to add another action type. */ |
| IF_CLIENT_INTERFACE(||!IS_INTERNAL_STRING_OPTION_EMPTY(client_lib))); |
| /* we'll cache violation target */ |
| goto exempted; |
| } else { /* decided not to throw the violation */ |
| /* exempted Threat ID */ |
| /* we'll cache violation target */ |
| goto exempted; |
| } |
| exempted: |
| /* add target if in a module (code or data section), but not in DGC */ |
| if (DYNAMO_OPTION(rct_cache_exempt) == RCT_CACHE_EXEMPT_ALL || |
| (DYNAMO_OPTION(rct_cache_exempt) == RCT_CACHE_EXEMPT_MODULES && |
| (get_module_base(target_addr) != NULL))) { |
| /* FIXME: extra system calls may become more expensive |
| * than extra exits for simple pattern matches; we should |
| * have a cheap way of determining whether an address is |
| * in a module code section */ |
| |
| fragment_add_after_call(dcontext, target_addr); |
| ASSERT_CURIOSITY(is_executable_address(target_addr)); |
| STATS_INC(ret_after_call_exempt_added); |
| } |
| return 1; |
| } |
| #ifdef DIRECT_CALL_CHECK |
| else { |
| /* extra check on direct calls */ |
| /* TODO: verify if target is direct call */ |
| /* FIXME: make sure that instr_addr gets shifted properly on unit resize, |
| i.e. is treated as a normal fragment address; then this check is OK using a cache_pc */ |
| if (reverse_check_ret_source(target_addr, instr_addr) < 0) { |
| LOG(THREAD, LOG_INTERP, 1, "RCT: bad return source: "PFX" for after call target: "PFX"\n", |
| instr_addr, target_addr); |
| return -1; |
| } |
| } |
| #endif /* DIRECT_CALL_CHECK */ |
| LOG(THREAD, LOG_INTERP, 3, "RCT: good return to "PFX"\n", target_addr); |
| STATS_INC(ret_after_call_good); |
| |
| return 1; |
| } |
| |
| #endif /* RETURN_AFTER_CALL */ |