| /* ********************************************************** |
| * Copyright (c) 2011-2021 Google, Inc. All rights reserved. |
| * Copyright (c) 2001-2010 VMware, Inc. All rights reserved. |
| * ********************************************************** */ |
| |
| /* |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * * Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimer. |
| * |
| * * Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * * Neither the name of VMware, Inc. nor the names of its contributors may be |
| * used to endorse or promote products derived from this software without |
| * specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE |
| * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH |
| * DAMAGE. |
| */ |
| |
| /* Copyright (c) 2003-2007 Determina Corp. */ |
| /* Copyright (c) 2001-2003 Massachusetts Institute of Technology */ |
| /* Copyright (c) 2001 Hewlett-Packard Company */ |
| |
| /* |
| * x86_shared.asm - x86 specific assembly code for sharing. |
| * See comments in x86.asm on the format here. |
| */ |
| |
| #include "../arch/asm_defines.asm" |
| #include "../arch/x86/x86_asm_defines.asm" /* PUSHGPR, POPGPR, etc. */ |
| #ifdef LINUX |
| # include "include/syscall.h" |
| #endif |
| #ifdef MACOS |
| # include "include/syscall_mach.h" /* SYSCALL_NUM_MARKER_* */ |
| #endif |
| START_FILE |
| |
| DECL_EXTERN(unexpected_return) |
| |
| /* we share dynamorio_syscall w/ preload */ |
| #ifdef UNIX |
| /* to avoid libc wrappers we roll our own syscall here |
| * hardcoded to use int 0x80 for 32-bit -- FIXME: use something like do_syscall |
| * and syscall for 64-bit. |
| * signature: dynamorio_syscall(sysnum, num_args, arg1, arg2, ...) |
| * For Linux, the argument max is 6. |
| * For MacOS, the argument max is 6 for x64 and 7 for x86. |
| * |
| * Overview: |
| * - x64: the syscall args arrive two C-parameter slots late (after sysnum and |
| * num_args), so each one is shifted down into ARG1..ARG6; only the first |
| * num_args are moved. rcx is then copied to r10 and "syscall" is executed. |
| * - 32-bit: the args are loaded from the caller's stack into ebx, ecx, edx, |
| * esi, edi, ebp through a fall-through chain of labels, sysnum goes in eax, |
| * and "int 0x80" is executed. On MacOS the args are additionally pushed |
| * back onto the stack below a dummy return-address slot. |
| * - The result is left in eax/rax. On MacOS, eax is negated when the carry |
| * flag is set after the syscall (the "convert to -errno" step below). |
| */ |
| DECLARE_FUNC(dynamorio_syscall) |
| GLOBAL_LABEL(dynamorio_syscall:) |
| /* x64 kernel doesn't clobber all the callee-saved registers */ |
| push REG_XBX /* stack now aligned for x64 */ |
| # ifdef X64 |
| /* reverse order so we don't clobber earlier args */ |
| mov REG_XBX, ARG2 /* put num_args where we can reference it longer */ |
| mov rax, ARG1 /* sysnum: only need eax, but need rax for ARG1 (or movzx) */ |
| # ifdef MACOS |
| /* For now we assume a BSD syscall */ |
| or rax, SYSCALL_NUM_MARKER_BSD |
| # endif |
| /* Shift one arg at a time, stopping as soon as num_args have been moved. */ |
| cmp REG_XBX, 0 |
| je syscall_ready |
| mov ARG1, ARG3 |
| cmp REG_XBX, 1 |
| je syscall_ready |
| mov ARG2, ARG4 |
| cmp REG_XBX, 2 |
| je syscall_ready |
| mov ARG3, ARG5 |
| cmp REG_XBX, 3 |
| je syscall_ready |
| mov ARG4, ARG6 |
| cmp REG_XBX, 4 |
| je syscall_ready |
| /* The 7th and 8th C parameters (syscall args 5 and 6) live on the stack. */ |
| mov ARG5, [2*ARG_SZ + REG_XSP] /* arg7: above xbx and retaddr */ |
| cmp REG_XBX, 5 |
| je syscall_ready |
| mov ARG6, [3*ARG_SZ + REG_XSP] /* arg8: above arg7, xbx, retaddr */ |
| syscall_ready: |
| /* Copy rcx (presumably ARG4 under this calling convention) into r10 for |
| * the syscall. |
| */ |
| mov r10, rcx |
| syscall |
| # else |
| push REG_XBP |
| push REG_XSI |
| push REG_XDI |
| /* Add 16 to skip the 4 pushes. |
| * XXX: We do not align the stack to 16 b/c the kernel doesn't care. |
| * XXX: rather than this dispatch, could have separate routines |
| * for each #args, or could just blindly read upward on the stack. |
| * for dispatch, if assume size of mov instr can do single ind jmp |
| * |
| * Stack layout relative to esp after the 4 pushes: |
| * [16+0] retaddr, [16+4] sysnum, [16+8] num_args, [16+12] arg1, ... |
| */ |
| mov ecx, [16+ 8 + esp] /* num_args */ |
| /* Jump into the fall-through load chain below at the entry matching |
| * num_args, so that exactly the first num_args args get loaded. |
| */ |
| cmp ecx, 0 |
| je syscall_0args |
| cmp ecx, 1 |
| je syscall_1args |
| cmp ecx, 2 |
| je syscall_2args |
| cmp ecx, 3 |
| je syscall_3args |
| cmp ecx, 4 |
| je syscall_4args |
| cmp ecx, 5 |
| je syscall_5args |
| # ifdef MACOS |
| cmp ecx, 6 |
| je syscall_6args |
| # ifdef INTERNAL |
| /* More than 7 args is not supported: bail out. */ |
| cmp ecx, 7 |
| jg GLOBAL_REF(unexpected_return) |
| # endif |
| /* The 7th arg is parked in eax; it is pushed at syscall_0args below. */ |
| mov eax, [16+36 + esp] /* arg7 */ |
| syscall_6args: |
| # elif defined(INTERNAL) |
| /* More than 6 args is not supported: bail out. */ |
| cmp ecx, 6 |
| jg GLOBAL_REF(unexpected_return) |
| # endif |
| mov ebp, [16+32 + esp] /* arg6 */ |
| syscall_5args: |
| mov edi, [16+28 + esp] /* arg5 */ |
| syscall_4args: |
| mov esi, [16+24 + esp] /* arg4 */ |
| syscall_3args: |
| mov edx, [16+20 + esp] /* arg3 */ |
| syscall_2args: |
| mov ecx, [16+16 + esp] /* arg2 */ |
| syscall_1args: |
| mov ebx, [16+12 + esp] /* arg1 */ |
| syscall_0args: |
| # ifdef MACOS |
| /* With fewer than 7 args eax holds an arbitrary value here; the slot is |
| * pushed regardless. |
| */ |
| push eax /* 7th arg, if any */ |
| /* Arg size is encoded in upper bits. |
| * XXX: or is that only for sysenter gateway? |
| * We assume this is size, not count, and so for our "7 arg" |
| * call that's really 6 with one 64-bit we leave it. |
| */ |
| /* Offsets are now 20+ rather than 16+ because of the push just above. */ |
| mov eax, [20+ 8 + esp] /* num_args */ |
| shl eax, 18 /* <<16 but also *4 for size */ |
| or eax, [20+ 4 + esp] /* sysnum */ |
| /* args are on stack, w/ an extra slot (retaddr of syscall wrapper) */ |
| push ebp |
| push edi |
| push esi |
| push edx |
| push ecx |
| push ebx /* aligned to 16 after this push */ |
| push 0 /* extra slot (app retaddr) */ |
| /* It simplifies our syscall calling to have a single dynamorio_syscall() |
| * signature that returns int64 -- but most syscalls just return a 32-bit |
| * value and the kernel does not clear edx. Thus we need to do so, which |
| * should be safe since edx is caller-saved. (Note that we do not risk |
| * doing this for app syscalls: only those called by DR.) |
| */ |
| mov edx, 0 |
| # else |
| mov eax, [16+ 4 + esp] /* sysnum */ |
| # endif |
| /* PR 254280: we assume int$80 is ok even for LOL64, maybe slow is all. |
| * For Mac, it's possible to do sysenter here as we can store the retaddr |
| * in edx ourselves (in fact see r2514 dynamorio_syscall_sysenter for an |
| * implementation, now removed), but we still need int for certain syscalls |
| * (returning 64-bit values, e.g.) so we go w/ int always and assume our |
| * syscall perf doesn't matter much (should be rare). |
| */ |
| int HEX(80) |
| # ifdef MACOS |
| /* Discard the 8 slots pushed above (7th arg, 6 arg regs, dummy retaddr). |
| * lea is used because the carry flag is tested after the pops below. |
| */ |
| lea esp, [8*ARG_SZ + esp] /* must not change flags */ |
| # endif |
| pop REG_XDI |
| pop REG_XSI |
| pop REG_XBP |
| # endif /* X64 */ |
| pop REG_XBX |
| /* return val is in eax for us */ |
| /* for MacOS, it can also include edx, so be sure not to clobber that! */ |
| # ifdef MACOS |
| /* convert to -errno */ |
| /* None of the lea/pop instructions executed since the syscall modify the |
| * flags, so the carry flag tested here is still the one left by the syscall: |
| * carry clear skips the negation. |
| * NOTE(review): on x64 this 32-bit neg also zeroes the upper half of rax -- |
| * confirm that callers only consume a 32-bit error value. |
| */ |
| jae syscall_success |
| neg eax |
| syscall_success: |
| # endif |
| ret |
| END_FUNC(dynamorio_syscall) |
| |
| # ifdef MACOS |
| /* Mach dep syscall invocation. |
| * Signature: dynamorio_mach_dep_syscall(sysnum, num_args, arg1, arg2, ...) |
| * Only supports up to 4 args. |
| * |
| * - x64: the syscall args are shifted down two C-parameter slots into |
| * ARG1..ARG4 (only the first num_args are moved), rcx is copied to r10, and |
| * "syscall" is executed. |
| * NOTE(review): unlike dynamorio_syscall and dynamorio_mach_syscall, this |
| * path does not OR any SYSCALL_NUM_MARKER_* value into sysnum; presumably |
| * the caller passes an already-encoded number -- confirm. |
| * - 32-bit: the args are loaded into ebx, ecx, edx, esi, pushed below a dummy |
| * return-address slot, and "int 0x82" is executed with edx cleared first. |
| * - The result is left in eax/rax; eax is negated when the carry flag is set |
| * after the syscall (the "convert to -errno" step below). |
| */ |
| DECLARE_FUNC(dynamorio_mach_dep_syscall) |
| GLOBAL_LABEL(dynamorio_mach_dep_syscall:) |
| /* x64 kernel doesn't clobber all the callee-saved registers */ |
| push REG_XBX |
| # ifdef X64 |
| /* reverse order so we don't clobber earlier args */ |
| mov REG_XBX, ARG2 /* put num_args where we can reference it longer */ |
| mov rax, ARG1 /* sysnum: only need eax, but need rax to use ARG1 (or movzx) */ |
| /* Shift one arg at a time, stopping as soon as num_args have been moved. */ |
| cmp REG_XBX, 0 |
| je mach_dep_syscall_ready |
| mov ARG1, ARG3 |
| cmp REG_XBX, 1 |
| je mach_dep_syscall_ready |
| mov ARG2, ARG4 |
| cmp REG_XBX, 2 |
| je mach_dep_syscall_ready |
| mov ARG3, ARG5 |
| cmp REG_XBX, 3 |
| je mach_dep_syscall_ready |
| mov ARG4, ARG6 |
| mach_dep_syscall_ready: |
| /* The syscall instruction overwrites rcx, so the 4th arg has to be passed in |
| * r10 instead, exactly as dynamorio_syscall does. Without this copy a |
| * 4-arg call would hand the kernel whatever happened to be in r10. |
| * r10 is caller-saved, so the copy is harmless when there are fewer args. |
| */ |
| mov r10, rcx |
| syscall |
| # else |
| push REG_XBP |
| push REG_XSI |
| push REG_XDI |
| /* add 16 to skip the 4 pushes |
| * Stack layout relative to esp after the pushes: |
| * [16+0] retaddr, [16+4] sysnum, [16+8] num_args, [16+12] arg1, ... |
| */ |
| mov ecx, [16+ 8 + esp] /* num_args */ |
| /* Jump into the fall-through load chain at the entry matching num_args. |
| * Any count other than 0-3 is treated as 4. |
| */ |
| cmp ecx, 0 |
| je mach_dep_syscall_0args |
| cmp ecx, 1 |
| je mach_dep_syscall_1args |
| cmp ecx, 2 |
| je mach_dep_syscall_2args |
| cmp ecx, 3 |
| je mach_dep_syscall_3args |
| mov esi, [16+24 + esp] /* arg4 */ |
| mach_dep_syscall_3args: |
| mov edx, [16+20 + esp] /* arg3 */ |
| mach_dep_syscall_2args: |
| mov ecx, [16+16 + esp] /* arg2 */ |
| mach_dep_syscall_1args: |
| mov ebx, [16+12 + esp] /* arg1 */ |
| mach_dep_syscall_0args: |
| mov eax, [16+ 4 + esp] /* sysnum */ |
| lea REG_XSP, [-2*ARG_SZ + REG_XSP] /* maintain align-16: retaddr-5th below */ |
| /* args are on stack, w/ an extra slot (retaddr of syscall wrapper) */ |
| push esi |
| push edx |
| push ecx |
| push ebx |
| push 0 /* extra slot */ |
| /* clear the top half so we can always consider the result 64-bit */ |
| mov edx, 0 |
| /* mach dep syscalls use interrupt 0x82 */ |
| int HEX(82) |
| /* Discard the 2 padding slots plus the 5 pushes above. lea is used |
| * because the carry flag is tested after the pops below. |
| */ |
| lea esp, [7*ARG_SZ + esp] /* must not change flags */ |
| pop REG_XDI |
| pop REG_XSI |
| pop REG_XBP |
| # endif |
| pop REG_XBX |
| /* return val is in eax for us */ |
| /* for MacOS, it can also include edx, so be sure not to clobber that! */ |
| /* convert to -errno */ |
| /* None of the mov/lea/pop instructions executed since the syscall modify |
| * the flags, so the carry flag tested here is the one left by the syscall: |
| * carry clear skips the negation. |
| */ |
| jae mach_dep_syscall_success |
| neg eax |
| mach_dep_syscall_success: |
| ret |
| END_FUNC(dynamorio_mach_dep_syscall) |
| |
| |
| /* Mach syscall invocation. |
| * Signature: ptr_int_t dynamorio_mach_syscall(sysnum, num_args, arg1, arg2, ...) |
| * Only supports up to 4 args. |
| * Does not support returning a 64-bit value in 32-bit mode. |
| * |
| * - x64: SYSCALL_NUM_MARKER_MACH is ORed into sysnum, the syscall args are |
| * shifted down two C-parameter slots into ARG1..ARG4 (only the first |
| * num_args are moved), rcx is copied to r10, and "syscall" is executed. |
| * - 32-bit: sysnum is negated, the args are loaded into ebx, ecx, edx, esi |
| * and pushed below a dummy return-address slot, and "sysenter" is executed |
| * with the resume address in edx and the stack pointer in ecx. |
| * - The result is returned in eax/rax unmodified. |
| */ |
| DECLARE_FUNC(dynamorio_mach_syscall) |
| GLOBAL_LABEL(dynamorio_mach_syscall:) |
| /* x64 kernel doesn't clobber all the callee-saved registers */ |
| push REG_XBX |
| # ifdef X64 |
| /* reverse order so we don't clobber earlier args */ |
| mov REG_XBX, ARG2 /* put num_args where we can reference it longer */ |
| mov rax, ARG1 /* sysnum: only need eax, but need rax to use ARG1 (or movzx) */ |
| or eax, SYSCALL_NUM_MARKER_MACH |
| /* Shift one arg at a time, stopping as soon as num_args have been moved. */ |
| cmp REG_XBX, 0 |
| je dynamorio_mach_syscall_ready |
| mov ARG1, ARG3 |
| cmp REG_XBX, 1 |
| je dynamorio_mach_syscall_ready |
| mov ARG2, ARG4 |
| cmp REG_XBX, 2 |
| je dynamorio_mach_syscall_ready |
| mov ARG3, ARG5 |
| cmp REG_XBX, 3 |
| je dynamorio_mach_syscall_ready |
| mov ARG4, ARG6 |
| dynamorio_mach_syscall_ready: |
| /* The syscall instruction overwrites rcx, so the 4th arg has to be passed in |
| * r10 instead, exactly as dynamorio_syscall does. Without this copy a |
| * 4-arg call would hand the kernel whatever happened to be in r10. |
| * r10 is caller-saved, so the copy is harmless when there are fewer args. |
| */ |
| mov r10, rcx |
| syscall |
| # else |
| push REG_XBP |
| push REG_XSI |
| push REG_XDI |
| /* add 16 to skip the 4 pushes |
| * Stack layout relative to esp after the pushes: |
| * [16+0] retaddr, [16+4] sysnum, [16+8] num_args, [16+12] arg1, ... |
| */ |
| mov ecx, [16+ 8 + esp] /* num_args */ |
| /* Jump into the fall-through load chain at the entry matching num_args. |
| * Any count other than 0-3 is treated as 4. |
| */ |
| cmp ecx, 0 |
| je dynamorio_mach_syscall_0args |
| cmp ecx, 1 |
| je dynamorio_mach_syscall_1args |
| cmp ecx, 2 |
| je dynamorio_mach_syscall_2args |
| cmp ecx, 3 |
| je dynamorio_mach_syscall_3args |
| mov esi, [16+24 + esp] /* arg4 */ |
| dynamorio_mach_syscall_3args: |
| mov edx, [16+20 + esp] /* arg3 */ |
| dynamorio_mach_syscall_2args: |
| mov ecx, [16+16 + esp] /* arg2 */ |
| dynamorio_mach_syscall_1args: |
| mov ebx, [16+12 + esp] /* arg1 */ |
| dynamorio_mach_syscall_0args: |
| mov eax, [16+ 4 + esp] /* sysnum */ |
| /* The sysnum is passed as a negative number */ |
| neg eax |
| lea REG_XSP, [-2*ARG_SZ + REG_XSP] /* maintain align-16: retaddr-5th below */ |
| /* args are on stack, w/ an extra slot (retaddr of syscall wrapper) */ |
| push esi |
| push edx |
| push ecx |
| push ebx |
| push 0 /* extra slot */ |
| /* If we use ADDRTAKEN_LABEL and GLOBAL_REF we get text relocation |
| * complaints so we instead do this hack: |
| */ |
| /* call/pop yields the address of the pop itself; adding the encoded sizes |
| * of the pop, lea, mov, and sysenter instructions makes xdx the address of |
| * the instruction right after sysenter. |
| */ |
| call dynamorio_mach_syscall_next |
| dynamorio_mach_syscall_next: |
| pop REG_XDX |
| lea REG_XDX, [1/*pop*/ + 3/*lea*/ + 2/*sysenter*/ + 2/*mov*/ + REG_XDX] |
| /* Presumably the kernel resumes at xdx with the stack pointer taken from |
| * xcx on return from sysenter -- confirm against the kernel's convention. |
| */ |
| mov REG_XCX, REG_XSP |
| /* We have to use sysenter for a Mach syscall, else we get SIGSYS. |
| * This implies that we can't return 64-bit in 32-bit mode. |
| */ |
| sysenter |
| /* Discard the 2 padding slots plus the 5 pushes above. */ |
| lea esp, [7*ARG_SZ + esp] /* must not change flags */ |
| pop REG_XDI |
| pop REG_XSI |
| pop REG_XBP |
| # endif |
| pop REG_XBX |
| /* Return val is in eax for us. |
| * Note that unlike BSD and Machdep syscalls, Mach syscalls do not |
| * use flags to indicate success. |
| */ |
| ret |
| END_FUNC(dynamorio_mach_syscall) |
| |
| # endif /* MACOS */ |
| #endif /* UNIX */ |
| |
| /* void dr_fpu_exception_init(void) |
| * Resets the floating-point exception controls: loads 0x1f80 into MXCSR for |
| * xmm packed float and re-initializes the x87 unit with fninit for regular |
| * float. The two steps are independent of each other. |
| * Clobbers xax. |
| */ |
| #define FUNCNAME dr_fpu_exception_init |
| DECLARE_FUNC(FUNCNAME) |
| GLOBAL_LABEL(FUNCNAME:) |
| /* ldmxcsr takes a memory operand only, so stage the value on the stack. */ |
| push HEX(1f80) |
| ldmxcsr DWORD [REG_XSP] |
| pop REG_XAX /* release the staging slot */ |
| /* Now reset the x87 state. */ |
| fninit |
| ret |
| END_FUNC(FUNCNAME) |
| #undef FUNCNAME |
| |
| /* void get_mmx_val(OUT uint64 *val, uint index) |
| * Stores the 64-bit contents of mmx register number "index" into *val. |
| * Indices 0 through 6 select mm0 through mm6; any other index stores mm7. |
| * Clobbers xax and xcx. |
| */ |
| #define FUNCNAME get_mmx_val |
| DECLARE_FUNC_SEH(FUNCNAME) |
| GLOBAL_LABEL(FUNCNAME:) |
| /* val must be fetched before index: loading xcx first could overwrite ARG1. */ |
| mov REG_XAX, ARG1 /* xax = val */ |
| mov REG_XCX, ARG2 /* ecx = index */ |
| END_PROLOG |
| /* Dispatch on the index; mmx registers cannot be selected at runtime, so |
| * each one gets its own store. |
| */ |
| test ecx, ecx |
| jz get_mmx_0 |
| cmp ecx, 1 |
| je get_mmx_1 |
| cmp ecx, 2 |
| je get_mmx_2 |
| cmp ecx, 3 |
| je get_mmx_3 |
| cmp ecx, 4 |
| je get_mmx_4 |
| cmp ecx, 5 |
| je get_mmx_5 |
| cmp ecx, 6 |
| je get_mmx_6 |
| /* No match above: store mm7. */ |
| movq QWORD [REG_XAX], mm7 |
| jmp get_mmx_done |
| get_mmx_0: |
| movq QWORD [REG_XAX], mm0 |
| jmp get_mmx_done |
| get_mmx_1: |
| movq QWORD [REG_XAX], mm1 |
| jmp get_mmx_done |
| get_mmx_2: |
| movq QWORD [REG_XAX], mm2 |
| jmp get_mmx_done |
| get_mmx_3: |
| movq QWORD [REG_XAX], mm3 |
| jmp get_mmx_done |
| get_mmx_4: |
| movq QWORD [REG_XAX], mm4 |
| jmp get_mmx_done |
| get_mmx_5: |
| movq QWORD [REG_XAX], mm5 |
| jmp get_mmx_done |
| get_mmx_6: |
| movq QWORD [REG_XAX], mm6 |
| /* fall through */ |
| get_mmx_done: |
| add REG_XSP, 0 /* make a legal SEH64 epilog */ |
| ret |
| END_FUNC(FUNCNAME) |
| #undef FUNCNAME |
| |
| #ifdef WINDOWS /* on linux we use inline asm versions */ |
| |
| /* byte *get_frame_ptr(void) |
| * Returns the caller's frame pointer: xbp is not modified here, so its |
| * value on entry is the value handed back in xax. |
| */ |
| DECLARE_FUNC(get_frame_ptr) |
| GLOBAL_LABEL(get_frame_ptr:) |
| lea REG_XAX, [REG_XBP] /* xax = xbp; flags untouched */ |
| ret |
| END_FUNC(get_frame_ptr) |
| |
| /* byte *get_stack_ptr(void) |
| * Returns in xax the value xsp had before the call instruction that |
| * entered this routine, i.e., the current xsp plus the return-address slot. |
| */ |
| DECLARE_FUNC(get_stack_ptr) |
| GLOBAL_LABEL(get_stack_ptr:) |
| mov REG_XAX, ARG_SZ /* size of the return address slot */ |
| add REG_XAX, REG_XSP /* xax = xsp + ARG_SZ */ |
| ret |
| END_FUNC(get_stack_ptr) |
| |
| #endif /* WINDOWS */ |
| |
| |
| /***************************************************************************/ |
| #if defined(WINDOWS) && !defined(X64) |
| |
| /* Routines to switch to 64-bit mode from 32-bit WOW64, make a 64-bit |
| * call, and then return to 32-bit mode. |
| */ |
| |
| /* |
| * int switch_modes_and_load(void *ntdll64_LdrLoadDll, |
| * UNICODE_STRING_64 *lib, |
| * HANDLE *result) |
| * XXX i#1633: this routine does not yet support ntdll64 > 4GB |
| * |
| * Far-jumps into 64-bit mode, calls ntdll64_LdrLoadDll(NULL, |
| * &DllCharacteristics, lib, result) with a zero-initialized |
| * DllCharacteristics slot, far-jumps back to 32-bit mode, and returns whatever |
| * the callee left in eax. |
| * The 64-bit-only instructions are produced by prefixing 32-bit mnemonics |
| * with raw prefix bytes via RAW(); the trailing comment on each such line |
| * gives the instruction that actually executes. |
| */ |
| # define FUNCNAME switch_modes_and_load |
| DECLARE_FUNC(FUNCNAME) |
| GLOBAL_LABEL(FUNCNAME:) |
| /* get args before we change esp */ |
| mov eax, ARG1 |
| mov ecx, ARG2 |
| mov edx, ARG3 |
| /* save callee-saved registers */ |
| push ebx |
| /* far jmp to next instr w/ 64-bit switch: jmp 0033:<sml_transfer_to_64> */ |
| /* Hand-encoded as opcode ea, a 4-byte offset, and a 2-byte selector. */ |
| RAW(ea) |
| DD offset sml_transfer_to_64 |
| DB CS64_SELECTOR |
| RAW(00) |
| sml_transfer_to_64: |
| /* Below here is executed in 64-bit mode, but with guarantees that |
| * no address is above 4GB, as this is a WOW64 process. |
| */ |
| /* Call LdrLoadDll to load 64-bit lib: |
| * LdrLoadDll(IN PWSTR DllPath OPTIONAL, |
| * IN PULONG DllCharacteristics OPTIONAL, |
| * IN PUNICODE_STRING DllName, |
| * OUT PVOID *DllHandle)); |
| */ |
| /* The r9/r8 moves must come first: they read the edx and ecx values that |
| * the lea and xor just below overwrite. |
| */ |
| RAW(4c) RAW(8b) RAW(ca) /* mov r9, rdx : 4th arg: result */ |
| RAW(4c) RAW(8b) RAW(c1) /* mov r8, rcx : 3rd arg: lib */ |
| push 0 /* slot for &DllCharacteristics */ |
| lea edx, dword ptr [esp] /* 2nd arg: &DllCharacteristics */ |
| xor ecx, ecx /* 1st arg: DllPath = NULL */ |
| /* save WOW64 state */ |
| RAW(41) push esp /* push r12 */ |
| RAW(41) push ebp /* push r13 */ |
| RAW(41) push esi /* push r14 */ |
| RAW(41) push edi /* push r15 */ |
| /* align the stack pointer */ |
| mov ebx, esp /* save esp in callee-preserved reg */ |
| sub esp, 32 /* call conv */ |
| and esp, HEX(fffffff0) /* align to 16-byte boundary */ |
| call eax |
| mov esp, ebx /* restore esp */ |
| /* restore WOW64 state */ |
| RAW(41) pop edi /* pop r15 */ |
| RAW(41) pop esi /* pop r14 */ |
| RAW(41) pop ebp /* pop r13 */ |
| RAW(41) pop esp /* pop r12 */ |
| /* far jmp to next instr w/ 32-bit switch: jmp 0023:<sml_return_to_32> */ |
| /* The target is built on the stack: offset in the low 4 bytes of the |
| * pushed slot, selector in the 4 bytes above it. |
| */ |
| push offset sml_return_to_32 /* 8-byte push */ |
| mov dword ptr [esp + 4], CS32_SELECTOR /* top 4 bytes of prev push */ |
| jmp fword ptr [esp] |
| sml_return_to_32: |
| /* Back in 32-bit mode: two 8-byte slots were pushed in 64-bit mode. */ |
| add esp, 16 /* clean up far jmp target and &DllCharacteristics */ |
| pop ebx /* restore callee-saved reg */ |
| ret /* return value already in eax */ |
| END_FUNC(FUNCNAME) |
| |
| /* |
| * void d_r_set_ss_selector() |
| * |
| * Reads the current ss segment selector and stores it in the external |
| * variable d_r_ss_value, which switch_modes_and_call reloads into ss after |
| * it returns to 32-bit mode. |
| * Clobbers eax. |
| */ |
| DECL_EXTERN(d_r_ss_value) |
| # undef FUNCNAME |
| # define FUNCNAME d_r_set_ss_selector |
| DECLARE_FUNC(FUNCNAME) |
| GLOBAL_LABEL(FUNCNAME:) |
| mov eax, ss /* eax = current ss selector */ |
| mov DWORD SYMREF(d_r_ss_value), eax /* d_r_ss_value = eax */ |
| ret |
| END_FUNC(FUNCNAME) |
| |
| /* |
| * int switch_modes_and_call(invoke_uint64_t *args) |
| * |
| * Far-jumps into 64-bit mode, calls the function described by args with up |
| * to six 64-bit arguments, far-jumps back to 32-bit mode, and returns |
| * whatever the callee left in eax. |
| * As read below, args is laid out as consecutive 8-byte fields: func at |
| * offset 0, then arg1 through arg6 at offsets 8*1 through 8*6. |
| * The 64-bit-only instructions are produced by prefixing 32-bit mnemonics |
| * with raw prefix bytes via RAW(); the trailing comment on each such line |
| * gives the instruction that actually executes. |
| */ |
| # undef FUNCNAME |
| # define FUNCNAME switch_modes_and_call |
| DECLARE_FUNC(FUNCNAME) |
| GLOBAL_LABEL(FUNCNAME:) |
| mov eax, ARG1 |
| /* Save callee-saved registers. */ |
| push ebx |
| push esi |
| push edi |
| push ebp |
| /* Far jmp to next instr w/ 64-bit switch: jmp 0033:<smc_transfer_to_64>. */ |
| /* Hand-encoded as opcode ea, a 4-byte offset, and a 2-byte selector. */ |
| RAW(ea) |
| DD offset smc_transfer_to_64 |
| DB CS64_SELECTOR |
| RAW(00) |
| smc_transfer_to_64: |
| /* Below here is executed in 64-bit mode, but with guarantees that |
| * no address is above 4GB, as this is a WOW64 process. |
| */ |
| /* Save WOW64 callee-saved registers. */ |
| RAW(41) push esp /* push r12 */ |
| RAW(41) push ebp /* push r13 */ |
| RAW(41) push esi /* push r14 */ |
| RAW(41) push edi /* push r15 */ |
| /* Align the stack pointer. */ |
| mov ebx, esp /* save esp in callee-preserved reg */ |
| and esp, HEX(fffffff0) /* align to 16-byte boundary */ |
| /* Set up args on the stack. */ |
| /* Two 8-byte pushes plus the 32-byte reservation below keep the stack |
| * 16-byte aligned at the call. |
| */ |
| RAW(48) mov ecx, DWORD [eax + 8*6] /* load args.arg6 */ |
| push ecx /* push args.arg6 */ |
| RAW(48) mov ecx, DWORD [eax + 8*5] /* load args.arg5 */ |
| push ecx /* push args.arg5 */ |
| sub esp, 32 /* Leave slots for args 1-4. */ |
| /* Load arg4, arg3, arg2, arg1 into r9, r8, rdx, rcx. The args pointer in |
| * eax is still needed for every load, so it is overwritten last, by func. |
| */ |
| RAW(4c) mov ecx, DWORD [eax + 8*4] /* load args.arg4 into r9 */ |
| RAW(4c) mov eax, DWORD [eax + 8*3] /* load args.arg3 into r8 */ |
| RAW(48) mov edx, DWORD [eax + 8*2] /* load args.arg2 into rdx */ |
| RAW(48) mov ecx, DWORD [eax + 8*1] /* load args.arg1 into rcx */ |
| RAW(48) mov eax, DWORD [eax] /* load args.func into rax */ |
| call eax /* call rax */ |
| mov esp, ebx /* restore rsp */ |
| /* Restore WOW64 callee-saved regs. */ |
| RAW(41) pop edi /* pop r15 */ |
| RAW(41) pop esi /* pop r14 */ |
| RAW(41) pop ebp /* pop r13 */ |
| RAW(41) pop esp /* pop r12 */ |
| /* Far jmp to next instr w/ 32-bit switch: jmp 0023:<smc_return_to_32>. */ |
| /* The target is built on the stack: offset in the low 4 bytes of the |
| * pushed slot, selector in the 4 bytes above it. |
| */ |
| push offset smc_return_to_32 /* 8-byte push */ |
| mov dword ptr [esp + 4], CS32_SELECTOR /* top 4 bytes of prev push */ |
| jmp fword ptr [esp] |
| smc_return_to_32: |
| add esp, 8 /* clean up far jmp target */ |
| /* i#4091: Work around an AMD processor bug where after switching from 64-bit |
| * back to 32-bit, if a thread switch happens around the same time, the |
| * SS segment descriptor gets corrupted somehow and any ESP reference |
| * raises an access violation with an undocumented Parameter[0]=00000003. |
| * Re-instating the proper descriptor by re-loading the selector seems |
| * to solve the problem. |
| */ |
| /* ebx is free as scratch here: its saved value is popped just below. */ |
| mov ebx, DWORD SYMREF(d_r_ss_value) |
| mov ss, ebx |
| /* Restore callee-saved regs. */ |
| pop ebp |
| pop edi |
| pop esi |
| pop ebx |
| ret /* return value already in eax */ |
| END_FUNC(FUNCNAME) |
| |
| #endif /* WINDOWS && !X64 */ |
| |
| /**************************************************************************** |
| * Injection code shared between core and drinjectlib. |
| * XXX: since we are exporting this file in the "drlibc" lib we may want |
| * to move this code to a new file such as inject_shared.asm. |
| */ |
| #ifdef WINDOWS |
| |
| /* void load_dynamo(void) |
| * |
| * used for injection into a child process |
| * N.B.: if the code here grows, SIZE_OF_LOAD_DYNAMO in win32/inject.c |
| * must be updated. |
| * |
| * Flow: pop &LoadLibrary and &dynamo_path and call LoadLibrary; on success |
| * pop &GetProcAddr and &dynamo_entry and call GetProcAddress; on success jump |
| * to the returned address. Either failure jumps to load_dynamo_failure with |
| * the four pointers already popped, so that the saved context is at TOS. |
| */ |
| DECLARE_FUNC(load_dynamo) |
| GLOBAL_LABEL(load_dynamo:) |
| /* the code for this routine is copied into an allocation in the app |
| and invoked upon return from the injector. When it is invoked, |
| it expects the app's stack to look like this: |
| |
| xsp-->| &LoadLibrary | xsp must be 16-aligned |
| | &dynamo_path | |
| | &GetProcAddr | |
| | &dynamo_entry |___ |
| | | | |
| |(saved context)| priv_mcontext_t struct |
| | &code_alloc | | pointer to the code allocation |
| | sizeof(code_alloc)| size of the code allocation |
| |_______________|___| (possible padding for x64 xsp alignment) |
| &dynamo_path-->| | | |
| | (dynamo path) | TEXT(DYNAMORIO_DLL_PATH) |
| |_______________|___| |
| &dynamo_entry-->| | | |
| | (dynamo entry)| "dynamo_auto_start" |
| | |___| |
| |
| |
| in separate allocation ___ |
| | | | |
| | CODE | load_dynamo() code |
| | |___| |
| |
| The load_dynamo routine will load the dynamo DLL into memory, then jump |
| to its dynamo_auto_start entry point, passing it the saved app context as |
| parameters. |
| */ |
| /* two byte NOP to satisfy third party braindead-ness documented in case 3821 */ |
| mov edi, edi |
| /* Update priv_mcontext_t's xcx/xax in case the target start address was changed |
| * for .NET (i#3046). LdrpInitializeProcess goes and changes the initial |
| * thread's CONTEXT.Xcx from what the kernel set (the executable image entry), |
| * and what inject_into_thread() cached here on the stack, to something like |
| * MSCOREE!CorExeMain_Exported. We assume no other state was changed: just |
| * Xcx/Xax. Long-term we'd like to make early injection the default, which |
| * avoids this problem. |
| */ |
| /* The 4*ARG_SZ term skips the four pointers at TOS (see the layout above) to |
| * reach the saved context. |
| */ |
| #ifdef X64 |
| mov PTRSZ [MCONTEXT_XCX_OFFS + 4*ARG_SZ + REG_XSP], REG_XCX |
| #else |
| mov PTRSZ [MCONTEXT_XAX_OFFS + 4*ARG_SZ + REG_XSP], REG_XAX |
| #endif |
| #ifdef LOAD_DYNAMO_DEBUGBREAK |
| /* having this code in front may hide the problem addressed with the |
| * above padding */ |
| /* giant loop so can attach debugger, then change ebx to 1 |
| * to step through rest of code */ |
| mov ebx, HEX(7fffffff) |
| load_dynamo_repeat_outer: |
| mov eax, HEX(7fffffff) |
| load_dynamo_repeatme: |
| dec eax |
| cmp eax, 0 |
| jg load_dynamo_repeatme |
| dec ebx |
| cmp ebx, 0 |
| jg load_dynamo_repeat_outer |
| |
| # ifdef X64 |
| /* xsp is 8-aligned and our pop makes it 16-aligned NOCHECK BAIL ON WINDOWS WHICH DOESN'T REQUIRE 16-BYTE ALIGN? */ |
| # endif |
| /* In this debug configuration an extra &DebugBreak pointer is expected on |
| * top of the layout shown above. |
| */ |
| /* TOS has &DebugBreak */ |
| pop REG_XBX /* pop REG_XBX = &DebugBreak */ |
| CALLWIN0(REG_XBX) /* call DebugBreak (in kernel32.lib) */ |
| #endif |
| /* TOS has &LoadLibraryA */ |
| pop REG_XBX /* pop REG_XBX = &LoadLibraryA */ |
| /* TOS has &dynamo_path */ |
| pop REG_XAX /* for 32-bit we're doing "pop eax, push eax" */ |
| /* Two of the four pointer slots have been popped from a 16-aligned xsp, so |
| * pad by the remainder for the duration of the call and undo it afterward. |
| */ |
| sub REG_XSP, FRAME_ALIGNMENT - ARG_SZ*2 /* Align to 16. */ |
| CALLWIN1(REG_XBX, REG_XAX) /* call LoadLibraryA (in kernel32.lib) */ |
| add REG_XSP, FRAME_ALIGNMENT - ARG_SZ*2 /* Undo align. */ |
| |
| /* check result */ |
| cmp REG_XAX, 0 |
| jne load_dynamo_success |
| /* LoadLibrary returned 0: discard the two remaining pointers so that the |
| * saved context is at TOS, as load_dynamo_failure expects. |
| */ |
| pop REG_XBX /* pop off &GetProcAddress */ |
| pop REG_XBX /* pop off &dynamo_entry */ |
| jmp load_dynamo_failure |
| load_dynamo_success: |
| /* TOS has &GetProcAddress */ |
| pop REG_XBX /* pop REG_XBX = &GetProcAddress */ |
| /* dynamo_handle is now in REG_XAX (returned by call LoadLibrary) */ |
| /* TOS has &dynamo_entry */ |
| pop REG_XDI /* for 32-bit we're doing "pop edi, push edi" */ |
| /* Stack is now 16-byte aligned. */ |
| CALLWIN2(REG_XBX, REG_XAX, REG_XDI) /* call GetProcAddress */ |
| /* GetProcAddress returned 0: go native via load_dynamo_failure. */ |
| cmp REG_XAX, 0 |
| je load_dynamo_failure |
| |
| /* jump to dynamo_auto_start (returned by GetProcAddress) */ |
| jmp REG_XAX |
| /* dynamo_auto_start will take over or continue natively at the saved |
| * context via load_dynamo_failure. |
| */ |
| END_FUNC(load_dynamo) |
| /* N.B.: load_dynamo_failure MUST follow load_dynamo, as both are |
| * copied in one fell swoop by inject_into_thread()! |
| */ |
| /* not really a function but having issues getting both masm and gas to |
| * let other asm routines jump here. |
| * targeted by load_dynamo and dynamo_auto_start by a jump, not a call, |
| * when we should not take over and should go native instead. |
| * Xref case 7654: we come here to the child's copy from dynamo_auto_start |
| * instead of returning to the parent's copy post-load_dynamo to avoid |
| * incompatibilities with stack layout across DR versions. |
| */ |
| /* Go-native path: restores the saved app context found at TOS and resumes |
| * the app at its saved start pc. Reached by a jump, never by a call. |
| */ |
| DECLARE_FUNC(load_dynamo_failure) |
| GLOBAL_LABEL(load_dynamo_failure:) |
| /* Would be nice if we could free our allocation here as well, but |
| * that's too much of a pain (esp. here). |
| * Note TOS has the saved context at this point, xref layout in |
| * auto_setup. Note this code is duplicated in dynamo_auto_start. */ |
| mov REG_XAX, [MCONTEXT_XSP_OFFS + REG_XSP] /* load app xsp */ |
| mov REG_XBX, [MCONTEXT_PC_OFFS + REG_XSP] /* load app start_pc */ |
| /* write app start_pc off top of app stack */ |
| /* It is stashed just below the app xsp so that the final jmp can reach |
| * it through xsp once every register has been restored. |
| */ |
| mov [-ARG_SZ + REG_XAX], REG_XBX |
| /* it's ok to write past app TOS since we're just overwriting part of |
| * the dynamo_entry string which is dead at this point, won't affect |
| * the popping of the saved context */ |
| POPGPR |
| POPF |
| /* we assume reading beyond TOS is ok here (no signals on windows) */ |
| /* we assume xmm0-5 do not need to be restored */ |
| /* restore app xsp (POPGPR doesn't) */ |
| /* Presumably xsp now sits MCONTEXT_PC_OFFS bytes into the saved context, |
| * hence the -MCONTEXT_PC_OFFS correction to address its xsp field -- confirm |
| * against the POPGPR/POPF definitions. |
| */ |
| mov REG_XSP, [-MCONTEXT_PC_OFFS + MCONTEXT_XSP_OFFS + REG_XSP] |
| jmp PTRSZ [-ARG_SZ + REG_XSP] /* jmp to app start_pc */ |
| |
| /* Not reached: the indirect jmp above does not fall through. */ |
| ret |
| END_FUNC(load_dynamo_failure) |
| |
| #endif /* WINDOWS */ |
| |
| #ifdef LINUX |
| /* SYS_clone swaps the stack so we need asm support to call it. |
| * signature: |
| * thread_id_t dynamorio_clone(uint flags, byte *newsp, void *ptid, void *tls, |
| * void *ctid, void (*func)(void)) |
| * i#6514: If newsp is NULL then that tells the kernel to give the child the |
| * same value for SP as the parent. |
| * |
| * Issues SYS_clone and then splits on the value left in xax: |
| * - zero (the child): calls func; if func ever returns, control goes to |
| * unexpected_return. |
| * - non-zero (the parent): restores the callee-saved registers and returns |
| * that value to the caller. |
| * A non-NULL newsp is rounded down to a multiple of FRAME_ALIGNMENT first. |
| */ |
| DECLARE_FUNC(dynamorio_clone) |
| GLOBAL_LABEL(dynamorio_clone:) |
| /* Save func for use post-syscall on the newsp. |
| * This is tricky because we have to handle the case of newsp == NULL. |
| */ |
| # ifdef X64 |
| /* The syscall preserves all registers except rax, rcx, r11. */ |
| /* So func can simply ride in callee-saved r15, whichever stack the child |
| * ends up on. |
| */ |
| push r15 |
| mov r15, ARG6 /* Func is now in r15. */ |
| and ARG2, -FRAME_ALIGNMENT /* For glibc compatibility, align newsp. */ |
| /* All args are already in syscall registers, except for rcx. */ |
| mov r10, rcx |
| mov REG_XAX, SYS_clone |
| syscall |
| # else |
| /* Fetch some args we need before we modify XSP and ARGn is no |
| * longer usable. |
| */ |
| mov REG_XCX, ARG2 /* newsp */ |
| mov REG_XDX, ARG3 /* ptid */ |
| mov REG_XAX, ARG6 /* func */ |
| /* Preserve callee-saved regs. */ |
| push REG_XBX |
| push REG_XSI |
| push REG_XDI |
| /* Now can't use ARG* since xsp modified by pushes. */ |
| /* The remaining syscall args go in ebx (flags), esi (tls), edi (ctid). */ |
| mov REG_XBX, DWORD [4*ARG_SZ + REG_XSP] /* ARG1 + 3 pushes */ |
| mov REG_XSI, DWORD [7*ARG_SZ + REG_XSP] /* ARG4 + 3 pushes */ |
| mov REG_XDI, DWORD [8*ARG_SZ + REG_XSP] /* ARG5 + 3 pushes */ |
| /* i#6514: Save func on the child's stack. Remember that if newsp is |
| * NULL then the child's stack is our stack. When the syscall returns |
| * it's cumbersome to know whether newsp was NULL. To keep things simple |
| * for the parent always push func on our stack. |
| */ |
| push REG_XAX /* Xsp is misaligned at this point but kernel doesn't care. */ |
| /* The and sets ZF when the aligned newsp is zero, which the jz tests. */ |
| and REG_XCX, -FRAME_ALIGNMENT /* For glibc compatibility, align newsp. */ |
| jz newsp_is_null |
| sub REG_XCX, ARG_SZ |
| mov [REG_XCX], REG_XAX /* Func is now on TOS of newsp. */ |
| newsp_is_null: |
| mov REG_XAX, SYS_clone |
| /* PR 254280: we assume int$80 is ok even for LOL64 */ |
| int HEX(80) |
| # endif |
| /* Non-zero result: we are the parent. Zero: we are the child. */ |
| cmp REG_XAX, 0 |
| jne dynamorio_clone_parent |
| # ifdef X64 |
| call r15 |
| # else |
| /* func is at TOS on either stack: stored at newsp above, or pushed on the |
| * shared stack when newsp was NULL. |
| */ |
| pop REG_XCX |
| call REG_XCX |
| # endif |
| /* Shouldn't return. */ |
| jmp GLOBAL_REF(unexpected_return) |
| dynamorio_clone_parent: |
| # ifdef X64 |
| pop r15 |
| # else |
| /* Restore callee-saved regs. */ |
| add REG_XSP, ARG_SZ /* Discard func. */ |
| pop REG_XDI |
| pop REG_XSI |
| pop REG_XBX |
| # endif |
| /* Return val is in eax still. */ |
| ret |
| END_FUNC(dynamorio_clone) |
| #endif /* LINUX */ |
| |
| END_FILE |