/* **********************************************************
* Copyright (c) 2011-2021 Google, Inc. All rights reserved.
* Copyright (c) 2001-2010 VMware, Inc. All rights reserved.
* ********************************************************** */
/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/* Copyright (c) 2003-2007 Determina Corp. */
/* Copyright (c) 2001-2003 Massachusetts Institute of Technology */
/* Copyright (c) 2001 Hewlett-Packard Company */
/*
 * x86_shared.asm - x86-specific assembly code for sharing.
* See comments in x86.asm on the format here.
*/
#include "../arch/asm_defines.asm"
#include "../arch/x86/x86_asm_defines.asm" /* PUSHGPR, POPGPR, etc. */
#ifdef LINUX
# include "include/syscall.h"
#endif
#ifdef MACOS
# include "include/syscall_mach.h" /* SYSCALL_NUM_MARKER_* */
#endif
START_FILE
DECL_EXTERN(unexpected_return)
/* we share dynamorio_syscall w/ preload */
#ifdef UNIX
/* To avoid libc wrappers we roll our own syscall here.
 * It is hardcoded to use int 0x80 for 32-bit (FIXME: use something like do_syscall)
 * and the syscall instruction for 64-bit.
 * Signature: dynamorio_syscall(sysnum, num_args, arg1, arg2, ...)
 * For Linux, the argument max is 6.
 * For MacOS, the argument max is 6 for x64 and 7 for x86.
 */
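/* Illustrative use (a sketch; SYS_close and the variable names are placeholders,
 * not taken from a particular caller):
 *   res = dynamorio_syscall(SYS_close, 1, fd);
 * On Linux a failing syscall returns -errno in res; on MacOS, failure is signaled
 * via the carry flag and converted to -errno at the end of this routine.
 */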
DECLARE_FUNC(dynamorio_syscall)
GLOBAL_LABEL(dynamorio_syscall:)
/* x64 kernel doesn't clobber all the callee-saved registers */
push REG_XBX /* stack now aligned for x64 */
# ifdef X64
/* reverse order so we don't clobber earlier args */
mov REG_XBX, ARG2 /* put num_args where we can reference it longer */
mov rax, ARG1 /* sysnum: only need eax, but need rax for ARG1 (or movzx) */
# ifdef MACOS
/* For now we assume a BSD syscall */
or rax, SYSCALL_NUM_MARKER_BSD
# endif
cmp REG_XBX, 0
je syscall_ready
mov ARG1, ARG3
cmp REG_XBX, 1
je syscall_ready
mov ARG2, ARG4
cmp REG_XBX, 2
je syscall_ready
mov ARG3, ARG5
cmp REG_XBX, 3
je syscall_ready
mov ARG4, ARG6
cmp REG_XBX, 4
je syscall_ready
mov ARG5, [2*ARG_SZ + REG_XSP] /* arg7: above xbx and retaddr */
cmp REG_XBX, 5
je syscall_ready
mov ARG6, [3*ARG_SZ + REG_XSP] /* arg8: above arg7, xbx, retaddr */
syscall_ready:
mov r10, rcx
syscall
# else
push REG_XBP
push REG_XSI
push REG_XDI
/* Add 16 to skip the 4 pushes.
 * XXX: We do not align the stack to 16 b/c the kernel doesn't care.
 * XXX: Rather than this dispatch, we could have separate routines
 * for each arg count, or could just blindly read upward on the stack.
 * For the dispatch, if we assume a fixed mov instr size we could use a single
 * indirect jmp. */
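/* Stack layout at this point (32-bit), which the [16+N + esp] offsets below index:
 *   esp+0..15 : saved edi, esi, ebp, ebx (the 4 pushes, hence the "+16")
 *   esp+16    : return address
 *   esp+20    : sysnum   ([16+ 4 + esp])
 *   esp+24    : num_args ([16+ 8 + esp])
 *   esp+28    : arg1     ([16+12 + esp]), then arg2..argN upward in 4-byte slots
 */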
mov ecx, [16+ 8 + esp] /* num_args */
cmp ecx, 0
je syscall_0args
cmp ecx, 1
je syscall_1args
cmp ecx, 2
je syscall_2args
cmp ecx, 3
je syscall_3args
cmp ecx, 4
je syscall_4args
cmp ecx, 5
je syscall_5args
# ifdef MACOS
cmp ecx, 6
je syscall_6args
# ifdef INTERNAL
cmp ecx, 7
jg GLOBAL_REF(unexpected_return)
# endif
mov eax, [16+36 + esp] /* arg7 */
syscall_6args:
# elif defined(INTERNAL)
cmp ecx, 6
jg GLOBAL_REF(unexpected_return)
# endif
mov ebp, [16+32 + esp] /* arg6 */
syscall_5args:
mov edi, [16+28 + esp] /* arg5 */
syscall_4args:
mov esi, [16+24 + esp] /* arg4 */
syscall_3args:
mov edx, [16+20 + esp] /* arg3 */
syscall_2args:
mov ecx, [16+16 + esp] /* arg2 */
syscall_1args:
mov ebx, [16+12 + esp] /* arg1 */
syscall_0args:
# ifdef MACOS
push eax /* 7th arg, if any */
/* The arg size is encoded in the upper bits.
 * XXX: or is that only for the sysenter gateway?
 * We assume this is a size, not a count, so for our "7 arg" call, which is
 * really 6 args with one of them 64-bit, we leave it as-is.
 */
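/* Worked example of the encoding below, under the size interpretation above:
 * num_args = 3  =>  3 << 18 == (3*4) << 16 == 0x000c0000, i.e. 12 bytes of args
 * in the upper bits, OR'ed with the sysnum in the low bits.
 */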
mov eax, [20+ 8 + esp] /* num_args */
shl eax, 18 /* <<16 but also *4 for size */
or eax, [20+ 4 + esp] /* sysnum */
/* args are on stack, w/ an extra slot (retaddr of syscall wrapper) */
push ebp
push edi
push esi
push edx
push ecx
push ebx /* aligned to 16 after this push */
push 0 /* extra slot (app retaddr) */
/* It simplifies our syscall calling to have a single dynamorio_syscall()
* signature that returns int64 -- but most syscalls just return a 32-bit
* value and the kernel does not clear edx. Thus we need to do so, which
* should be safe since edx is caller-saved. (Note that we do not risk
* doing this for app syscalls: only those called by DR.)
*/
mov edx, 0
# else
mov eax, [16+ 4 + esp] /* sysnum */
# endif
/* PR 254280: we assume int$80 is ok even for LOL64; it may just be slower.
* For Mac, it's possible to do sysenter here as we can store the retaddr
* in edx ourselves (in fact see r2514 dynamorio_syscall_sysenter for an
* implementation, now removed), but we still need int for certain syscalls
* (returning 64-bit values, e.g.) so we go w/ int always and assume our
* syscall perf doesn't matter much (should be rare).
*/
int HEX(80)
# ifdef MACOS
lea esp, [8*ARG_SZ + esp] /* must not change flags */
# endif
pop REG_XDI
pop REG_XSI
pop REG_XBP
# endif /* X64 */
pop REG_XBX
/* return val is in eax for us */
/* for MacOS, it can also include edx, so be sure not to clobber that! */
# ifdef MACOS
/* convert to -errno */
jae syscall_success
neg eax
syscall_success:
# endif
ret
END_FUNC(dynamorio_syscall)
# ifdef MACOS
/* Mach dep syscall invocation.
* Signature: dynamorio_mach_dep_syscall(sysnum, num_args, arg1, arg2, ...)
* Only supports up to 4 args.
*/
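/* Illustrative use (a sketch; the sysnum and args are placeholders):
 *   res = dynamorio_mach_dep_syscall(sysnum, 2, arg1, arg2);
 */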
DECLARE_FUNC(dynamorio_mach_dep_syscall)
GLOBAL_LABEL(dynamorio_mach_dep_syscall:)
/* x64 kernel doesn't clobber all the callee-saved registers */
push REG_XBX
# ifdef X64
/* reverse order so we don't clobber earlier args */
mov REG_XBX, ARG2 /* put num_args where we can reference it longer */
mov rax, ARG1 /* sysnum: only need eax, but need rax to use ARG1 (or movzx) */
cmp REG_XBX, 0
je mach_dep_syscall_ready
mov ARG1, ARG3
cmp REG_XBX, 1
je mach_dep_syscall_ready
mov ARG2, ARG4
cmp REG_XBX, 2
je mach_dep_syscall_ready
mov ARG3, ARG5
cmp REG_XBX, 3
je mach_dep_syscall_ready
mov ARG4, ARG6
mach_dep_syscall_ready:
syscall
# else
push REG_XBP
push REG_XSI
push REG_XDI
/* add 16 to skip the 4 pushes */
mov ecx, [16+ 8 + esp] /* num_args */
cmp ecx, 0
je mach_dep_syscall_0args
cmp ecx, 1
je mach_dep_syscall_1args
cmp ecx, 2
je mach_dep_syscall_2args
cmp ecx, 3
je mach_dep_syscall_3args
mov esi, [16+24 + esp] /* arg4 */
mach_dep_syscall_3args:
mov edx, [16+20 + esp] /* arg3 */
mach_dep_syscall_2args:
mov ecx, [16+16 + esp] /* arg2 */
mach_dep_syscall_1args:
mov ebx, [16+12 + esp] /* arg1 */
mach_dep_syscall_0args:
mov eax, [16+ 4 + esp] /* sysnum */
lea REG_XSP, [-2*ARG_SZ + REG_XSP] /* maintain align-16: retaddr-5th below */
/* args are on stack, w/ an extra slot (retaddr of syscall wrapper) */
push esi
push edx
push ecx
push ebx
push 0 /* extra slot */
/* clear the top half so we can always consider the result 64-bit */
mov edx, 0
/* mach dep syscalls use interrupt 0x82 */
int HEX(82)
lea esp, [7*ARG_SZ + esp] /* must not change flags */
pop REG_XDI
pop REG_XSI
pop REG_XBP
# endif
pop REG_XBX
/* return val is in eax for us */
/* for MacOS, it can also include edx, so be sure not to clobber that! */
/* convert to -errno */
jae mach_dep_syscall_success
neg eax
mach_dep_syscall_success:
ret
END_FUNC(dynamorio_mach_dep_syscall)
/* Mach syscall invocation.
* Signature: ptr_int_t dynamorio_mach_syscall(sysnum, num_args, arg1, arg2, ...)
* Only supports up to 4 args.
* Does not support returning a 64-bit value in 32-bit mode.
*/
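/* Illustrative use (a sketch; the trap number and argument are placeholders):
 *   res = dynamorio_mach_syscall(trap_num, 1, arg1);
 * Unlike the BSD and machdep paths above, the raw eax result is returned as-is:
 * Mach traps do not signal failure via the carry flag (see the note at the end
 * of this routine).
 */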
DECLARE_FUNC(dynamorio_mach_syscall)
GLOBAL_LABEL(dynamorio_mach_syscall:)
/* x64 kernel doesn't clobber all the callee-saved registers */
push REG_XBX
# ifdef X64
/* reverse order so we don't clobber earlier args */
mov REG_XBX, ARG2 /* put num_args where we can reference it longer */
mov rax, ARG1 /* sysnum: only need eax, but need rax to use ARG1 (or movzx) */
or eax, SYSCALL_NUM_MARKER_MACH
cmp REG_XBX, 0
je dynamorio_mach_syscall_ready
mov ARG1, ARG3
cmp REG_XBX, 1
je dynamorio_mach_syscall_ready
mov ARG2, ARG4
cmp REG_XBX, 2
je dynamorio_mach_syscall_ready
mov ARG3, ARG5
cmp REG_XBX, 3
je dynamorio_mach_syscall_ready
mov ARG4, ARG6
dynamorio_mach_syscall_ready:
syscall
# else
push REG_XBP
push REG_XSI
push REG_XDI
/* add 16 to skip the 4 pushes */
mov ecx, [16+ 8 + esp] /* num_args */
cmp ecx, 0
je dynamorio_mach_syscall_0args
cmp ecx, 1
je dynamorio_mach_syscall_1args
cmp ecx, 2
je dynamorio_mach_syscall_2args
cmp ecx, 3
je dynamorio_mach_syscall_3args
mov esi, [16+24 + esp] /* arg4 */
dynamorio_mach_syscall_3args:
mov edx, [16+20 + esp] /* arg3 */
dynamorio_mach_syscall_2args:
mov ecx, [16+16 + esp] /* arg2 */
dynamorio_mach_syscall_1args:
mov ebx, [16+12 + esp] /* arg1 */
dynamorio_mach_syscall_0args:
mov eax, [16+ 4 + esp] /* sysnum */
/* The sysnum is passed as a negative number */
neg eax
lea REG_XSP, [-2*ARG_SZ + REG_XSP] /* maintain align-16: retaddr-5th below */
/* args are on stack, w/ an extra slot (retaddr of syscall wrapper) */
push esi
push edx
push ecx
push ebx
push 0 /* extra slot */
/* If we use ADDRTAKEN_LABEL and GLOBAL_REF we get text relocation
* complaints so we instead do this hack:
*/
call dynamorio_mach_syscall_next
dynamorio_mach_syscall_next:
pop REG_XDX
lea REG_XDX, [1/*pop*/ + 3/*lea*/ + 2/*sysenter*/ + 2/*mov*/ + REG_XDX]
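/* XDX now holds the address of the instruction just past the sysenter below
 * (1-byte pop + 3-byte lea + 2-byte mov + 2-byte sysenter): the kernel resumes
 * us there (presumably via sysexit, which takes the return EIP from EDX and
 * the user ESP from ECX, set just below).
 */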
mov REG_XCX, REG_XSP
/* We have to use sysenter for a Mach syscall, else we get SIGSYS.
* This implies that we can't return 64-bit in 32-bit mode.
*/
sysenter
lea esp, [7*ARG_SZ + esp] /* must not change flags */
pop REG_XDI
pop REG_XSI
pop REG_XBP
# endif
pop REG_XBX
/* Return val is in eax for us.
* Note that unlike BSD and Machdep syscalls, Mach syscalls do not
* use flags to indicate success.
*/
ret
END_FUNC(dynamorio_mach_syscall)
# endif /* MACOS */
#endif /* UNIX */
/* void dr_fpu_exception_init(void)
* sets the exception mask flags for both regular float and xmm packed float
*/
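/* For reference: fninit restores the x87 defaults (control word 0x037f, all x87
 * exceptions masked), and 0x1f80 is the power-up MXCSR value: all six SIMD
 * exception-mask bits set, round-to-nearest, FZ/DAZ clear.
 */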
#define FUNCNAME dr_fpu_exception_init
DECLARE_FUNC(FUNCNAME)
GLOBAL_LABEL(FUNCNAME:)
fninit
push HEX(1f80)
ldmxcsr DWORD [REG_XSP]
pop REG_XAX
ret
END_FUNC(FUNCNAME)
#undef FUNCNAME
/* void get_mmx_val(OUT uint64 *val, uint index)
* Returns the value of mmx register #index in val.
*/
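/* Illustrative use (a sketch; the variable names are placeholders):
 *   uint64 val;
 *   get_mmx_val(&val, 3);   -- val receives the contents of mm3
 */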
#define FUNCNAME get_mmx_val
DECLARE_FUNC_SEH(FUNCNAME)
GLOBAL_LABEL(FUNCNAME:)
mov REG_XAX, ARG1
mov REG_XCX, ARG2
END_PROLOG
cmp ecx, 0
je get_mmx_0
cmp ecx, 1
je get_mmx_1
cmp ecx, 2
je get_mmx_2
cmp ecx, 3
je get_mmx_3
cmp ecx, 4
je get_mmx_4
cmp ecx, 5
je get_mmx_5
cmp ecx, 6
je get_mmx_6
movq QWORD [REG_XAX], mm7
jmp get_mmx_done
get_mmx_6:
movq QWORD [REG_XAX], mm6
jmp get_mmx_done
get_mmx_5:
movq QWORD [REG_XAX], mm5
jmp get_mmx_done
get_mmx_4:
movq QWORD [REG_XAX], mm4
jmp get_mmx_done
get_mmx_3:
movq QWORD [REG_XAX], mm3
jmp get_mmx_done
get_mmx_2:
movq QWORD [REG_XAX], mm2
jmp get_mmx_done
get_mmx_1:
movq QWORD [REG_XAX], mm1
jmp get_mmx_done
get_mmx_0:
movq QWORD [REG_XAX], mm0
get_mmx_done:
add REG_XSP, 0 /* make a legal SEH64 epilog */
ret
END_FUNC(FUNCNAME)
#undef FUNCNAME
#ifdef WINDOWS /* on linux we use inline asm versions */
/* byte *get_frame_ptr(void)
* returns the value of ebp
*/
DECLARE_FUNC(get_frame_ptr)
GLOBAL_LABEL(get_frame_ptr:)
mov REG_XAX, REG_XBP
ret
END_FUNC(get_frame_ptr)
/* byte *get_stack_ptr(void)
* returns the value of xsp before the call
*/
DECLARE_FUNC(get_stack_ptr)
GLOBAL_LABEL(get_stack_ptr:)
mov REG_XAX, REG_XSP
add REG_XAX, ARG_SZ /* remove return address space */
ret
END_FUNC(get_stack_ptr)
#endif /* WINDOWS */
/***************************************************************************/
#if defined(WINDOWS) && !defined(X64)
/* Routines to switch to 64-bit mode from 32-bit WOW64, make a 64-bit
* call, and then return to 32-bit mode.
*/
/*
* int switch_modes_and_load(void *ntdll64_LdrLoadDll,
* UNICODE_STRING_64 *lib,
* HANDLE *result)
* XXX i#1633: this routine does not yet support ntdll64 > 4GB
*/
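/* The mode switches below are hand-encoded far transfers: RAW(ea) plus a 4-byte
 * offset and a 2-byte selector forms a "jmp ptr16:32"; loading CS64_SELECTOR
 * (0033) switches to the 64-bit code segment, and the far jmp through the
 * fword on the stack with CS32_SELECTOR (0023) switches back to 32-bit mode.
 */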
# define FUNCNAME switch_modes_and_load
DECLARE_FUNC(FUNCNAME)
GLOBAL_LABEL(FUNCNAME:)
/* get args before we change esp */
mov eax, ARG1
mov ecx, ARG2
mov edx, ARG3
/* save callee-saved registers */
push ebx
/* far jmp to next instr w/ 64-bit switch: jmp 0033:<sml_transfer_to_64> */
RAW(ea)
DD offset sml_transfer_to_64
DB CS64_SELECTOR
RAW(00)
sml_transfer_to_64:
/* Below here is executed in 64-bit mode, but with guarantees that
* no address is above 4GB, as this is a WOW64 process.
*/
/* Call LdrLoadDll to load 64-bit lib:
* LdrLoadDll(IN PWSTR DllPath OPTIONAL,
* IN PULONG DllCharacteristics OPTIONAL,
* IN PUNICODE_STRING DllName,
* OUT PVOID *DllHandle));
*/
RAW(4c) RAW(8b) RAW(ca) /* mov r9, rdx : 4th arg: result */
RAW(4c) RAW(8b) RAW(c1) /* mov r8, rcx : 3rd arg: lib */
push 0 /* slot for &DllCharacteristics */
lea edx, dword ptr [esp] /* 2nd arg: &DllCharacteristics */
xor ecx, ecx /* 1st arg: DllPath = NULL */
/* save WOW64 state */
RAW(41) push esp /* push r12 */
RAW(41) push ebp /* push r13 */
RAW(41) push esi /* push r14 */
RAW(41) push edi /* push r15 */
/* align the stack pointer */
mov ebx, esp /* save esp in callee-preserved reg */
sub esp, 32 /* call conv */
and esp, HEX(fffffff0) /* align to 16-byte boundary */
call eax
mov esp, ebx /* restore esp */
/* restore WOW64 state */
RAW(41) pop edi /* pop r15 */
RAW(41) pop esi /* pop r14 */
RAW(41) pop ebp /* pop r13 */
RAW(41) pop esp /* pop r12 */
/* far jmp to next instr w/ 32-bit switch: jmp 0023:<sml_return_to_32> */
push offset sml_return_to_32 /* 8-byte push */
mov dword ptr [esp + 4], CS32_SELECTOR /* top 4 bytes of prev push */
jmp fword ptr [esp]
sml_return_to_32:
add esp, 16 /* clean up far jmp target and &DllCharacteristics */
pop ebx /* restore callee-saved reg */
ret /* return value already in eax */
END_FUNC(FUNCNAME)
/*
* void d_r_set_ss_selector()
*/
DECL_EXTERN(d_r_ss_value)
# undef FUNCNAME
# define FUNCNAME d_r_set_ss_selector
DECLARE_FUNC(FUNCNAME)
GLOBAL_LABEL(FUNCNAME:)
mov eax, ss
mov DWORD SYMREF(d_r_ss_value), eax
ret
END_FUNC(FUNCNAME)
/*
* int switch_modes_and_call(invoke_uint64_t *args)
*/
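/* Per the 64-bit loads below, the expected invoke_uint64_t layout is:
 *   [args + 0]    func
 *   [args + 8*1]  arg1 ... [args + 8*6]  arg6   (each slot 8 bytes wide)
 * arg1-arg4 are passed in rcx/rdx/r8/r9 and arg5/arg6 on the stack, following
 * the Win64 calling convention.
 */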
# undef FUNCNAME
# define FUNCNAME switch_modes_and_call
DECLARE_FUNC(FUNCNAME)
GLOBAL_LABEL(FUNCNAME:)
mov eax, ARG1
/* Save callee-saved registers. */
push ebx
push esi
push edi
push ebp
/* Far jmp to next instr w/ 64-bit switch: jmp 0033:<smc_transfer_to_64>. */
RAW(ea)
DD offset smc_transfer_to_64
DB CS64_SELECTOR
RAW(00)
smc_transfer_to_64:
/* Below here is executed in 64-bit mode, but with guarantees that
* no address is above 4GB, as this is a WOW64 process.
*/
/* Save WOW64 callee-saved registers. */
RAW(41) push esp /* push r12 */
RAW(41) push ebp /* push r13 */
RAW(41) push esi /* push r14 */
RAW(41) push edi /* push r15 */
/* Align the stack pointer. */
mov ebx, esp /* save esp in callee-preserved reg */
and esp, HEX(fffffff0) /* align to 16-byte boundary */
/* Set up args on the stack. */
RAW(48) mov ecx, DWORD [eax + 8*6] /* load args.arg6 */
push ecx /* push args.arg6 */
RAW(48) mov ecx, DWORD [eax + 8*5] /* load args.arg5 */
push ecx /* push args.arg5 */
sub esp, 32 /* Leave slots for args 1-4. */
/* arg1 is already in rcx, arg2 in rdx, arg3 in r8, arg4 in r9 */
RAW(4c) mov ecx, DWORD [eax + 8*4] /* load args.arg4 into r9 */
RAW(4c) mov eax, DWORD [eax + 8*3] /* load args.arg3 into r8 */
RAW(48) mov edx, DWORD [eax + 8*2] /* load args.arg2 into rdx */
RAW(48) mov ecx, DWORD [eax + 8*1] /* load args.arg1 into rcx */
RAW(48) mov eax, DWORD [eax] /* load args.func into rax */
call eax /* call rax */
mov esp, ebx /* restore rsp */
/* Restore WOW64 callee-saved regs. */
RAW(41) pop edi /* pop r15 */
RAW(41) pop esi /* pop r14 */
RAW(41) pop ebp /* pop r13 */
RAW(41) pop esp /* pop r12 */
/* Far jmp to next instr w/ 32-bit switch: jmp 0023:<smc_return_to_32>. */
push offset smc_return_to_32 /* 8-byte push */
mov dword ptr [esp + 4], CS32_SELECTOR /* top 4 bytes of prev push */
jmp fword ptr [esp]
smc_return_to_32:
add esp, 8 /* clean up far jmp target */
/* i#4091: Work around an AMD processor bug where after switching from 64-bit
* back to 32-bit, if a thread switch happens around the same time, the
* SS segment descriptor gets corrupted somehow and any ESP reference
* raises an access violation with an undocumented Parameter[0]=00000003.
* Re-instating the proper descriptor by re-loading the selector seems
* to solve the problem.
*/
mov ebx, DWORD SYMREF(d_r_ss_value)
mov ss, ebx
/* Restore callee-saved regs. */
pop ebp
pop edi
pop esi
pop ebx
ret /* return value already in eax */
END_FUNC(FUNCNAME)
#endif /* WINDOWS && !X64 */
/****************************************************************************
* Injection code shared between core and drinjectlib.
* XXX: since we are exporting this file in the "drlibc" lib we may want
 * to move this code to a new file such as inject_shared.asm.
*/
#ifdef WINDOWS
/* void load_dynamo(void)
*
* used for injection into a child process
* N.B.: if the code here grows, SIZE_OF_LOAD_DYNAMO in win32/inject.c
* must be updated.
*/
DECLARE_FUNC(load_dynamo)
GLOBAL_LABEL(load_dynamo:)
/* the code for this routine is copied into an allocation in the app
and invoked upon return from the injector. When it is invoked,
it expects the app's stack to look like this:
xsp-->| &LoadLibrary | xsp must be 16-aligned
| &dynamo_path |
| &GetProcAddr |
| &dynamo_entry |___
| | |
|(saved context)| priv_mcontext_t struct
| &code_alloc | | pointer to the code allocation
| sizeof(code_alloc)| size of the code allocation
|_______________|___| (possible padding for x64 xsp alignment)
&dynamo_path-->| | |
| (dynamo path) | TEXT(DYNAMORIO_DLL_PATH)
|_______________|___|
&dynamo_entry-->| | |
| (dynamo entry)| "dynamo_auto_start"
| |___|
in separate allocation ___
| | |
| CODE | load_dynamo() code
| |___|
The load_dynamo routine will load the dynamo DLL into memory, then jump
to its dynamo_auto_start entry point, passing it the saved app context as
parameters.
*/
/* two byte NOP to satisfy third party braindead-ness documented in case 3821 */
mov edi, edi
/* Update priv_mcontext_t's xcx/xax in case the target start address was changed
* for .NET (i#3046). LdrpInitializeProcess goes and changes the initial
* thread's CONTEXT.Xcx from what the kernel set (the executable image entry),
* and what inject_into_thread() cached here on the stack, to something like
* MSCOREE!CorExeMain_Exported. We assume no other state was changed: just
* Xcx/Xax. Long-term we'd like to make early injection the default, which
* avoids this problem.
*/
#ifdef X64
mov PTRSZ [MCONTEXT_XCX_OFFS + 4*ARG_SZ + REG_XSP], REG_XCX
#else
mov PTRSZ [MCONTEXT_XAX_OFFS + 4*ARG_SZ + REG_XSP], REG_XAX
#endif
#ifdef LOAD_DYNAMO_DEBUGBREAK
/* having this code in front may hide the problem addressed with the
* above padding */
/* giant loop so can attach debugger, then change ebx to 1
* to step through rest of code */
mov ebx, HEX(7fffffff)
load_dynamo_repeat_outer:
mov eax, HEX(7fffffff)
load_dynamo_repeatme:
dec eax
cmp eax, 0
jg load_dynamo_repeatme
dec ebx
cmp ebx, 0
jg load_dynamo_repeat_outer
# ifdef X64
/* xsp is 8-aligned and our pop makes it 16-aligned.  XXX: should we skip this on Windows, which doesn't require 16-byte alignment? */
# endif
/* TOS has &DebugBreak */
pop REG_XBX /* pop REG_XBX = &DebugBreak */
CALLWIN0(REG_XBX) /* call DebugBreak (in kernel32.lib) */
#endif
/* TOS has &LoadLibraryA */
pop REG_XBX /* pop REG_XBX = &LoadLibraryA */
/* TOS has &dynamo_path */
pop REG_XAX /* for 32-bit we're doing "pop eax, push eax" */
sub REG_XSP, FRAME_ALIGNMENT - ARG_SZ*2 /* Align to 16. */
CALLWIN1(REG_XBX, REG_XAX) /* call LoadLibraryA (in kernel32.lib) */
add REG_XSP, FRAME_ALIGNMENT - ARG_SZ*2 /* Undo align. */
/* check result */
cmp REG_XAX, 0
jne load_dynamo_success
pop REG_XBX /* pop off &GetProcAddress */
pop REG_XBX /* pop off &dynamo_entry */
jmp load_dynamo_failure
load_dynamo_success:
/* TOS has &GetProcAddress */
pop REG_XBX /* pop REG_XBX = &GetProcAddress */
/* dynamo_handle is now in REG_XAX (returned by call LoadLibrary) */
/* TOS has &dynamo_entry */
pop REG_XDI /* for 32-bit we're doing "pop edi, push edi" */
/* Stack is now 16-byte aligned. */
CALLWIN2(REG_XBX, REG_XAX, REG_XDI) /* call GetProcAddress */
cmp REG_XAX, 0
je load_dynamo_failure
/* jump to dynamo_auto_start (returned by GetProcAddress) */
jmp REG_XAX
/* dynamo_auto_start will take over or continue natively at the saved
* context via load_dynamo_failure.
*/
END_FUNC(load_dynamo)
/* N.B.: load_dynamo_failure MUST follow load_dynamo, as both are
* copied in one fell swoop by inject_into_thread()!
*/
/* Not really a function, but we have issues getting both masm and gas to
 * let other asm routines jump here otherwise.
* targeted by load_dynamo and dynamo_auto_start by a jump, not a call,
* when we should not take over and should go native instead.
* Xref case 7654: we come here to the child's copy from dynamo_auto_start
* instead of returning to the parent's copy post-load_dynamo to avoid
 * incompatibilities with stack layout across DR versions.
*/
DECLARE_FUNC(load_dynamo_failure)
GLOBAL_LABEL(load_dynamo_failure:)
/* Would be nice if we could free our allocation here as well, but
* that's too much of a pain (esp. here).
* Note TOS has the saved context at this point, xref layout in
* auto_setup. Note this code is duplicated in dynamo_auto_start. */
mov REG_XAX, [MCONTEXT_XSP_OFFS + REG_XSP] /* load app xsp */
mov REG_XBX, [MCONTEXT_PC_OFFS + REG_XSP] /* load app start_pc */
/* write app start_pc off top of app stack */
mov [-ARG_SZ + REG_XAX], REG_XBX
/* It's ok to write past the app TOS since we're just overwriting part of
 * the dynamo_entry string, which is dead at this point; this won't affect
 * the popping of the saved context. */
POPGPR
POPF
/* we assume reading beyond TOS is ok here (no signals on windows) */
/* we assume xmm0-5 do not need to be restored */
/* restore app xsp (POPGPR doesn't) */
mov REG_XSP, [-MCONTEXT_PC_OFFS + MCONTEXT_XSP_OFFS + REG_XSP]
jmp PTRSZ [-ARG_SZ + REG_XSP] /* jmp to app start_pc */
ret
END_FUNC(load_dynamo_failure)
#endif /* WINDOWS */
#ifdef LINUX
/* SYS_clone swaps the stack so we need asm support to call it.
* signature:
* thread_id_t dynamorio_clone(uint flags, byte *newsp, void *ptid, void *tls,
* void *ctid, void (*func)(void))
* i#6514: If newsp is NULL then that tells the kernel to give the child the
* same value for SP as the parent.
*/
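/* In the child (where the syscall returns 0) we call func, which must not return
 * (we fall through to unexpected_return if it does); in the parent we return the
 * child's thread id, or a negative errno on failure, just like raw SYS_clone.
 */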
DECLARE_FUNC(dynamorio_clone)
GLOBAL_LABEL(dynamorio_clone:)
/* Save func for use post-syscall on the newsp.
* This is tricky because we have to handle the case of newsp == NULL.
*/
# ifdef X64
/* The syscall preserves all registers except rax, rcx, r11. */
push r15
mov r15, ARG6 /* Func is now in r15. */
and ARG2, -FRAME_ALIGNMENT /* For glibc compatibility, align newsp. */
/* All args are already in syscall registers, except for rcx. */
mov r10, rcx
mov REG_XAX, SYS_clone
syscall
# else
/* Fetch some args we need before we modify XSP and ARGn is no
* longer usable.
*/
mov REG_XCX, ARG2 /* newsp */
mov REG_XDX, ARG3 /* ptid */
mov REG_XAX, ARG6 /* func */
/* Preserve callee-saved regs. */
push REG_XBX
push REG_XSI
push REG_XDI
/* Now can't use ARG* since xsp modified by pushes. */
mov REG_XBX, DWORD [4*ARG_SZ + REG_XSP] /* ARG1 + 3 pushes */
mov REG_XSI, DWORD [7*ARG_SZ + REG_XSP] /* ARG4 + 3 pushes */
mov REG_XDI, DWORD [8*ARG_SZ + REG_XSP] /* ARG5 + 3 pushes */
/* i#6514: Save func on the child's stack. Remember that if newsp is
* NULL then the child's stack is our stack. When the syscall returns
* it's cumbersome to know whether newsp was NULL. To keep things simple
* for the parent always push func on our stack.
*/
push REG_XAX /* Xsp is misaligned at this point but kernel doesn't care. */
and REG_XCX, -FRAME_ALIGNMENT /* For glibc compatibility, align newsp. */
jz newsp_is_null
sub REG_XCX, ARG_SZ
mov [REG_XCX], REG_XAX /* Func is now on TOS of newsp. */
newsp_is_null:
mov REG_XAX, SYS_clone
/* PR 254280: we assume int$80 is ok even for LOL64 */
int HEX(80)
# endif
cmp REG_XAX, 0
jne dynamorio_clone_parent
# ifdef X64
call r15
# else
pop REG_XCX
call REG_XCX
# endif
/* Shouldn't return. */
jmp GLOBAL_REF(unexpected_return)
dynamorio_clone_parent:
# ifdef X64
pop r15
# else
/* Restore callee-saved regs. */
add REG_XSP, ARG_SZ /* Discard func. */
pop REG_XDI
pop REG_XSI
pop REG_XBX
# endif
/* Return val is in eax still. */
ret
END_FUNC(dynamorio_clone)
#endif /* LINUX */
END_FILE