blob: 1501d871b8a1effd78972042c7b449fbc7140c3d [file] [log] [blame]
/* **********************************************************
* Copyright (c) 2011-2014 Google, Inc. All rights reserved.
* Copyright (c) 2006-2010 VMware, Inc. All rights reserved.
* **********************************************************/
/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/* Copyright (c) 2006-2007 Determina Corp. */
/*
* syscall.c - win32-specific system call handling routines
*/
#include "../globals.h"
#include "../fragment.h"
#include "ntdll.h"
#include "os_private.h"
#include "aslr.h"
#include "instrument.h"
#include "../synch.h"
/* this points to one of the os-version-specific system call # arrays below;
 * it is indexed by the same per-syscall index as syscall_names[] */
int *syscalls = NULL;
/* this points to one of the os-version-specific wow64 argument conversion arrays */
int *wow64_index = NULL;
/* Ref case 5217 - for Sygate compatibility we indirect int 2e system
 * calls through the int_syscall_address (which after syscalls_init()
 * will point to an int 2e, ret 0 in ntdll.dll). This is, for all intents
 * and purposes, a function pointer that will be set only once early
 * during app init, so we keep it here with the options to leverage their
 * protection. */
app_pc int_syscall_address = NULL;
/* Ref case 5441 - for Sygate compatibility we fake our return address from
 * sysenter system calls (they sometimes verify) to this address which will
 * (by default) point to a ret 0 in ntdll.dll. This is, for all intents and
 * purposes, a function pointer that will be set only once early during app
 * init, so we keep it here with the options to leverage their protection. */
app_pc sysenter_ret_address = NULL;
/* i#537: sysenter returns to KiFastSystemCallRet from kernel */
app_pc KiFastSystemCallRet_address = NULL;
/* Snapshots are relatively heavyweight, so we do not take them on every memory
 * system call. On the other hand, if we only did them when we dumped
 * stats, we'd miss large memory allocations that were freed prior
 * to the next stats dump (which can be far between if not much new code
 * is being executed). Thus, we do them whenever we print stats and on
 * every memory operation larger than this threshold (16 pages):
 */
#define SNAPSHOT_THRESHOLD (16*PAGE_SIZE)
/*******************************************************/
#ifdef CLIENT_INTERFACE
/* i#1230: we support a limited number of extra interceptions.
 * We add extra slots to all of the arrays.
 */
/* number of extra slots appended after the SYS_MAX regular entries */
# define CLIENT_EXTRA_TRAMPOLINE 12
/* TRAMPOLINE_MAX is the length of every per-syscall array in this file */
# define TRAMPOLINE_MAX (SYS_MAX + CLIENT_EXTRA_TRAMPOLINE)
/* Count of extra slots in use: intercept_native_syscall() rejects any index
 * at or beyond SYS_MAX + syscall_extra_idx.
 */
/* no lock needed since only supported during dr_init */
static uint syscall_extra_idx;
#else
/* without client support there are no extra slots */
# define TRAMPOLINE_MAX SYS_MAX
#endif
/* Name of each tracked system call: the "name" column of every SYSCALL() row
 * in syscallx.h with an "Nt" prefix prepended.  These strings are what
 * get_proc_address() is asked to resolve in ntdll and what log messages print.
 */
const char * SYS_CONST syscall_names[TRAMPOLINE_MAX] = {
#define SYSCALL(name, act, nargs, arg32, ntsp0, ntsp3, ntsp4, w2k, xp, wow64, xp64,\
w2k3, vista0, vista0_x64, vista1, vista1_x64, w7x86, w7x64, \
w8x86, w8w64, w8x64, w81x86, w81w64, w81x64) \
"Nt"#name,
#include "syscallx.h"
#undef SYSCALL
};
/* System call numbers from the w81x64 column of syscallx.h, one entry per
 * SYSCALL() row (Windows 8.1 x64, going by the array name).
 */
SYS_CONST int windows_81_x64_syscalls[TRAMPOLINE_MAX] = {
#define SYSCALL(name, act, nargs, arg32, ntsp0, ntsp3, ntsp4, w2k, xp, wow64, xp64,\
w2k3, vista0, vista0_x64, vista1, vista1_x64, w7x86, w7x64, \
w8x86, w8w64, w8x64, w81x86, w81w64, w81x64) \
w81x64,
#include "syscallx.h"
#undef SYSCALL
};
/* System call numbers from the w81w64 column of syscallx.h, one entry per
 * SYSCALL() row (Windows 8.1 WOW64, going by the array name).
 */
SYS_CONST int windows_81_wow64_syscalls[TRAMPOLINE_MAX] = {
#define SYSCALL(name, act, nargs, arg32, ntsp0, ntsp3, ntsp4, w2k, xp, wow64, xp64,\
w2k3, vista0, vista0_x64, vista1, vista1_x64, w7x86, w7x64, \
w8x86, w8w64, w8x64, w81x86, w81w64, w81x64) \
w81w64,
#include "syscallx.h"
#undef SYSCALL
};
/* System call numbers from the w81x86 column of syscallx.h, one entry per
 * SYSCALL() row (Windows 8.1 x86, going by the array name).
 */
SYS_CONST int windows_81_x86_syscalls[TRAMPOLINE_MAX] = {
#define SYSCALL(name, act, nargs, arg32, ntsp0, ntsp3, ntsp4, w2k, xp, wow64, xp64,\
w2k3, vista0, vista0_x64, vista1, vista1_x64, w7x86, w7x64, \
w8x86, w8w64, w8x64, w81x86, w81w64, w81x64) \
w81x86,
#include "syscallx.h"
#undef SYSCALL
};
/* System call numbers from the w8x64 column of syscallx.h, one entry per
 * SYSCALL() row (Windows 8 x64, going by the array name).
 */
SYS_CONST int windows_8_x64_syscalls[TRAMPOLINE_MAX] = {
#define SYSCALL(name, act, nargs, arg32, ntsp0, ntsp3, ntsp4, w2k, xp, wow64, xp64,\
w2k3, vista0, vista0_x64, vista1, vista1_x64, w7x86, w7x64, \
w8x86, w8w64, w8x64, w81x86, w81w64, w81x64) \
w8x64,
#include "syscallx.h"
#undef SYSCALL
};
/* System call numbers from the w8w64 column of syscallx.h, one entry per
 * SYSCALL() row (Windows 8 WOW64, going by the array name).
 */
SYS_CONST int windows_8_wow64_syscalls[TRAMPOLINE_MAX] = {
#define SYSCALL(name, act, nargs, arg32, ntsp0, ntsp3, ntsp4, w2k, xp, wow64, xp64,\
w2k3, vista0, vista0_x64, vista1, vista1_x64, w7x86, w7x64, \
w8x86, w8w64, w8x64, w81x86, w81w64, w81x64) \
w8w64,
#include "syscallx.h"
#undef SYSCALL
};
/* System call numbers from the w8x86 column of syscallx.h, one entry per
 * SYSCALL() row (Windows 8 x86, going by the array name).
 */
SYS_CONST int windows_8_x86_syscalls[TRAMPOLINE_MAX] = {
#define SYSCALL(name, act, nargs, arg32, ntsp0, ntsp3, ntsp4, w2k, xp, wow64, xp64,\
w2k3, vista0, vista0_x64, vista1, vista1_x64, w7x86, w7x64, \
w8x86, w8w64, w8x64, w81x86, w81w64, w81x64) \
w8x86,
#include "syscallx.h"
#undef SYSCALL
};
/* System call numbers from the w7x64 column of syscallx.h, one entry per
 * SYSCALL() row (Windows 7 x64, going by the array name).
 */
SYS_CONST int windows_7_x64_syscalls[TRAMPOLINE_MAX] = {
#define SYSCALL(name, act, nargs, arg32, ntsp0, ntsp3, ntsp4, w2k, xp, wow64, xp64,\
w2k3, vista0, vista0_x64, vista1, vista1_x64, w7x86, w7x64, \
w8x86, w8w64, w8x64, w81x86, w81w64, w81x64) \
w7x64,
#include "syscallx.h"
#undef SYSCALL
};
/* System call numbers from the w7x86 column of syscallx.h, one entry per
 * SYSCALL() row (Windows 7 x86, going by the column name).
 */
SYS_CONST int windows_7_syscalls[TRAMPOLINE_MAX] = {
#define SYSCALL(name, act, nargs, arg32, ntsp0, ntsp3, ntsp4, w2k, xp, wow64, xp64,\
w2k3, vista0, vista0_x64, vista1, vista1_x64, w7x86, w7x64, \
w8x86, w8w64, w8x64, w81x86, w81w64, w81x64) \
w7x86,
#include "syscallx.h"
#undef SYSCALL
};
/* System call numbers from the vista1_x64 column of syscallx.h, one entry per
 * SYSCALL() row (Windows Vista SP1 x64, going by the array name).
 */
SYS_CONST int windows_vista_sp1_x64_syscalls[TRAMPOLINE_MAX] = {
#define SYSCALL(name, act, nargs, arg32, ntsp0, ntsp3, ntsp4, w2k, xp, wow64, xp64,\
w2k3, vista0, vista0_x64, vista1, vista1_x64, w7x86, w7x64, \
w8x86, w8w64, w8x64, w81x86, w81w64, w81x64) \
vista1_x64,
#include "syscallx.h"
#undef SYSCALL
};
/* System call numbers from the vista1 column of syscallx.h, one entry per
 * SYSCALL() row (Windows Vista SP1, presumably 32-bit -- the x64 numbers have
 * their own table).
 */
SYS_CONST int windows_vista_sp1_syscalls[TRAMPOLINE_MAX] = {
#define SYSCALL(name, act, nargs, arg32, ntsp0, ntsp3, ntsp4, w2k, xp, wow64, xp64,\
w2k3, vista0, vista0_x64, vista1, vista1_x64, w7x86, w7x64, \
w8x86, w8w64, w8x64, w81x86, w81w64, w81x64) \
vista1,
#include "syscallx.h"
#undef SYSCALL
};
/* System call numbers from the vista0_x64 column of syscallx.h, one entry per
 * SYSCALL() row (Windows Vista SP0 x64, going by the array name).
 */
SYS_CONST int windows_vista_sp0_x64_syscalls[TRAMPOLINE_MAX] = {
#define SYSCALL(name, act, nargs, arg32, ntsp0, ntsp3, ntsp4, w2k, xp, wow64, xp64,\
w2k3, vista0, vista0_x64, vista1, vista1_x64, w7x86, w7x64, \
w8x86, w8w64, w8x64, w81x86, w81w64, w81x64) \
vista0_x64,
#include "syscallx.h"
#undef SYSCALL
};
/* System call numbers from the vista0 column of syscallx.h, one entry per
 * SYSCALL() row (Windows Vista SP0, presumably 32-bit -- the x64 numbers have
 * their own table).
 */
SYS_CONST int windows_vista_sp0_syscalls[TRAMPOLINE_MAX] = {
#define SYSCALL(name, act, nargs, arg32, ntsp0, ntsp3, ntsp4, w2k, xp, wow64, xp64,\
w2k3, vista0, vista0_x64, vista1, vista1_x64, w7x86, w7x64, \
w8x86, w8w64, w8x64, w81x86, w81w64, w81x64) \
vista0,
#include "syscallx.h"
#undef SYSCALL
};
/* System call numbers from the w2k3 column of syscallx.h, one entry per
 * SYSCALL() row (Windows Server 2003, going by the array name).
 */
SYS_CONST int windows_2003_syscalls[TRAMPOLINE_MAX] = {
#define SYSCALL(name, act, nargs, arg32, ntsp0, ntsp3, ntsp4, w2k, xp, wow64, xp64,\
w2k3, vista0, vista0_x64, vista1, vista1_x64, w7x86, w7x64, \
w8x86, w8w64, w8x64, w81x86, w81w64, w81x64) \
w2k3,
#include "syscallx.h"
#undef SYSCALL
};
/* System call numbers from the xp64 column of syscallx.h, one entry per
 * SYSCALL() row (Windows XP x64, going by the array name).
 */
SYS_CONST int windows_XP_x64_syscalls[TRAMPOLINE_MAX] = {
#define SYSCALL(name, act, nargs, arg32, ntsp0, ntsp3, ntsp4, w2k, xp, wow64, xp64,\
w2k3, vista0, vista0_x64, vista1, vista1_x64, w7x86, w7x64, \
w8x86, w8w64, w8x64, w81x86, w81w64, w81x64) \
xp64,
#include "syscallx.h"
#undef SYSCALL
};
/* This is the index for XP through Win7.
 * Unlike the *_syscalls tables this holds the wow64 column of syscallx.h, one
 * entry per SYSCALL() row; it is a candidate target for the global
 * wow64_index pointer (the wow64 argument conversion array).
 */
SYS_CONST int windows_XP_wow64_index[TRAMPOLINE_MAX] = {
#define SYSCALL(name, act, nargs, arg32, ntsp0, ntsp3, ntsp4, w2k, xp, wow64, xp64,\
w2k3, vista0, vista0_x64, vista1, vista1_x64, w7x86, w7x64, \
w8x86, w8w64, w8x64, w81x86, w81w64, w81x64) \
wow64,
#include "syscallx.h"
#undef SYSCALL
};
/* System call numbers from the xp column of syscallx.h, one entry per
 * SYSCALL() row (Windows XP, presumably 32-bit -- the x64 numbers have their
 * own table).
 */
SYS_CONST int windows_XP_syscalls[TRAMPOLINE_MAX] = {
#define SYSCALL(name, act, nargs, arg32, ntsp0, ntsp3, ntsp4, w2k, xp, wow64, xp64,\
w2k3, vista0, vista0_x64, vista1, vista1_x64, w7x86, w7x64, \
w8x86, w8w64, w8x64, w81x86, w81w64, w81x64) \
xp,
#include "syscallx.h"
#undef SYSCALL
};
/* System call numbers from the w2k column of syscallx.h, one entry per
 * SYSCALL() row (Windows 2000, going by the array name).  This table is also
 * the reference size that check_syscall_array_sizes() compares the others to.
 */
SYS_CONST int windows_2000_syscalls[TRAMPOLINE_MAX] = {
#define SYSCALL(name, act, nargs, arg32, ntsp0, ntsp3, ntsp4, w2k, xp, wow64, xp64,\
w2k3, vista0, vista0_x64, vista1, vista1_x64, w7x86, w7x64, \
w8x86, w8w64, w8x64, w81x86, w81w64, w81x64) \
w2k,
#include "syscallx.h"
#undef SYSCALL
};
/* System call numbers from the ntsp4 column of syscallx.h, one entry per
 * SYSCALL() row (Windows NT SP4, going by the array name).
 */
SYS_CONST int windows_NT_sp4_syscalls[TRAMPOLINE_MAX] = {
#define SYSCALL(name, act, nargs, arg32, ntsp0, ntsp3, ntsp4, w2k, xp, wow64, xp64,\
w2k3, vista0, vista0_x64, vista1, vista1_x64, w7x86, w7x64, \
w8x86, w8w64, w8x64, w81x86, w81w64, w81x64) \
ntsp4,
#include "syscallx.h"
#undef SYSCALL
};
/* for SP3 (and maybe SP2 or SP1 -- haven't checked those)
 * System call numbers from the ntsp3 column of syscallx.h, one entry per
 * SYSCALL() row.
 */
SYS_CONST int windows_NT_sp3_syscalls[TRAMPOLINE_MAX] = {
#define SYSCALL(name, act, nargs, arg32, ntsp0, ntsp3, ntsp4, w2k, xp, wow64, xp64,\
w2k3, vista0, vista0_x64, vista1, vista1_x64, w7x86, w7x64, \
w8x86, w8w64, w8x64, w81x86, w81w64, w81x64) \
ntsp3,
#include "syscallx.h"
#undef SYSCALL
};
/* for SP0 (and maybe SP2 or SP1 -- haven't checked those)
 * System call numbers from the ntsp0 column of syscallx.h, one entry per
 * SYSCALL() row.
 * NOTE(review): the parenthetical above is word-for-word the one on the SP3
 * table; confirm which service packs this table is really meant to cover.
 */
SYS_CONST int windows_NT_sp0_syscalls[TRAMPOLINE_MAX] = {
#define SYSCALL(name, act, nargs, arg32, ntsp0, ntsp3, ntsp4, w2k, xp, wow64, xp64,\
w2k3, vista0, vista0_x64, vista1, vista1_x64, w7x86, w7x64, \
w8x86, w8w64, w8x64, w81x86, w81w64, w81x64) \
ntsp0,
#include "syscallx.h"
#undef SYSCALL
};
/* Per-syscall argument information, one entry per SYSCALL() row of syscallx.h.
 * for x64 this is the # of args (the nargs column); otherwise the arg32
 * column is used (presumably the size in bytes of the arguments, going by
 * the array name -- confirm against syscallx.h).
 */
SYS_CONST uint syscall_argsz[TRAMPOLINE_MAX] = {
#ifdef X64
# define SYSCALL(name, act, nargs, arg32, ntsp0, ntsp3, ntsp4, w2k, xp, wow64, xp64,\
w2k3, vista0, vista0_x64, vista1, vista1_x64, w7x86, w7x64, \
w8x86, w8w64, w8x64, w81x86, w81w64, w81x64) \
nargs,
#else
# define SYSCALL(name, act, nargs, arg32, ntsp0, ntsp3, ntsp4, w2k, xp, wow64, xp64,\
w2k3, vista0, vista0_x64, vista1, vista1_x64, w7x86, w7x64, \
w8x86, w8w64, w8x64, w81x86, w81w64, w81x64) \
arg32,
#endif
#include "syscallx.h"
#undef SYSCALL
};
/* Per-syscall flag taken from the act column of syscallx.h: non-zero means
 * the system call needs handling.  Read by optimizable_system_call() and
 * intercept_native_syscall().
 */
/* FIXME: currently whether a syscall needs action or not can't be
 * dynamically changed since this flag is used early on by
 * intercept_native_syscall() */
static SYS_CONST int syscall_requires_action[TRAMPOLINE_MAX] = {
#define SYSCALL(name, act, nargs, arg32, ntsp0, ntsp3, ntsp4, w2k, xp, wow64, xp64,\
w2k3, vista0, vista0_x64, vista1, vista1_x64, w7x86, w7x64, \
w8x86, w8w64, w8x64, w81x86, w81w64, w81x64) \
act,
#include "syscallx.h"
#undef SYSCALL
};
/* used to intercept syscalls while native
 * All four arrays are indexed by the same per-syscall index as syscall_names[]
 * and are filled in by init_syscall_trampolines().
 */
/* value returned by intercept_syscall_wrapper(); NULL means no trampoline */
static byte *syscall_trampoline_pc[TRAMPOLINE_MAX];
/* post-syscall pc, stored when generating the trampoline */
static app_pc syscall_trampoline_skip_pc[TRAMPOLINE_MAX];
/* address of the ntdll wrapper as resolved by get_proc_address() */
static app_pc syscall_trampoline_hook_pc[TRAMPOLINE_MAX];
/* copy of the original bytes at the hook point, used to remove the trampoline */
static app_pc syscall_trampoline_copy_pc[TRAMPOLINE_MAX];
#ifdef GBOP
/* GBOP stack adjustment - currently either always 0 or always 4 for
 * vsyscall calls, but may need to be a more general array in case
 * HOOKED_TRAMPOLINE_HOOK_DEEPER allows different offsets
 * FIXME: case 7127 this can be compressed further, if really only a bitmask
 * see intercept_syscall_wrapper
 */
static byte syscall_trampoline_gbop_fpo_offset[TRAMPOLINE_MAX];
#endif /* GBOP */
/****************************************************************************/
/* System call interception: put any special handling here
* Arguments come from the pusha right before the call
* Win32 syscall: int 0x2e, number is in eax, address of start of params
* on user stack is in edx
*
* WinXP uses sysenter instruction and does a call to it since sysenter
* doesn't store return info -- instead sysexit (called from kernel) grabs
* continuation pc from edx. So the callee, same one used by all syscalls,
* puts esp in edx so that kernel just has to dereference it.
* Actually, on closer examination, it looks like the kernel sends control
* directly to 0x7ffe0304, which does a ret to get back to the ret after
* the call %edx -- since the 0x7ffe0304 ret executes natively we can't tell
* the difference, but we should be aware of it! If this is true, why bother
* filling in edx for sysenter? Seems like the kernel must be hardcoding it
* with 0x7ffe0304.
* FIXME: think about whether want to
* insert a trampoline (and risk clobbering entry point after the ret)
* instead of the current method of clobbering the return address
*
* Here are some win2000 examples (from ntdll.dll):
NtSetContextThread:
77F97BFA: B8 BA 00 00 00 mov eax,0BAh
77F97BFF: 8D 54 24 04 lea edx,[esp+4]
77F97C03: CD 2E int 2Eh
77F97C05: C2 08 00 ret 8
this is the only one that does not immediately have a ret, though it
does ret after a jump, some poorly chosen "optimization":
NtContinue:
77F82872: B8 1C 00 00 00 mov eax,1Ch
77F82877: 8D 54 24 04 lea edx,[esp+4]
77F8287B: CD 2E int 2Eh
77F8287D: E9 82 74 01 00 jmp 77F99D04
77F99D04: C2 08 00 ret 8
*
* WinXP example:
NtOpenKey:
0x77f7eb23 b8 77 00 00 00 mov $0x00000077 -> %eax
0x77f7eb28 ba 00 03 fe 7f mov $0x7ffe0300 -> %edx
0x77f7eb2d ff d2 call %edx
0x7ffe0300 8b d4 mov %esp -> %edx
0x7ffe0302 0f 34 sysenter
0x7ffe0304 c3 ret %esp (%esp) -> %esp
0x77f7eb2f c2 0c 00 ret $0x000c %esp (%esp) -> %esp
*/
/* the win32ksys calls are all above 0x1000, only Zw/Nt* are below
 * (i.e. this is the upper bound on ntoskrnl system call numbers;
 * "win32ksys" presumably refers to win32k.sys)
 */
#define MAX_NTOSKRNL_SYSCALL_NUM 0x1000
/* Reports whether system call number num can be ignored.
 * Currently a conservative stub: none of the parameters are consulted and
 * false is returned for every system call.
 */
bool
ignorable_system_call(int num, instr_t *gateway, dcontext_t *dcontext_live)
{
/* FIXME: this should really be a complete list of ignorable calls,
 * just ntoskrnl ones that we understand, to avoid surprises
 * with added calls?
 */
/* FIXME: switch to a bit vector?
 * we may want an inverted bit vector instead (inw2k p.123 - lower 12 bits)
 * there are 285 syscalls on xp - let's say we support 320
 * instead of the 40 ints (160 bytes) and a loop we're using now,
 * we can grab 40 bytes for 320 syscalls and do the bit extraction
 * precomputing from this table will be easy
 */
/* FIXME : it looks like most file IO/creation syscalls are alertable
 * ref bug 2520, should be added to non-ignorable */
/* FIXME : we just return false for all system calls, to be safe we should
 * really be checking for known ignorable system calls rather than the reverse,
 * see syscallx.h for old enumeration. */
return false;
}
/* Reports whether system call number num may be optimized.
 * With -shared_eq_ignore the answer is whatever ignorable_system_call() says.
 * Otherwise num is looked up in the active syscalls[] table: a tracked
 * syscall is optimizable only if it requires no action, and an untracked one
 * always is.
 */
bool
optimizable_system_call(int num)
{
    int idx;

    if (INTERNAL_OPTION(shared_eq_ignore))
        return ignorable_system_call(num, NULL, NULL);

    /* FIXME: switch to a bit vector, just as for the syscalls array? */
    for (idx = 0; idx < SYS_MAX; idx++) {
        if (syscalls[idx] != num)
            continue;
        /* first matching entry decides */
        return !syscall_requires_action[idx];
    }
    /* If the syscall isn't in the array, DR doesn't care about it. */
    return true;
}
/* The trampoline handler called for ntdll syscall wrappers that we
 * care about, so that we can act on them while native_exec-ing.
 *
 * state->callee_arg carries the per-syscall index that
 * init_syscall_trampolines() registered with the trampoline; it is used to
 * index syscall_names[], syscall_trampoline_skip_pc[], etc.
 *
 * Outcomes:
 * - unknown thread (no dcontext): AFTER_INTERCEPT_LET_GO, syscall runs natively.
 * - thread under the dyn hack or flagged for retakeover:
 *   AFTER_INTERCEPT_TAKE_OVER after calling retakeover_after_native().
 * - known thread that is currently native: does not return; the syscall is
 *   handed to transfer_to_dispatch() and the thread goes native again at
 *   dcontext->native_exec_postsyscall.
 * - anything else (a syscall made from DR itself): AFTER_INTERCEPT_LET_GO.
 */
after_intercept_action_t
syscall_while_native(app_state_at_intercept_t *state)
{
int sysnum = (int) (ptr_int_t) state->callee_arg;
/* FIXME : if dr calls through ntdll functions that are hooked by a third
 * party (say Sygate's sysfer.dll) then they could perform syscalls that
 * would get us here. Most of the time we'll be ok, but if the current
 * thread is under_dyn_hack or native_exec we might try to process the
 * system call or takeover, neither of which is safe. Currently we avoid
 * calling through nt wrappers that sysfer.dll hooks (doing system call
 * internally instead). This also applies if we call our own hooks, which
 * we avoid in a similar manner.
 */
/* Returning AFTER_INTERCEPT_LET_GO will perform the syscall natively,
 * while AFTER_INTERCEPT_LET_GO_ALT_DYN will skip it. Modify the register
 * arguments to change the returned state, note that the stack will have
 * to be popped once (modify reg_esp) to match up the returns.
 */
dcontext_t *dcontext = get_thread_private_dcontext();
IF_X64(ASSERT_TRUNCATE(int, int, (ptr_int_t)state->callee_arg));
/* N.B.: if any intercepted syscalls are used by DR from ntdll, rather
 * than custom wrappers, then a recursion-avoidance check here would
 * be required to avoid infinite loop on error here!
 */
STATS_INC(num_syscall_trampolines);
if (dcontext == NULL) {
/* unknown thread */
return AFTER_INTERCEPT_LET_GO; /* do syscall natively */
} else if (IS_UNDER_DYN_HACK(dcontext->thread_record->under_dynamo_control) ||
dcontext->thread_record->retakeover) {
/* this trampoline is our ticket to taking control again prior
 * to the image entry point
 * we often hit this on NtAllocateVirtualMemory from HeapCreate for
 * the next dll init after the cb ret where we lost control
 */
STATS_INC(num_syscall_trampolines_retakeover);
LOG(THREAD, LOG_SYSCALLS, 1,
"syscall_while_native: retakeover in %s after native cb return lost control\n",
syscall_names[sysnum]);
retakeover_after_native(dcontext->thread_record, INTERCEPT_SYSCALL);
dcontext->thread_record->retakeover = false;
return AFTER_INTERCEPT_TAKE_OVER; /* syscall under DR */
} else if (!dcontext->thread_record->under_dynamo_control
/* xref PR 230836 */
IF_CLIENT_INTERFACE(&& !IS_CLIENT_THREAD(dcontext))
/* i#1318: may get here from privlib at exit, at least until we
 * redirect *everything*. From privlib we need to keep
 * the syscall native as DR locks may be held.
 */
IF_CLIENT_INTERFACE(&& dcontext->whereami == WHERE_APP)) {
/* assumption is that any known native thread is one we control in general,
 * just not right now while in a native_exec_list dll */
STATS_INC(num_syscall_trampolines_native);
LOG(THREAD, LOG_SYSCALLS, 1,
"NATIVE system call %s\n", syscall_names[sysnum]);
DOLOG(IF_DGCDIAG_ELSE(1, 2), LOG_SYSCALLS, {
dump_callstack(*((byte **)state->mc.xsp) /*retaddr*/,
(app_pc) state->mc.xbp,
THREAD, DUMP_NOT_XML);
});
#ifdef GBOP
/* case 7127 - validate GBOP on syscalls that are already hooked for
 * hotp_only on native_exec
 */
if (DYNAMO_OPTION(gbop) != GBOP_DISABLED) {
/* FIXME: case 7127: should enforce here GBOP_WHEN_NATIVE_EXEC if we
 * want to apply for -hotp_only but not for native_exec.
 * Today we always validate.
 */
/* FIXME: case 7127: for -exclude_gbop_list need to check a flag
 * whether this ntdll!Nt* hook has been excluded
 */
/* state->xsp is the wishful thinking after syscall
 * address, instead of the original one -
 * intercept_syscall_wrapper() keeps the relevant
 * FPO information: 4 on XP SP2+, or 0 earlier
 */
gbop_validate_and_act(state,
/* adjust ESP */
syscall_trampoline_gbop_fpo_offset[sysnum],
syscall_trampoline_hook_pc[sysnum]);
/* if the routine at all returns it passed the GBOP checks */
/* FIXME: case 7127: may want alternative handling
 * and for system calls returning an error of some kind
 * like STATUS_INVALID_ADDRESS or STATUS_BUFFER_OVERFLOW
 * may be a somewhat useful attack handling alternative
 */
/* FIXME: case 7127 for completeness should be able to add
 * this check to the regular DR syscalls where we'll be at
 * the PC calling sysenter, not necessarily at the start
 * of a function. Though other than uniform testing it
 * won't serve much else. There we'll have to match the
 * correct FPO offset at the syscall as well.
 */
}
#endif /* GBOP */
/* Notes on handling syscalls for native threads:
 *
 * FIXME: make sure each syscall handler can handle this thread being native,
 * as well as target being native. E.g., will a native thread terminating
 * itself hit any assertion about not coming back under DR control first?
 * Another example, will GetCxt fail trying to translate a native thread's
 * context?
 * FIXME: what about asynch event while in syscall? none of ones we
 * intercept are alertable?
 * FIXME: exception during pre-syscall sequence can cause us to miss
 * the go-native trigger!
 *
 * Be careful with cache consistency events -- we assume in general that
 * code executed natively is never mixed with code executed under DR, in
 * both execution and manipulation, and we try to have _all_ DGC-using
 * dlls listed in the native_exec_list. We do handle write faults from
 * cache consistency in native threads, so we'll have correct behavior,
 * but we don't want a performance hit from in-cache DGC slowing down
 * from-native DGC b/c they share memory and it keeps bouncing from RO to
 * RW -- that's a big reason we're going native in the first place! For
 * handling app memory-changing syscalls, we don't mark new code as
 * read-only until executed from, so in the common case we should not
 * incur any cost from cache consistency while native.
 */
/* Invoke normal DR syscall-handling by calling dispatch() with a
 * linkstub_t marked just like those for fragments ending in syscalls.
 * (We cannot return to the trampoline tail for asynch_take_over() since
 * it will clobber our next_tag and last_exit and will execute the jmp
 * back to the syscall under DR, requiring a more intrusive way of going
 * native afterward.) Normal handling may skip the syscall or do
 * whatever, but we expect it to not change control flow (we don't
 * intercept those while threads are native) and to come out of the
 * cache and continue on with the next_tag that we set here, which is a
 * special stopping point routine of ours that causes DR to go native @
 * the pc we store in dcontext->native_exec_postsyscall.
 */
dcontext->next_tag = BACK_TO_NATIVE_AFTER_SYSCALL;
/* start_pc is the take-over pc that will jmp to the syscall instr, while
 * we need the post-syscall pc, which we stored when generating the trampoline
 */
ASSERT(syscall_trampoline_skip_pc[sysnum] != NULL);
dcontext->native_exec_postsyscall = syscall_trampoline_skip_pc[sysnum];
ASSERT(dcontext->whereami == WHERE_APP);
dcontext->whereami = WHERE_TRAMPOLINE;
set_last_exit(dcontext, (linkstub_t *) get_native_exec_syscall_linkstub());
/* assumption: no special cleanup from tail of trampoline needed */
transfer_to_dispatch(dcontext, &state->mc, false/*!full_DR_state*/);
ASSERT_NOT_REACHED();
}
/* This routine tries to handle syscalls from DR, but will fail in some
 * cases (if the current thread has certain under_dynamo_control values) --
 * so we use our own custom wrapper rather than go through ntdll when we
 * expect going through wrapper to reach here (FIXME should do this for
 * all system calls). */
/* i#924: this happens at exit during os_loader_exit(), and at thread init
 * when priv libs call routines we haven't yet redirected. Best to disable
 * the syslog for clients (we still have the log warning).
 */
#ifndef CLIENT_INTERFACE
DODEBUG({
/* Unfortunately we use various ntdll routines (most notably Ldr*)
 * that may be hooked (hook code could do anything including making
 * system calls). Also some of the ntdll Rtl routines we
 * import may be similarly ill behaved (though we don't believe any
 * of the currently used ones are problematic). Also calling
 * through Sygate hooks may reach here.
 */
SYSLOG_INTERNAL_WARNING_ONCE("syscall_while_native: using %s - maybe hooked?",
syscall_names[sysnum]);
});
#endif
STATS_INC(num_syscall_trampolines_DR);
LOG(THREAD, LOG_SYSCALLS, 1,
"WARNING: syscall_while_native: syscall from DR %s\n",
syscall_names[sysnum]);
return AFTER_INTERCEPT_LET_GO; /* do syscall natively */
}
/* Returns true if the syscall with table index SYSnum must still be hooked
 * when running with -thin_client, which hooks only a small subset of the
 * system calls.
 */
static inline bool
intercept_syscall_for_thin_client(int SYSnum)
{
    /* thread and process creation */
    if (SYSnum == SYS_CreateThread ||
        SYSnum == SYS_CreateProcess ||
        SYSnum == SYS_CreateProcessEx ||
        SYSnum == SYS_CreateUserProcess)
        return true;
    /* Case 9079. */
    if (SYSnum == SYS_TerminateThread)
        return true;
    /* i#1198: for env var propagation */
    if (SYSnum == SYS_ResumeThread)
        return true;
    /* case 8866: for -early_inject we must intercept NtMapViewOfSection */
    if (DYNAMO_OPTION(early_inject) && SYSnum == SYS_MapViewOfSection)
        return true;
    return false;
}
/* Decides whether the ntdll wrapper for the syscall with table index SYSnum
 * gets a native-exec trampoline.  Returns false for unused client slots,
 * for syscalls filtered out by -thin_client, for syscalls that need no action
 * or do not exist on this OS version, and for the control-transfer syscalls
 * listed below.
 */
static inline bool
intercept_native_syscall(int SYSnum)
{
    ASSERT(SYSnum < TRAMPOLINE_MAX);
#ifdef CLIENT_INTERFACE
    /* extra client slot that has not been claimed */
    if ((uint)SYSnum >= SYS_MAX + syscall_extra_idx)
        return false;
#endif
    /* Don't hook all syscalls for thin_client. */
    if (DYNAMO_OPTION(thin_client) && !intercept_syscall_for_thin_client(SYSnum))
        return false;
    /* nothing for us to do on this syscall */
    if (!syscall_requires_action[SYSnum])
        return false;
    /* not available on this OS version */
    if (syscalls[SYSnum] == SYSCALL_NOT_PRESENT)
        return false;
    /* ignore control transfer system calls:
     * 1) NtCallbackReturn (assume the corresponding cb was native as well,
     *                      else we have big problems!  we could detect
     *                      by stacking up info on native cbs, if nobody ever
     *                      did an int 2b natively...not worth it for now)
     * 2) NtContinue
     */
    if (SYSnum == SYS_CallbackReturn)
        return false;
    if (SYSnum == SYS_Continue)
        return false;
    /* 3) NtCreateThread
     *    Ref case 5295 - Sygate hooks this nt wrapper differently than the
     *    others (@ 2nd instruction).  We only need to hook CreateThread
     *    system call for follow children from native exec threads anyways, so
     *    is easiest to just skip this one and live without that ability.
     */
    if (!DYNAMO_OPTION(native_exec_hook_create_thread) &&
        SYSnum == SYS_CreateThread)
        return false;
    /* 4) NtWriteVirtualMemory:
     *    Case 9156/9103: we don't hook it to avoid removing
     *    our own GBOP hook, until we actually implement acting on it (case 8321)
     */
    if (SYSnum == SYS_WriteVirtualMemory)
        return false;
    /* We do NOT ignore SetContextThread or suspension/resumption, since
     * the target could be in DR!
     */
    return true;
}
void
init_syscall_trampolines(void)
{
int i;
HMODULE h = (HMODULE)get_ntdll_base();
ASSERT(DYNAMO_OPTION(native_exec_syscalls));
for (i = 0; i < TRAMPOLINE_MAX; i++) {
if (intercept_native_syscall(i)) {
byte *fpo_adjustment = NULL;
#ifdef GBOP
fpo_adjustment = &syscall_trampoline_gbop_fpo_offset[i];
#endif
syscall_trampoline_hook_pc[i] = (app_pc)get_proc_address(h, syscall_names[i]);
syscall_trampoline_pc[i] =
/* FIXME: would like to use static references to entry points -- yet,
* set of those we care about varies dynamically by platform, and
* we cannot include a pointer to a 2003-only Nt* entry point and
* avoid a loader link error on 2000, right?
* For now just using get_proc_address!
*/
intercept_syscall_wrapper(&syscall_trampoline_hook_pc[i],
syscall_while_native,
(void *) (ptr_int_t) i /* callee arg */,
AFTER_INTERCEPT_DYNAMIC_DECISION,
/* must store the skip_pc for the new dispatch()
* to know where to go after handling from DR --
* this is simpler than having trampoline
* pass it in as an arg to syscall_while_native
* or trying to decode it.
*/
&syscall_trampoline_skip_pc[i],
/* Returns a pointer to a copy of the original
* first 5 bytes for removing the trampoline
* later. Excepting hook chaining situations
* this could just simply be the same as the
* returned syscall_trampoline_pc. */
&syscall_trampoline_copy_pc[i],
fpo_adjustment, syscall_names[i]);
}
}
}
/* Removes every trampoline installed by init_syscall_trampolines().
 * A selected syscall with no trampoline is only expected when
 * -native_exec_hook_conflict is HOOKED_TRAMPOLINE_NO_HOOK; an unselected
 * syscall must never have one (checked in debug builds).
 */
void
exit_syscall_trampolines(void)
{
    int i;

    ASSERT(DYNAMO_OPTION(native_exec_syscalls));
    for (i = 0; i < TRAMPOLINE_MAX; i++) {
        if (intercept_native_syscall(i)) {
            if (syscall_trampoline_pc[i] == NULL) {
                /* the hook was never placed */
                ASSERT(DYNAMO_OPTION(native_exec_hook_conflict) ==
                       HOOKED_TRAMPOLINE_NO_HOOK);
            } else {
                ASSERT(syscall_trampoline_copy_pc[i] != NULL &&
                       syscall_trampoline_hook_pc[i] != NULL);
                remove_trampoline(syscall_trampoline_copy_pc[i],
                                  syscall_trampoline_hook_pc[i]);
            }
        }
        /* debug builds only: this else pairs with the if just above */
        DEBUG_DECLARE(else ASSERT(syscall_trampoline_pc[i] == NULL));
    }
}
#ifdef DEBUG
void
check_syscall_array_sizes()
{
ASSERT(sizeof(windows_81_x64_syscalls) == sizeof(windows_2000_syscalls));
ASSERT(sizeof(windows_81_wow64_syscalls) == sizeof(windows_2000_syscalls));
ASSERT(sizeof(windows_81_x86_syscalls) == sizeof(windows_2000_syscalls));
ASSERT(sizeof(windows_8_x64_syscalls) == sizeof(windows_2000_syscalls));
ASSERT(sizeof(windows_8_wow64_syscalls) == sizeof(windows_2000_syscalls));
ASSERT(sizeof(windows_8_x86_syscalls) == sizeof(windows_2000_syscalls));
ASSERT(sizeof(windows_7_x64_syscalls) == sizeof(windows_2000_syscalls));
ASSERT(sizeof(windows_7_syscalls) == sizeof(windows_2000_syscalls));
ASSERT(sizeof(windows_vista_sp1_x64_syscalls) == sizeof(windows_2000_syscalls));
ASSERT(sizeof(windows_vista_sp1_syscalls) == sizeof(windows_2000_syscalls));
ASSERT(sizeof(windows_vista_sp0_x64_syscalls) == sizeof(windows_2000_syscalls));
ASSERT(sizeof(windows_vista_sp0_syscalls) == sizeof(windows_2000_syscalls));
ASSERT(sizeof(windows_2003_syscalls) == sizeof(windows_2000_syscalls));
ASSERT(sizeof(windows_XP_x64_syscalls) == sizeof(windows_2000_syscalls));
ASSERT(sizeof(windows_XP_wow64_index) == sizeof(windows_2000_syscalls));
ASSERT(sizeof(windows_2003_syscalls) == sizeof(windows_2000_syscalls));
ASSERT(sizeof(windows_XP_syscalls) == sizeof(windows_2000_syscalls));
ASSERT(sizeof(windows_NT_sp4_syscalls) == sizeof(windows_2000_syscalls));
ASSERT(sizeof(windows_NT_sp3_syscalls) == sizeof(windows_2000_syscalls));
ASSERT(sizeof(windows_NT_sp0_syscalls) == sizeof(windows_2000_syscalls));
ASSERT(sizeof(windows_2000_syscalls)/sizeof(windows_2000_syscalls[0]) ==
sizeof(syscall_requires_action)/sizeof(syscall_requires_action[0]));
ASSERT(sizeof(windows_2000_syscalls)/sizeof(windows_2000_syscalls[0]) ==
sizeof(syscall_names)/sizeof(syscall_names[0]));
}
/* Debug-only check of the active syscalls[] table against the running ntdll,
 * in an attempt to catch changes to the syscall interface across Windows
 * patches and service packs.  For each syscall present on this OS version the
 * ntdll wrapper is looked up by name and its system call number decoded; a
 * decoded number that differs from the table is reported and written back
 * into the table.
 */
void
check_syscall_numbers(dcontext_t *dcontext)
{
    module_handle_t h = get_ntdll_base();
    int idx;

    ASSERT(h != NULL && h != INVALID_HANDLE_VALUE);
    LOG(GLOBAL, LOG_SYSCALLS, 4, "check_syscall_numbers: ntdll @ "PFX"\n", h);
    for (idx = 0; idx < SYS_MAX; idx++) {
        byte *addr;
        int decoded;

        if (syscalls[idx] == SYSCALL_NOT_PRESENT)
            continue;
        addr = (byte *)get_proc_address(h, syscall_names[idx]);
        ASSERT(addr != NULL);
        LOG(GLOBAL, LOG_SYSCALLS, 4,
            "\tsyscall 0x%x %s: addr "PFX"\n", idx, syscall_names[idx], addr);
        decoded = decode_syscall_num(dcontext, addr);
        /* because of Sygate hooks can't assert the decoded number is valid
         * here: a negative result is simply skipped
         */
        if (decoded < 0 || decoded == syscalls[idx])
            continue;
        SYSLOG_INTERNAL_ERROR("syscall %s is really 0x%x not 0x%x\n",
                              syscall_names[idx], decoded, syscalls[idx]);
        /* of course is much too late to fix if we already used via
         * NT_SYSCALL */
        syscalls[idx] = decoded;
    }
}
#endif
/* Expands the region [*base, *base + *size) outward to page boundaries, since
 * Windows lets you pass non-aligned values, unlike Linux:
 * e.g. a two byte cross-page request will result in a two page region.
 * Both *base and *size are updated in place; a region that is already
 * page-aligned in both base and size is left untouched.
 * dcontext is only needed by the LOG(THREAD, ...) statement.
 */
static inline void
align_page_boundary(dcontext_t *dcontext,
                    app_pc *base /* IN OUT */, size_t *size/* IN OUT */)
{
    if (ALIGNED(*base, PAGE_SIZE) && ALIGNED(*size, PAGE_SIZE))
        return; /* nothing to adjust */

    /* need to cover all pages overlapping the region [base, base + size);
     * the size must be computed first as it depends on the old base
     */
    *size = ALIGN_FORWARD(*base+*size, PAGE_SIZE) - PAGE_START(*base);
    *base = (app_pc) PAGE_START(*base);
    LOG(THREAD, LOG_SYSCALLS|LOG_VMAREAS, 2,
        "\talign_page_boundary => base="PFX" size="PIFX"\n", *base, *size);
}
/* Verifies whether the target process is being created, presumably as a
 * child of the current process.
 *
 * Returns true only when the remote PEB->Ldr status query succeeds and
 * reports that Ldr is not yet set up; returns false both for an initialized
 * process and when the query fails.
 */
bool
is_newly_created_process(HANDLE process_handle)
{
    /* Must be signed: the failure check below relies on a negative value
     * (presumably what get_remote_process_ldr_status() returns on error --
     * confirm against its definition).  As an unsigned the ">= 0" test was
     * always true and the failure branch was unreachable.
     */
    int remote_ldr_data;
    /* We check based on - trait 3) PEB.Ldr
     * The Ldr entry is created by the running process itself later */
    /* ATTIC - rejected traits
     * trait 1) it doesn't have any threads created
     * Seems overly expensive to have no easy alternative to
     * NtQuerySystemInformation to tell there are no threads created
     * in the process, should use to verify new process since that
     * should be the rare case
     * FIXME: could at least store the last created pid and a flag indicating if
     * its thread has been created and use that as an auxiliary check
     *
     * May be easier to check the PEB
     * trait 2) PEB.ProcessParameters
     * The process parameters are available only after they have been created,
     * (in fact a good trait that a process without them has just been created,
     * yet they are created at the time the first thread's stack is needed.
     *
     * NOTE - in Vista traits 1 and 2 are no longer valid for this
     * purpose.  NtCreateUserProcess creates the first thread and sets up
     * the process parameters in addition to creating the process.  However this
     * is only used for aslr_stack so doesn't really matter that much.  Trait 3
     * (the one we use) should still work anyways (and cover anyone using the legacy
     * native interface NtCreateProcess to create the process).
     */
    DODEBUG({
        /* dead end approach, this code can be removed*/
        /* invalid trait 4: shouldn't have many handles open
         * Attempted using NtQueryInformationProcess
         * ProcessHandleCount which is usually 1 on XP at the time
         * a new process is created, if it holds on all platforms
         *
         * Note unfortunately this cannot be counted on, since handles may
         * be inherited - and processes created by cygwin do inherit a lot
         * of handles.
         */
        ulong remote_process_handle_count;
        NTSTATUS res = get_process_handle_count(process_handle,
                                                &remote_process_handle_count);
        if (NT_SUCCESS(res)) {
            LOG(GLOBAL, LOG_ALL, 2,
                "is_newly_created_process: process "PIDFMT" has %d handles -> %s\n",
                process_id_from_handle(process_handle),
                remote_process_handle_count,
                remote_process_handle_count == 1 ? "NEW" : "maybe new");
        }
    });
    remote_ldr_data = get_remote_process_ldr_status(process_handle);
    if (remote_ldr_data >= 0) {
        LOG(GLOBAL, LOG_ALL, 1,
            "is_newly_created_process: process "PIDFMT" PEB->Ldr = %s\n",
            process_id_from_handle(process_handle),
            remote_ldr_data != 0 ? "initialized" : "NULL -> new process");
        return (remote_ldr_data == 0); /* new process */
    } else {
        /* xref case 9800 - can happen if the app handle lacks the rights we
         * need (in which case isn't a new process since the handle used then has
         * full rights).  Get handle rights in local since won't be available in an
         * ldmp. */
        DEBUG_DECLARE(ACCESS_MASK rights = nt_get_handle_access_rights(process_handle);)
        ASSERT_CURIOSITY(get_os_version() >= WINDOWS_VERSION_VISTA &&
                         "xref case 9800, is_newly_created_process failure");
    }
    return false;
}
/* Rather than split up get_syscall_method() we have routines like these
 * to query variations
 */
/* Only meaningful for the wow64 syscall method (asserted): reports whether
 * the OS version is older than Windows 8.
 */
bool
syscall_uses_wow64_index()
{
    bool pre_win8;
    ASSERT(get_syscall_method() == SYSCALL_METHOD_WOW64);
    pre_win8 = (get_os_version() < WINDOWS_VERSION_8);
    return pre_win8;
}
bool
syscall_uses_edx_param_base()
{
return (get_syscall_method() != SYSCALL_METHOD_WOW64 ||
get_os_version() < WINDOWS_VERSION_8);
}
/* SYSCALL_PC(dc): evaluates to the address to treat as the pc of the system
 * call instruction for dcontext dc, selected by get_syscall_method():
 *   - int / syscall: POST_SYSCALL_PC(dc) - INT_LENGTH (the two instruction
 *     lengths are asserted to be equal)
 *   - wow64:         POST_SYSCALL_PC(dc) - CTI_FAR_ABS_LENGTH
 *   - otherwise:     get_app_sysenter_addr()
 *
 * FIXME : For int/syscall we can just subtract 2 from the post syscall pc but for
 * sysenter we do the post-syscall ret native and therefore we've lost the
 * address of the actual syscall, but we are only going to use this for
 * certain ntdll system calls so is almost certainly the ntdll sysenter. As
 * a hack for now we just use the address of the first system call we saw
 * (which should be ntdll's), this is good enough for detach and prob. good
 * enough for app GetThreadContext (we could just use 0x7ffe0302 but it moved
 * on xp sp2) */
#define SYSCALL_PC(dc) \
((get_syscall_method() == SYSCALL_METHOD_INT || \
get_syscall_method() == SYSCALL_METHOD_SYSCALL) ? \
(ASSERT(SYSCALL_LENGTH == INT_LENGTH), \
POST_SYSCALL_PC(dc) - INT_LENGTH) : \
(get_syscall_method() == SYSCALL_METHOD_WOW64 ? \
(POST_SYSCALL_PC(dc) - CTI_FAR_ABS_LENGTH) : \
get_app_sysenter_addr()))
/* SET_RETURN_VAL(dc, val): stores val (cast to reg_t) into the xax slot of
 * dc's mcontext.
 * Since always coming from dispatch now, only need to set mcontext.
 */
#define SET_RETURN_VAL(dc, val) \
get_mcontext(dc)->xax = (reg_t) (val)
/***************************************************************************
* PRE SYSTEM CALL
*
* FIXME: should we pass mcontext to these routines to avoid
* the get_mcontext() call and derefs?
* => now we're forcing the inline of get_mcontext() so should be fine
*/
/* Computes the address of the first system call parameter for the machine
 * context mc: the register holding the parameter base, advanced by
 * SYSCALL_PARAM_OFFSET() bytes.
 */
static reg_t *
pre_system_call_param_base(priv_mcontext_t *mc)
{
    reg_t *param_base;
#ifdef X64
    param_base = (reg_t *) mc->xsp;
#else
    /* On Win8, wow64 syscalls do not point edx at the params and
     * instead simply use esp.
     */
    if (syscall_uses_edx_param_base())
        param_base = (reg_t *) mc->xdx;
    else
        param_base = (reg_t *) mc->xsp;
#endif
    return param_base + (SYSCALL_PARAM_OFFSET() / sizeof(reg_t));
}
/* NtCreateProcess, NtCreateProcessEx
 *
 * Pre-syscall handling: resets the per-thread aslr last_child_padded marker
 * and, when syscall logging is enabled, logs the section address and module
 * name for the new process image.  ex selects the NtCreateProcessEx
 * parameter layout (one extra trailing parameter).
 */
static void
presys_CreateProcess(dcontext_t *dcontext, reg_t *param_base, bool ex)
{
    priv_mcontext_t *mc = get_mcontext(dcontext);
    /* The full parameter list is spelled out to document the syscall
     * signature even though only section_handle is consumed below.
     */
    HANDLE *process_handle = (HANDLE *) sys_param(dcontext, param_base, 0);
    uint access_mask = (uint) sys_param(dcontext, param_base, 1);
    uint attributes = (uint) sys_param(dcontext, param_base, 2);
    uint inherit_from_process = (uint) sys_param(dcontext, param_base, 3);
    BOOLEAN inherit_handles_only = (BOOLEAN) sys_param(dcontext, param_base, 4);
    HANDLE section_handle = (HANDLE) sys_param(dcontext, param_base, 5);
    HANDLE debug_handle = (HANDLE) sys_param(dcontext, param_base, 6);
    HANDLE exception_handle = (HANDLE) sys_param(dcontext, param_base, 7);
    if (ex) {
        /* according to metasploit, others type as HANDLE unknown etc. */
        uint job_member_level = (uint) sys_param(dcontext, param_base, 8);
    }
    /* Case 9173: guard against pid reuse.  Better in post after success
     * check but not a big deal.
     * We don't do this on CreateThread b/c is_newly_created_process() is still
     * true after the first thread (one fix is to store the last created pid and
     * a flag indicating if its thread has been created and use that as an auxiliary
     * check in is_newly_created_process())
     */
    dcontext->aslr_context.last_child_padded = 0;
    DOLOG(1, LOG_SYSCALLS, {
        char modname[MAXIMUM_PATH];
        app_pc section_base = (app_pc) get_section_address(section_handle);
        /* we will inject in post_syscall or when the first thread is about
         * to be created */
        LOG(THREAD, LOG_SYSCALLS, IF_DGCDIAG_ELSE(1, 2),
            "syscall: NtCreateProcess section @"PFX"\n", section_base);
        get_module_name(section_base, modname, sizeof(modname));
        if (modname[0] != '\0') {
            LOG(THREAD, LOG_SYSCALLS, 2,
                "\tNtCreateProcess for module %s\n", modname);
        }
    });
}
#ifdef DEBUG
/* NtCreateUserProcess
 *
 * Debug-only pre-syscall handler: reads a few of the parameters, raises
 * curiosity asserts when they differ from the values observed from
 * kernel32's CreateProcess, and logs the path of the executable.  It does
 * not modify any parameter.
 */
static void
presys_CreateUserProcess(dcontext_t *dcontext, reg_t *param_base)
{
/* New in Vista, here's what I got reverse engineering
* NtCreateUserProcess (11 args, using windows types)
*
* NtCreateUserProcess (
* OUT PHANDLE ProcessHandle,
* OUT PHANDLE ThreadHandle,
* IN ACCESS_MASK ProcDesiredAccess,
* IN ACCESS_MASK ThreadDesiredAccess,
* IN POBJECT_ATTRIBUTES ProcObjectAttributes,
* IN POBJECT_ATTRIBUTES ThreadObjectAttributes,
* IN uint? unknown, [ observed 0x4 ]
* IN BOOL CreateSuspended, [ refers to the thread not the process ]
* IN PRTL_USER_PROCESS_PARAMETERS Params,
* INOUT proc_stuff proc,
* INOUT create_proc_thread_info_t *thread [ see ntdll.h ])
* CreateProcess hardcodes 0x2000000 (== MAXIMUM_ALLOWED) for both
* ACCESS_MASK arguments. I've only observed NULL (== default) for the
* OBJECT_ATTRIBUTES arguments so they are a bit of a guess, but they
* need to be here somewhere and based on error codes I know they are
* ptr arguments so seems quite likely esp. given the arg layout.
*
* where proc_stuff { \\ speculative - the 64bit differences are odd and imply
* \\ more then just size changes
* size_t struct_size, [observed 0x48 (0x58 for 64bit)] \\ prob. sizeof(proc_stuff)
* ptr_uint_t unknown_p2, \\ OUT
* ptr_uint_t unknown_p3, \\ IN/OUT
* OUT HANDLE file_handle, [exe file handle]
* OUT HANDLE section_handle, [exe section handle]
* uint32 unknown_p6, \\ OUT
* uint32 unknown_p7, \\ OUT
* uint32 unknown_p8, \\ OUT
* uint32 unknown_p9, \\ OUT
* #ifndef X64
* uint32 unknown_p10, \\ OUT
* #endif
* OUT PEB *new_proc_peb,
* uint32 unknown_p12_p17[6], \\ OUT
* #ifndef X64
* uint32 unknown_p18, \\ OUT
* #endif
* }
*/
priv_mcontext_t *mc = get_mcontext(dcontext);
/* parameter indices below follow the argument list documented above */
ACCESS_MASK proc_access_mask = (uint) sys_param(dcontext, param_base, 2);
ACCESS_MASK thread_access_mask = (uint) sys_param(dcontext, param_base, 3);
/* might be BOOLEAN instead? though separate param should zero out rest */
BOOL create_suspended = (BOOL) sys_param(dcontext, param_base, 7);
create_proc_thread_info_t *thread_stuff = (void *) sys_param(dcontext, param_base, 10);
ASSERT(get_os_version() >= WINDOWS_VERSION_VISTA);
/* might need these in post, note CreateProcess appears to hardcode them */
ASSERT_CURIOSITY(proc_access_mask == MAXIMUM_ALLOWED);
ASSERT_CURIOSITY(thread_access_mask == MAXIMUM_ALLOWED);
ASSERT_CURIOSITY(create_suspended);
/* FIXME - NYI - if any of the above curiosities don't hold we should
* change them here and then fixup as needed in post. */
/* Potentially dangerous deref of app ptr, but is only for debug logging */
ASSERT(thread_stuff != NULL && thread_stuff->nt_path_to_exe.buffer != NULL);
/* the printed length is capped at MAXIMUM_PATH */
LOG(THREAD, LOG_SYSCALLS, 1, "syscall: NtCreateUserProcess presys %.*S\n",
MIN(MAXIMUM_PATH, thread_stuff->nt_path_to_exe.buffer_size),
(wchar_t *)thread_stuff->nt_path_to_exe.buffer);
/* The thread can be resumed inside the kernel so ideally we would
* insert the DR env vars into the pp param here (i#349).
* However, no matter what I do, the syscall returns STATUS_INVALID_PARAMETER.
* I made a complete copy of pp and updated the unicode pointers so it's
* all contiguous, but still the error. Perhaps it must be on the app heap?
* In any case, kernel32!CreateProcess is hardcoding that the thread be
* suspended (presumably to do its csrss and other inits safely) so we rely
* on seeing NtResumeThread.
*/
}
#endif
/* NtCreateThread
 *
 * Pre-syscall handling: logs the request, gives maybe_inject_into_process()
 * a chance to act on the target process using the new thread's initial
 * context, and calls pre_second_thread() when the thread is being created
 * in this process.
 */
static void
presys_CreateThread(dcontext_t *dcontext, reg_t *param_base)
{
    priv_mcontext_t *mc = get_mcontext(dcontext);
    HANDLE *thread_handle= (HANDLE *) sys_param(dcontext, param_base, 0);
    uint access_mask     = (uint) sys_param(dcontext, param_base, 1);
    uint attributes      = (uint) sys_param(dcontext, param_base, 2);
    HANDLE process_handle= (HANDLE) sys_param(dcontext, param_base, 3);
    uint *client_id      = (uint*) sys_param(dcontext, param_base, 4);
    CONTEXT *cxt         = (CONTEXT *) sys_param(dcontext, param_base, 5);
    USER_STACK *stack    = (USER_STACK *) sys_param(dcontext, param_base, 6);
    BOOLEAN suspended    = (BOOLEAN) sys_param(dcontext, param_base, 7);
    DEBUG_DECLARE(process_id_t pid = process_id_from_handle(process_handle);)
    /* Checked up front: the logging below already dereferences cxt, so
     * asserting after it (as was done previously) could never fire first.
     */
    ASSERT(cxt != NULL);
    LOG(THREAD, LOG_SYSCALLS|LOG_THREADS, IF_DGCDIAG_ELSE(1, 2),
        "syscall: NtCreateThread pid="PFX" suspended=%d\n",
        pid, suspended);
    LOG(THREAD, LOG_SYSCALLS|LOG_THREADS, 2,
        "\tstack: "PFX" "PFX" "PFX" "PFX" "PFX"\n",
        stack->FixedStackBase, stack->FixedStackLimit,
        stack->ExpandableStackBase, stack->ExpandableStackLimit,
        stack->ExpandableStackBottom);
    /* According to Nebbett, in eax is the win32 start address
     * (stored in ThreadQuerySetWin32StartAddress slot, though that
     * is reused by the os, so might not be the same later) and eax is used
     * by the thread start kernel32 thunk.  It also appears from the thunk
     * that the argument to the thread start function is in ebx */
    LOG(THREAD, LOG_SYSCALLS|LOG_THREADS, 2,
        "\tesp="PFX", xip="PFX"\n\tstart address "PFX" with arg "PFX"\n",
        cxt->CXT_XSP, cxt->CXT_XIP, cxt->CXT_XAX, cxt->CXT_XBX);
    DOLOG(2, LOG_SYSCALLS|LOG_THREADS, {
        char buf[MAXIMUM_PATH];
        print_symbolic_address((app_pc)cxt->CXT_XAX, buf, sizeof(buf), false);
        LOG(THREAD, LOG_SYSCALLS|LOG_THREADS, 2,
            "\tsymbol info for start address : %s\n", buf);
    });
    /* if not early injecting, we will unsafely modify cxt (for late follow
     * children) FIXME
     * if not injecting at all we won't change cxt.
     */
    maybe_inject_into_process(dcontext, process_handle, cxt);
    if (is_phandle_me(process_handle))
        pre_second_thread();
}
/* NtCreateThreadEx
 *
 * Pre-syscall handling: logs the request (debug builds) and calls
 * pre_second_thread() when the thread is being created in this process.
 */
static void
presys_CreateThreadEx(dcontext_t *dcontext, reg_t *param_base)
{
    /* New in Vista, here's what I got reverse engineering NtCreateThreadEx
     * (11 args, using windows types)
     *
     * NtCreateThreadEx (
     *   OUT PHANDLE ThreadHandle,
     *   IN ACCESS_MASK DesiredAccess,
     *   IN POBJECT_ATTRIBUTES ObjectAttributes,
     *   IN HANDLE ProcessHandle,
     *   IN LPTHREAD_START_ROUTINE Win32StartAddress,
     *   IN LPVOID StartParameter,
     *   IN BOOL CreateSuspended,
     *   IN uint unknown, [ CreateThread hardcodes to 0 ]
     *   IN SIZE_T StackCommitSize,
     *   IN SIZE_T StackReserveSize,
     *   INOUT create_thread_info_t *thread_info [ see ntdll.h ])
     */
    DEBUG_DECLARE(priv_mcontext_t *mc = get_mcontext(dcontext);)
    HANDLE process_handle = (HANDLE) sys_param(dcontext, param_base, 3);
    DEBUG_DECLARE(byte *start_addr = (byte *) sys_param(dcontext, param_base, 4);)
    DEBUG_DECLARE(void *start_parameter = (void *) sys_param(dcontext, param_base, 5);)
    DEBUG_DECLARE(bool create_suspended = (bool) sys_param(dcontext, param_base, 6);)
    DEBUG_DECLARE(process_id_t pid = process_id_from_handle(process_handle);)
    ASSERT(get_os_version() >= WINDOWS_VERSION_VISTA);
    /* Labelled with the real syscall name so log readers can tell this
     * apart from the NtCreateThread handler.
     */
    LOG(THREAD, LOG_SYSCALLS|LOG_THREADS, 2,
        "syscall: NtCreateThreadEx pid="PFX" suspended=%d\n"
        "\tstart_addr="PFX" arg="PFX"\n",
        pid, create_suspended, start_addr, start_parameter);
    DOLOG(2, LOG_SYSCALLS|LOG_THREADS, {
        char buf[MAXIMUM_PATH];
        print_symbolic_address(start_addr, buf, sizeof(buf), false);
        LOG(THREAD, LOG_SYSCALLS|LOG_THREADS, 2,
            "\tsymbol info for start address : %s\n", buf);
    });
    if (is_phandle_me(process_handle))
        pre_second_thread();
}
/* NtCreateWorkerFactory
 *
 * Pre-syscall handling: when the factory is being created for this process,
 * calls pre_second_thread().
 */
static void
presys_CreateWorkerFactory(dcontext_t *dcontext, reg_t *param_base)
{
    /* New in Vista.  10 args:
     * NtCreateWorkerFactory(
     *   __out PHANDLE FactoryHandle,
     *   __in ACCESS_MASK DesiredAccess,
     *   __in_opt POBJECT_ATTRIBUTES ObjectAttributes,
     *   __in HANDLE CompletionPortHandle,
     *   __in HANDLE ProcessHandle,
     *   __in PVOID StartRoutine,
     *   __in_opt PVOID StartParameter,
     *   __in_opt ULONG MaxThreadCount,
     *   __in_opt SIZE_T StackReserve,
     *   __in_opt SIZE_T StackCommit)
     */
    HANDLE target_process = (HANDLE) sys_param(dcontext, param_base, 4);
    ASSERT(get_os_version() >= WINDOWS_VERSION_VISTA);
    if (!is_phandle_me(target_process))
        return;
    pre_second_thread();
}
/***************************************************************************
* ENV VAR PROPAGATION
*/
/* There is some overlap w/ handle_execve() in unix/os.c but not
* quite enough to easily share this.
*/
/* Names of the DR variables propagated to child processes, in narrow form
 * (used as keys for get_config_val()).  Must stay index-for-index parallel
 * with wenv_to_propagate below: the code indexes both arrays with the same i.
 */
static const char * const env_to_propagate[] = {
DYNAMORIO_VAR_RUNUNDER,
DYNAMORIO_VAR_OPTIONS,
DYNAMORIO_VAR_AUTOINJECT,
DYNAMORIO_VAR_LOGDIR,
DYNAMORIO_VAR_CONFIGDIR,
};
/* Wide-character forms of the same names, in the same order; these are the
 * strings matched against and written into the child's environment block.
 */
static const wchar_t * const wenv_to_propagate[] = {
L_DYNAMORIO_VAR_RUNUNDER,
L_DYNAMORIO_VAR_OPTIONS,
L_DYNAMORIO_VAR_AUTOINJECT,
L_DYNAMORIO_VAR_LOGDIR,
L_DYNAMORIO_VAR_CONFIGDIR,
};
/* Element count of env_to_propagate (and, by construction, of
 * wenv_to_propagate).
 */
#define NUM_ENV_TO_PROPAGATE (sizeof(env_to_propagate)/sizeof(env_to_propagate[0]))
/* Reads the next environment variable to keep from the remote process.
 *
 * Starting at remote address env_ptr, reads up to toread bytes into buf
 * (forcing null termination) and skips over any variable whose name starts
 * with one of the DR variable names in wenv_to_propagate.
 *
 * Returns the remote address of the variable now held in buf.  buf[0] == 0
 * on return means the end of the environment block was reached.
 * Returns NULL if the remote memory could not be read.
 */
static wchar_t *
get_process_env_var(HANDLE phandle, wchar_t *env_ptr, wchar_t *buf, size_t toread)
{
    int i;
    size_t got;
    bool keep_env;
    while (true) {
        keep_env = true;
        ASSERT(toread <= (size_t)PAGE_SIZE);
        /* if an env var is too long we're ok: DR vars will fit, and if longer we'll
         * handle rest next call.
         */
        if (!nt_read_virtual_memory(phandle, env_ptr, buf, toread, &got)) {
            /* may have crossed page boundary and the next page is inaccessible */
            byte *start = (byte *) env_ptr;
            if (PAGE_START(start) != PAGE_START(start + toread)) {
                ASSERT((size_t)((byte *)ALIGN_FORWARD(start, PAGE_SIZE)-start) <= toread);
                toread = (byte *) ALIGN_FORWARD(start, PAGE_SIZE) - start;
                if (!nt_read_virtual_memory(phandle, env_ptr, buf, toread, &got))
                    return NULL;
            } else
                return NULL;
            continue;
        }
        /* Need at least one whole element to place the terminator: without
         * this check a short (or empty) read would index buf[-1].
         */
        if (got < sizeof(buf[0]))
            return NULL;
        buf[got/sizeof(buf[0]) - 1] = '\0';
        if (buf[0] == '\0')
            return env_ptr;
        for (i = 0; i < NUM_ENV_TO_PROPAGATE; i++) {
            /* if conflict between env and cfg, we use cfg */
            if (wcsncmp(wenv_to_propagate[i], buf, wcslen(wenv_to_propagate[i])) == 0) {
                keep_env = false;
                break;
            }
        }
        if (keep_env)
            return env_ptr;
        /* DR var: step over it and examine the next one */
        env_ptr += wcslen(buf) + 1;
    }
    return NULL;
}
/* Called at presys-ResumeThread to append DR env vars in the target process PEB.
 *
 * env_ptr is the address, in the target process phandle, of the pointer to
 * the environment block.  A new block is allocated in the target, the
 * existing variables (minus any DR ones) are copied into it, the DR
 * variables that have a config value are appended, and *env_ptr in the
 * target is redirected to the new block.
 *
 * Returns true on success or when there is nothing to propagate; on failure
 * frees the new block (if allocated) and returns false.
 */
static bool
add_dr_env_vars(dcontext_t *dcontext, HANDLE phandle, wchar_t **env_ptr)
{
    wchar_t *env, *cur;
    size_t tot_sz = 0, app_sz, sz;
    /* initialized since it is logged on a failure path where the write
     * routine may not have set it
     */
    size_t got = 0;
    wchar_t *new_env = NULL;
    wchar_t buf[MAX_OPTIONS_STRING];
    bool need_var[NUM_ENV_TO_PROPAGATE];
    size_t sz_var[NUM_ENV_TO_PROPAGATE];
    NTSTATUS res;
    uint old_prot = PAGE_NOACCESS;
    int i, num_propagate = 0;

    for (i = 0; i < NUM_ENV_TO_PROPAGATE; i++) {
        if (get_config_val(env_to_propagate[i]) == NULL)
            need_var[i] = false;
        else {
            need_var[i] = true;
            num_propagate++;
        }
    }
    if (num_propagate == 0) {
        LOG(THREAD, LOG_SYSCALLS, 2,
            "%s: no DR env vars to propagate\n", __FUNCTION__);
        return true; /* nothing to do */
    }

    ASSERT(env_ptr != NULL);
    if (!nt_read_virtual_memory(phandle, env_ptr, &env, sizeof(env), NULL))
        goto add_dr_env_failure;
    if (env != NULL) {
        /* compute size of current env block, and check for existing DR vars */
        cur = env;
        while (true) {
            /* for simplicity we do a syscall for each var */
            cur = get_process_env_var(phandle, cur, buf, sizeof(buf));
            if (cur == NULL)
                goto add_dr_env_failure; /* nothing allocated yet */
            if (buf[0] == '\0')
                break;
            tot_sz += wcslen(buf) + 1;
            cur += wcslen(buf) + 1;
        }
        tot_sz++; /* final 0 marking end */
        /* from here on out, all *sz vars are total bytes, not wchar_t elements */
        tot_sz *= sizeof(*env);
    } else {
        /* No existing block: treat it as empty, which still needs room for
         * the final 0 that is written at the end of the new block.
         */
        tot_sz = sizeof(*env);
    }
    app_sz = tot_sz;
    LOG(THREAD, LOG_SYSCALLS, 2,
        "%s: orig app env vars at "PFX"-"PFX"\n",
        __FUNCTION__, env, env + app_sz/sizeof(*env));

    /* calculate size needed for adding DR env vars.
     * for each var, we truncate if too big for buf.
     */
    for (i = 0; i < NUM_ENV_TO_PROPAGATE; i++) {
        if (need_var[i]) {
            sz_var[i] = wcslen(wenv_to_propagate[i]) +
                strlen(get_config_val(env_to_propagate[i])) + 2/*=+0*/;
            if (sz_var[i] > BUFFER_SIZE_ELEMENTS(buf)) {
                SYSLOG_INTERNAL(SYSLOG_WARNING, "truncating DR env var for child");
                sz_var[i] = BUFFER_SIZE_ELEMENTS(buf);
            }
            sz_var[i] *= sizeof(*env);
            tot_sz += sz_var[i];
        }
    }

    /* allocate a new env block and copy over the old */
    res = nt_remote_allocate_virtual_memory(phandle, &new_env, tot_sz,
                                            PAGE_READWRITE, MEM_COMMIT);
    if (!NT_SUCCESS(res)) {
        LOG(THREAD, LOG_SYSCALLS, 2,
            "%s: failed to allocate new env "PIFX"\n", __FUNCTION__, res);
        goto add_dr_env_failure;
    }
    LOG(THREAD, LOG_SYSCALLS, 2,
        "%s: new app env vars allocated at "PFX"-"PFX"\n",
        __FUNCTION__, new_env, new_env + tot_sz/sizeof(*env));
    cur = env;
    sz = 0;
    /* skipped entirely when there was no original block to copy from */
    while (env != NULL) {
        /* for simplicity we do a syscall for each var */
        size_t towrite = 0;
        cur = get_process_env_var(phandle, cur, buf, sizeof(buf));
        if (cur == NULL)
            goto add_dr_env_failure;
        if (buf[0] == '\0')
            break;
        towrite = (wcslen(buf) + 1);
        res = nt_raw_write_virtual_memory(phandle, new_env + sz/sizeof(*env),
                                          buf, towrite * sizeof(*env), &got);
        if (!NT_SUCCESS(res)) {
            LOG(THREAD, LOG_SYSCALLS, 2,
                "%s copy: got status "PFX", wrote "PIFX" vs requested "PIFX"\n",
                __FUNCTION__, res, got, towrite);
            goto add_dr_env_failure;
        }
        sz += towrite * sizeof(*env);
        cur += towrite;
    }
    ASSERT(sz == app_sz - sizeof(*env) /* before final 0 */ );

    /* add DR env vars at the end.
     * XXX: is alphabetical sorting relied upon?  adding to end is working.
     */
    for (i = 0; i < NUM_ENV_TO_PROPAGATE; i++) {
        if (need_var[i]) {
            _snwprintf(buf, BUFFER_SIZE_ELEMENTS(buf), L"%s=%S",
                       wenv_to_propagate[i], get_config_val(env_to_propagate[i]));
            NULL_TERMINATE_BUFFER(buf);
            if (!nt_write_virtual_memory(phandle, new_env + sz/sizeof(*env),
                                         buf, sz_var[i], &got))
                goto add_dr_env_failure;
            sz += sz_var[i];
        }
    }
    ASSERT(sz == tot_sz - sizeof(*env) /* before final 0 */ );
    /* write final 0 */
    buf[0] = 0;
    if (!nt_write_virtual_memory(phandle, new_env + sz/sizeof(*env), buf,
                                 sizeof(*env), &got))
        goto add_dr_env_failure;

    /* install new env */
    if (!nt_remote_protect_virtual_memory(phandle, (byte*)PAGE_START(env_ptr), PAGE_SIZE,
                                          PAGE_READWRITE, &old_prot)) {
        LOG(THREAD, LOG_SYSCALLS, 1,
            "%s: failed to mark "PFX" writable\n", __FUNCTION__, env_ptr);
        goto add_dr_env_failure;
    }
    if (!nt_write_virtual_memory(phandle, env_ptr, &new_env, sizeof(new_env), &got))
        goto add_dr_env_failure;
    if (!nt_remote_protect_virtual_memory(phandle, (byte*)PAGE_START(env_ptr), PAGE_SIZE,
                                          old_prot, &old_prot)) {
        LOG(THREAD, LOG_SYSCALLS, 1,
            "%s: failed to restore "PFX" to "PIFX"\n", __FUNCTION__, env_ptr, old_prot);
        /* not a fatal error */
    }
    /* XXX: free the original?  on Vista+ it's part of the pp alloc and
     * is on the app heap so we can't.  we could query and see if it's
     * a separate alloc.  for now we just leave it be.
     */
    LOG(THREAD, LOG_SYSCALLS, 2,
        "%s: installed new env "PFX" at "PFX"\n", __FUNCTION__, new_env, env_ptr);
    return true;

 add_dr_env_failure:
    if (new_env != NULL) {
        if (!NT_SUCCESS(nt_remote_free_virtual_memory(phandle, new_env))) {
            LOG(THREAD, LOG_SYSCALLS, 2,
                "%s: unable to free new env "PFX"\n", __FUNCTION__, new_env);
        }
        /* old_prot only changes from its PAGE_NOACCESS sentinel once the
         * page holding env_ptr has been re-protected above
         */
        if (old_prot != PAGE_NOACCESS) {
            if (!nt_remote_protect_virtual_memory(phandle, (byte*)PAGE_START(env_ptr),
                                                  PAGE_SIZE, old_prot, &old_prot)) {
                LOG(THREAD, LOG_SYSCALLS, 1, "%s: failed to restore "PFX" to "PIFX"\n",
                    __FUNCTION__, env_ptr, old_prot);
            }
        }
    }
    return false;
}
/* Fetches the context of thread_handle and reports whether that thread is
 * something other than the first thread of a new process.  If the context
 * cannot be retrieved, returns false (i.e., assume it might be the first
 * thread).
 */
static bool
not_first_thread_in_new_process(HANDLE process_handle, HANDLE thread_handle)
{
    char cxt_storage[MAX_CONTEXT_SIZE];
    CONTEXT *thread_cxt = nt_initialize_context(cxt_storage, CONTEXT_DR_STATE);
    if (!NT_SUCCESS(nt_get_context(thread_handle, thread_cxt)))
        return false;
    return !is_first_thread_in_new_process(process_handle, thread_cxt);
}
/* NtResumeThread
 *
 * Pre-syscall handling: when -follow_children is on and the thread being
 * resumed belongs to another process, attempts to insert the DR env vars
 * into that process's environment block.  The process handle opened here is
 * closed on every exit path.
 */
static void
presys_ResumeThread(dcontext_t *dcontext, reg_t *param_base)
{
    HANDLE thread_handle= (HANDLE) sys_param(dcontext, param_base, 0);
    thread_id_t tid = thread_id_from_handle(thread_handle);
    process_id_t pid = process_id_from_thread_handle(thread_handle);
    LOG(THREAD, LOG_SYSCALLS|LOG_THREADS, IF_DGCDIAG_ELSE(1, 2),
        "syscall: NtResumeThread pid=%d tid=%d\n", pid, tid);
    if (DYNAMO_OPTION(follow_children) && pid != POINTER_MAX && !is_pid_me(pid)) {
        /* For -follow_children we propagate env vars (the DR variables
         * listed in env_to_propagate) to the child to support a simple
         * run-all-children model without requiring setting up config files
         * for children.
         *
         * It's possible the app is explicitly resuming a thread in another
         * process and this has nothing to do with a new process: but our env
         * var insertion should be innocuous in that case.
         *
         * For pre-Vista, the initial thread is always suspended, and is either
         * resumed inside kernel32!CreateProcessW or by the app, so we should
         * always see a resume.  For Vista+ NtCreateUserProcess has suspend as a
         * param and ideally we should replace the env pre-NtCreateUserProcess,
         * but we have yet to get that to work, so for now we rely on
         * Vista+ process creation going through the kernel32 routines,
         * which do hardcode the thread as being suspended.
         */
        PEB *peb;
        HANDLE process_handle = process_handle_from_id(pid);
        RTL_USER_PROCESS_PARAMETERS *pp = NULL;
        if (process_handle == INVALID_HANDLE_VALUE) {
            LOG(THREAD, LOG_SYSCALLS, 1,
                "WARNING: error acquiring process handle for pid="PIFX"\n", pid);
            return;
        }
        if (!should_inject_into_process(dcontext, process_handle, NULL, NULL)) {
            LOG(THREAD, LOG_SYSCALLS, 1,
                "Not injecting so not setting DR env vars in pid="PIFX"\n", pid);
            /* we opened this handle above, so release it before bailing */
            close_handle(process_handle);
            return;
        }
        if (not_first_thread_in_new_process(process_handle, thread_handle)) {
            LOG(THREAD, LOG_SYSCALLS, 1,
                "Not first thread so not setting DR env vars in pid="PIFX"\n", pid);
            /* we opened this handle above, so release it before bailing */
            close_handle(process_handle);
            return;
        }
        peb = get_peb(process_handle);
        if (peb == NULL) {
            LOG(THREAD, LOG_SYSCALLS, 1,
                "WARNING: error acquiring PEB for pid="PIFX"\n", pid);
            close_handle(process_handle);
            return;
        }
        /* peb is an address in the target process: read the field remotely */
        if (!nt_read_virtual_memory(process_handle, &peb->ProcessParameters, &pp,
                                    sizeof(pp), NULL) || pp == NULL) {
            LOG(THREAD, LOG_SYSCALLS, 1,
                "WARNING: error acquiring ProcessParameters for pid="PIFX"\n", pid);
            close_handle(process_handle);
            return;
        }
        LOG(THREAD, LOG_SYSCALLS, 2,
            "inserting DR env vars to pid="PIFX" &pp->Environment="PFX"\n",
            pid, &pp->Environment);
        if (!add_dr_env_vars(dcontext, process_handle, (wchar_t**)&pp->Environment)) {
            LOG(THREAD, LOG_SYSCALLS, 1,
                "WARNING: unable to add DR env vars for child pid="PIFX"\n", pid);
            close_handle(process_handle);
            return;
        }
        close_handle(process_handle);
    }
}
/* NtTerminateProcess
 *
 * Pre-syscall handling with three cases, keyed on the process handle:
 *  - handle == 0: synchs with all other threads, then issues the terminate
 *    call itself on behalf of the app and stores its result as the app's
 *    return value; returns false so the caller does not execute the syscall.
 *  - handle refers to this process: hands off to cleanup_and_terminate().
 *  - any other handle: no special handling; returns true.
 */
static bool /* returns whether to execute syscall */
presys_TerminateProcess(dcontext_t *dcontext, reg_t *param_base)
{
priv_mcontext_t *mc = get_mcontext(dcontext);
HANDLE process_handle = (HANDLE) sys_param(dcontext, param_base, 0);
NTSTATUS exit_status = (NTSTATUS) sys_param(dcontext, param_base, 1);
LOG(THREAD, LOG_SYSCALLS, 1,
"syscall: NtTerminateProcess handle="PFX" pid=%d exit=%d\n",
process_handle,
process_id_from_handle((process_handle == 0) ? NT_CURRENT_PROCESS : process_handle),
exit_status);
if (process_handle == 0) {
NTSTATUS return_val;
thread_record_t **threads;
int num_threads;
priv_mcontext_t mcontext;
DEBUG_DECLARE(bool ok;)
/* this thread won't be terminated! */
LOG(THREAD, LOG_SYSCALLS, 2, "terminating all other threads, not this one\n");
/* save the mcontext so it can be restored after the synch; only the pc
 * is changed for the duration of the synch
 */
copy_mcontext(mc, &mcontext);
mc->pc = SYSCALL_PC(dcontext);
#ifdef CLIENT_INTERFACE
/* make sure client nudges are finished */
wait_for_outstanding_nudges();
#endif
/* FIXME : issues with cleaning up here what if syscall fails */
DEBUG_DECLARE(ok =)
synch_with_all_threads(THREAD_SYNCH_SUSPENDED_AND_CLEANED,
&threads, &num_threads,
/* Case 6821: while we're ok to be detached, we're
* not ok to be reset since we won't have the
* last_exit flag set for coming back here (plus
* our kstats get off since we didn't yet enter
* the cache)
*/
THREAD_SYNCH_VALID_MCONTEXT_NO_XFER,
/* if we fail to suspend a thread (e.g., privilege
* problems) ignore it. FIXME: retry instead? */
THREAD_SYNCH_SUSPEND_FAILURE_IGNORE);
ASSERT(ok);
ASSERT(threads == NULL && num_threads == 0); /* We asked for CLEANED */
/* restore the mcontext saved before the synch */
copy_mcontext(&mcontext, mc);
/* we hold the initexit lock at this point, but we cannot release
* it, b/c a new thread waiting on it could start initializing and
* then we'd issue the syscall and kill it while it's holding our
* lock, causing a deadlock when the subsequent process-terminating
* syscall comes in! (==case 4243) So, we hold the lock to issue
* the syscall, safest to do syscall right here rather than going
* back to handle_system_call()
*/
return_val = nt_terminate_process_for_app(process_handle, exit_status);
SET_RETURN_VAL(dcontext, return_val);
LOG(THREAD, LOG_SYSCALLS, 2,
"\tNtTerminateProcess("PFX", "PFX") => "PIFX" on behalf of app\n",
process_handle, exit_status, return_val);
end_synch_with_all_threads(threads, num_threads, false/*no resume*/);
return false; /* do not execute syscall -- we already did it */
/* NOTE(review): process_handle cannot be 0 in the branch below (that case
 * is handled above), so the ?: always selects process_handle.
 */
} else if (is_phandle_me((process_handle == 0) ? NT_CURRENT_PROCESS : process_handle)) {
/* case 10338: we don't synchall here for faster shutdown, but we have
* to try and not crash any other threads. FIXME: if it's rare to get here
* w/ > 1 thread perhaps we should do the synchall.
*/
LOG(THREAD, LOG_SYSCALLS, 2, "\tterminating process w/ %d running thread(s)\n",
get_num_threads());
KSTOP(pre_syscall);
KSTOP(num_exits_dir_syscall);
if (is_thread_currently_native(dcontext->thread_record)) {
/* Avoid hooks on syscalls made while cleaning up: such as
* private libraries making system lib calls
*/
dynamo_thread_under_dynamo(dcontext);
}
/* FIXME: what if syscall returns w/ STATUS_PROCESS_IS_TERMINATING? */
os_terminate_wow64_write_args(true/*process*/, process_handle, exit_status);
cleanup_and_terminate(dcontext, syscalls[SYS_TerminateProcess],
IF_X64_ELSE(mc->xcx, mc->xdx),
mc->xdx, true /* entire process */, 0, 0);
}
/* handle refers to some other process: let the syscall execute */
return true;
}
/* NtTerminateThread
 *
 * Pre-syscall handling, keyed on the thread id behind the handle:
 *  - id lookup yields 0xFFFFFFFF: nothing is done here.
 *  - a thread other than the caller: synchs with the target thread
 *    (requesting suspended-and-cleaned) before the syscall proceeds.
 *  - the calling thread itself: hands off to cleanup_and_terminate(),
 *    flagging process exit when this is the last app thread.
 */
static void
presys_TerminateThread(dcontext_t *dcontext, reg_t *param_base)
{
priv_mcontext_t *mc = get_mcontext(dcontext);
/* NtTerminateThread(IN HANDLE ThreadHandle OPTIONAL, IN NTSTATUS ExitStatus) */
HANDLE thread_handle = (HANDLE) sys_param(dcontext, param_base, 0);
NTSTATUS exit_status = (NTSTATUS) sys_param(dcontext, param_base, 1);
/* need to determine which thread is being terminated
* it's harder than you'd think -- we can get its handle but
* the handle may have been duplicated, no way to test
* equivalence, we have to get the thread id
*/
thread_id_t tid;
thread_record_t *tr = thread_lookup(get_thread_id());
ASSERT(tr != NULL);
/* a 0 handle is replaced with the current-thread pseudo handle */
if (thread_handle == 0)
thread_handle = NT_CURRENT_THREAD;
tid = thread_id_from_handle(thread_handle);
LOG(THREAD, LOG_SYSCALLS|LOG_THREADS, 1, "syscall: NtTerminateThread tid=%d\n", tid);
if (tid == 0xFFFFFFFF) {
/* probably invalid handle, do nothing for now */
/* FIXME: case 2573 about adding ASSERT_CURIOSITY replacing the ASSERT we had */
} else if (tid != tr->id) {
priv_mcontext_t mcontext;
DEBUG_DECLARE(thread_synch_result_t synch_res;)
/* save the mcontext so it can be restored after the synch; only the pc
 * is changed for the duration of the synch
 */
copy_mcontext(mc, &mcontext);
mc->pc = SYSCALL_PC(dcontext);
/* Fixme : issues with cleaning up here, what if syscall fails */
DEBUG_DECLARE(synch_res =)
synch_with_thread(tid, true, false, THREAD_SYNCH_VALID_MCONTEXT,
THREAD_SYNCH_SUSPENDED_AND_CLEANED,
/* if we fail to suspend a thread (e.g., privilege
* problems) ignore it. FIXME: retry instead? */
THREAD_SYNCH_SUSPEND_FAILURE_IGNORE);
ASSERT(synch_res == THREAD_SYNCH_RESULT_SUCCESS ||
/* App could be calling on already exited thread (xref 8125)
* or thread could have exited while we were synching.
* FIXME - check is racy since for dr purposes the thread is
* considered exited just before it is signaled, but is ok
* for an assert. */
is_thread_exited(thread_handle) == THREAD_EXITED ||
!is_pid_me(process_id_from_thread_handle(thread_handle)));
/* restore the mcontext saved before the synch */
copy_mcontext(&mcontext, mc);
} else {
/* case 9347 - racy early thread, yet primary is not yet 'known' */
/* we should evaluate dr_late_injected_primary_thread before
* get_num_threads()
*/
bool secondary = dr_injected_secondary_thread &&
!dr_late_injected_primary_thread;
bool exitproc = !secondary && (is_last_app_thread() && !dynamo_exited);
/* this should really be check_sole_thread() */
/* FIXME: case 9461 - we may not control all threads,
* the syscall may fail and may not be allowed to kill last thread
*/
if (secondary) {
SYSLOG_INTERNAL_WARNING("secondary thread terminating, primary not ready\n");
ASSERT(!exitproc);
ASSERT(!check_sole_thread());
}
ASSERT(!exitproc || check_sole_thread());
KSTOP(pre_syscall);
KSTOP(num_exits_dir_syscall);
os_terminate_wow64_write_args(false/*thread*/, thread_handle, exit_status);
/* exitproc tells cleanup_and_terminate whether the whole process is exiting */
cleanup_and_terminate(dcontext, syscalls[SYS_TerminateThread],
IF_X64_ELSE(mc->xcx, mc->xdx),
mc->xdx, exitproc, 0, 0);
}
}
/* NtSetContextThread
 *
 * Pre-syscall handler for an app request to set a thread's context.
 * If the target thread is one for which intercept_asynch_for_thread() says we
 * intercept, the thread is synched via synch_with_thread() and the app's
 * CONTEXT is rewritten by intercept_nt_setcontext() so that we retain control.
 * When the app's handle carries THREAD_SET_CONTEXT rights we apply the context
 * ourselves from a private heap copy and skip the app's system call.
 *
 * Returns whether the app's system call should be executed.
 */
static bool
presys_SetContextThread(dcontext_t *dcontext, reg_t *param_base)
{
    priv_mcontext_t *mc = get_mcontext(dcontext);
    HANDLE thread_handle = (HANDLE) sys_param(dcontext, param_base, 0);
    CONTEXT *cxt = (CONTEXT *) sys_param(dcontext, param_base, 1);
    thread_id_t tid = thread_id_from_handle(thread_handle);
    bool intercept = true;
    bool execute_syscall = true;
    /* FIXME : we are going to read and write to cxt, which may be unsafe */
    ASSERT(tid != 0xFFFFFFFF);
    LOG(THREAD, LOG_SYSCALLS|LOG_THREADS, IF_DGCDIAG_ELSE(1, 2),
        "syscall: NtSetContextThread handle="PFX" tid=%d cxt->Xip="PFX"\n",
        thread_handle, tid, cxt->CXT_XIP);
    mutex_lock(&thread_initexit_lock); /* need lock to lookup thread */
    if (intercept_asynch_for_thread(tid, false/*no unknown threads*/)) {
        priv_mcontext_t mcontext;
        thread_record_t *tr = thread_lookup(tid);
        CONTEXT *my_cxt;
        NTSTATUS res;
        const thread_synch_state_t desired_state = THREAD_SYNCH_VALID_MCONTEXT;
        DEBUG_DECLARE(thread_synch_result_t synch_res;)
        ASSERT(tr != NULL);
        SELF_PROTECT_LOCAL(tr->dcontext, WRITABLE);
        /* now ensure target thread is at a safe point when it gets reset */
        /* our own mcontext is saved, pointed at the syscall pc for the duration
         * of the synch, and then restored below
         */
        copy_mcontext(mc, &mcontext);
        mc->pc = SYSCALL_PC(dcontext);
        DEBUG_DECLARE(synch_res =)
            synch_with_thread(tid, true, true, desired_state,
                              THREAD_SYNCH_SUSPENDED_VALID_MCONTEXT,
                              /* if we fail to suspend a thread (e.g., privilege
                               * problems) ignore it. FIXME: retry instead? */
                              THREAD_SYNCH_SUSPEND_FAILURE_IGNORE);
        ASSERT(synch_res == THREAD_SYNCH_RESULT_SUCCESS);
        copy_mcontext(&mcontext, mc);
        if (!TESTALL(CONTEXT_CONTROL/*2 bits so ALL*/, cxt->ContextFlags)) {
            /* app didn't request pc so we'd better get it now.
             * FIXME: this isn't transparent as we have to clobber
             * fields in the app cxt: should restore in post-syscall.
             */
            char buf[MAX_CONTEXT_SIZE];
            CONTEXT *alt_cxt = nt_initialize_context(buf, CONTEXT_DR_STATE);
            STATS_INC(num_app_setcontext_no_control);
            if (thread_get_context(tr, alt_cxt) &&
                translate_context(tr, alt_cxt, true/*set memory*/)) {
                LOG(THREAD, LOG_SYSCALLS, 2, "no CONTROL flag on original cxt:\n");
                DOLOG(3, LOG_SYSCALLS, { dump_context_info(cxt, THREAD, true); });
                cxt->ContextFlags |= CONTEXT_CONTROL;
                cxt->CXT_XIP = alt_cxt->CXT_XIP;
                cxt->CXT_XFLAGS = alt_cxt->CXT_XFLAGS;
                cxt->CXT_XSP = alt_cxt->CXT_XSP;
                cxt->CXT_XBP = alt_cxt->CXT_XBP;
                IF_X64(ASSERT_NOT_IMPLEMENTED(false)); /* Rbp not part of CONTROL */
                cxt->SegCs = alt_cxt->SegCs;
                cxt->SegSs = alt_cxt->SegSs;
                LOG(THREAD, LOG_SYSCALLS, 3, "changed cxt:\n");
                DOLOG(3, LOG_SYSCALLS, { dump_context_info(cxt, THREAD, true); });
                /* don't care about other regs -- if app didn't
                 * specify CONTEXT_INTEGER that's fine
                 */
            } else {
                /* just don't intercept: could crash us in middle of mangled
                 * sequence once we start translating there and treating them
                 * as safe spots, but for now will be ok.
                 */
                intercept = false;
                ASSERT_NOT_REACHED();
            }
        }
        if (intercept) {
            /* modify the being-set cxt so that we retain control */
            intercept_nt_setcontext(tr->dcontext, cxt);
            LOG(THREAD, LOG_SYSCALLS, 3, "final cxt passed to syscall:\n");
            DOLOG(3, LOG_SYSCALLS, { dump_context_info(cxt, THREAD, true); });
        }
        /* nt_continue_dynamo_start path assumes target is !couldbelinking
         * all synch_with_thread synch points should be, we check here
         */
        ASSERT(!is_couldbelinking(tr->dcontext));
        if (TEST(THREAD_SET_CONTEXT, nt_get_handle_access_rights(thread_handle))) {
            /* Case 10101: a thread waiting at check_wait_at_safe_spot can't
             * be directly setcontext-ed so we explicitly do the context
             * set request here and skip the system call.
             * A waiting thread does NtContinue and so bypasses permission issues,
             * so we explicitly check for setcontext permission.
             * We have to make a copy since the app could de-allocate or modify
             * cxt before a waiting thread examines it.
             */
            DEBUG_DECLARE(bool ok;)
#ifdef X64
            /* PR 263338: we need to align to 16 on x64.  Heap is 8-byte aligned. */
            byte *cxt_alloc;
#endif
            my_cxt = global_heap_alloc(CONTEXT_HEAP_SIZE(*my_cxt) HEAPACCT(ACCT_OTHER));
#ifdef X64
            /* The alignment fixup must be applied to our heap copy, not to the
             * app's cxt: cxt_alloc records the raw allocation that will later be
             * freed, and my_cxt is bumped to the 16-byte-aligned address inside it.
             * NOTE(review): relies on CONTEXT_HEAP_SIZE including 8 bytes of
             * alignment slack on x64 -- confirm against its definition.
             */
            cxt_alloc = (byte *) my_cxt;
            if (!ALIGNED(my_cxt, 16)) {
                ASSERT(ALIGNED(my_cxt, 8));
                my_cxt = (CONTEXT *) ( ((app_pc)my_cxt)+8 );
            }
            ASSERT(ALIGNED(my_cxt, 16));
#endif
            *my_cxt = *cxt;
            /* my_cxt is freed by set_synched_thread_context() or target thread */
            DEBUG_DECLARE(ok = )
                set_synched_thread_context(tr, NULL, (void *) my_cxt,
                                           CONTEXT_HEAP_SIZE(*my_cxt), desired_state
                                           _IF_X64(cxt_alloc) _IF_WINDOWS(&res));
            /* We just tested permissions, but could be bad handle, etc.
             * FIXME: if so and thread was waiting we have transparency violation
             */
            ASSERT_CURIOSITY(ok);
            SET_RETURN_VAL(tr->dcontext, res);
            /* must wake up thread so it can go to nt_continue_dynamo_start */
            nt_thread_resume(tr->handle, NULL);
            execute_syscall = false;
        } else {
            /* we expect the system call to fail */
            DODEBUG({ tr->dcontext->expect_last_syscall_to_fail = true; });
        }
        SELF_PROTECT_LOCAL(tr->dcontext, READONLY);
    }
    mutex_unlock(&thread_initexit_lock);
    return execute_syscall;
}
/* Expects the dcontext's mcontext to hold the app state from just before the
 * system call.
 * Returns true only for an NtCallbackReturn whose status argument is something
 * other than STATUS_CALLBACK_POP_STACK, i.e., a callback return that really
 * does transfer control (xref case 10579).
 */
bool
is_cb_return_syscall(dcontext_t *dcontext)
{
    priv_mcontext_t *mc = get_mcontext(dcontext);
    reg_t *args;
    /* anything other than NtCallbackReturn is trivially not a cb return */
    if (mc->xax != (reg_t) syscalls[SYS_CallbackReturn])
        return false;
    args = pre_system_call_param_base(mc);
    /* third syscall argument is the NTSTATUS */
    if ((NTSTATUS)sys_param(dcontext, args, 2) == STATUS_CALLBACK_POP_STACK)
        return false;
    return true;
}
/* NtCallbackReturn
 *
 * Arguments are:
 *   IN PVOID Result OPTIONAL, IN ULONG ResultLength, IN NTSTATUS Status
 * The same args go to int 2b (my theory anyway), where they are passed in
 * eax, ecx, and edx.  If KiUserCallbackDispatcher returns, it leaves
 * eax w/ result value of callback, and zeros out ecx and edx, then int 2b.
 * People doing the int 2b in user32 set ecx and edx to what they want, then
 * call a routine that simply pulls first arg into eax and then does int 2b.
 */
static void
presys_CallbackReturn(dcontext_t *dcontext, reg_t *param_base)
{
    priv_mcontext_t *mc = get_mcontext(dcontext);
    NTSTATUS cb_status = (NTSTATUS) sys_param(dcontext, param_base, 2);

    if (cb_status == STATUS_CALLBACK_POP_STACK) {
        /* case 10579: with this status code the kernel only pops the
         * stack and does not transfer control, so there is nothing to do
         * beyond noting it */
        LOG(THREAD, LOG_SYSCALLS|LOG_THREADS, IF_DGCDIAG_ELSE(1, 2),
            "syscall: NtCallbackReturn STATUS_CALLBACK_POP_STACK\n");
        return;
    }

    /* NtCallbackReturn returns from callback via a syscall, and it
     * requires us to restore the prev dcontext immediately prior
     * to the syscall (want to use current dcontext in prior instructions
     * in shared_syscall).
     * N.B.: this means that the return from the call to pre_system_call
     * uses a different dcontext than the setup for the call!
     * The popa and popf will be ok -- old dstack is still in esp, isn't
     * restored, isn't deleted by swapping to new dcontext.
     * The problem is the restore of the app's esp -- so we fix that by
     * having the clean call to pre_system_call store and restore app's esp
     * from a special nonswapped dcontext slot.
     */
    LOG(THREAD, LOG_SYSCALLS|LOG_THREADS, IF_DGCDIAG_ELSE(1, 2),
        "syscall: NtCallbackReturn\n");
    callback_start_return(mc);
}
/* Ref case 5518 - on some versions of windows the thread stack is freed
 * in process.  We watch for a free whose base matches the current thread's
 * recorded stack base and, if seen, clear the recorded stack bounds so the
 * stack is not removed again at thread exit.
 */
static void
check_for_stack_free(dcontext_t *dcontext, byte *base, size_t size)
{
    os_thread_data_t *ostd = (os_thread_data_t *) dcontext->os_field;
    ASSERT(dcontext == get_thread_private_dcontext());

    /* not this thread's stack: nothing to track */
    if (base != ostd->stack_base)
        return;

    LOG(THREAD, LOG_SYSCALLS|LOG_VMAREAS, 1,
        "Thread's os stack is being freed\n");
    ASSERT(base + size == ostd->stack_top);
    /* only seen the in process free on 2k and NT */
    ASSERT_CURIOSITY(get_os_version() <= WINDOWS_VERSION_2000);
    /* When we've seen it happen (in kernel32!ExitThread), ExitThread uses
     * a chunk of the TEB as the stack while freeing and calling
     * NtTerminate. */
    ASSERT_CURIOSITY((byte *)get_mcontext(dcontext)->xsp >=
                     (byte *)get_own_teb() &&
                     (byte *)get_mcontext(dcontext)->xsp <
                     ((byte *)get_own_teb()) + PAGE_SIZE);
    /* FIXME - Instead of saying the teb stack is no longer valid, we could
     * instead change the bounds to be the TEB region.  Other users could
     * then always assert we have something valid set.  Is slightly
     * greater dependence on observed behavior though. */
    ostd->teb_stack_no_longer_valid = true;
    ostd->stack_base = NULL;
    ostd->stack_top = NULL;
}
/* NtAllocateVirtualMemory
 *
 * Pre-syscall handler.  sysnum is compared against
 * SYS_Wow64AllocateVirtualMemory64 to decide whether the size/type/prot
 * arguments are shifted by one slot.
 *
 * Returns false (do not execute the system call) only when a plain MEM_COMMIT
 * in this process is rejected by app_memory_pre_alloc(), in which case the
 * return value is set to STATUS_CONFLICTING_ADDRESSES.  Returns true otherwise.
 */
static bool
presys_AllocateVirtualMemory(dcontext_t *dcontext, reg_t *param_base, int sysnum)
{
priv_mcontext_t *mc = get_mcontext(dcontext);
HANDLE process_handle = (HANDLE) sys_param(dcontext, param_base, 0);
void **pbase = (void **) sys_param(dcontext, param_base, 1);
/* XXX i#899: NtWow64AllocateVirtualMemory64 has an extra arg after ZeroBits but
* it's ignored in wow64!whNtWow64AllocateVirtualMemory64. We should keep an eye
* out: maybe a future service pack or win9 will use it.
*/
int arg_shift = (sysnum == syscalls[SYS_Wow64AllocateVirtualMemory64] ? 1 : 0);
size_t *psize = (size_t *) sys_param(dcontext, param_base, 3 + arg_shift);
uint type = (uint) sys_param(dcontext, param_base, 4 + arg_shift);
uint prot = (uint) sys_param(dcontext, param_base, 5 + arg_shift);
app_pc base;
if (is_phandle_me(process_handle) && TEST(MEM_COMMIT, type) &&
/* Any overlap when asking for MEM_RESERVE (even when combined w/ MEM_COMMIT)
* will fail anyway, so we only have to worry about overlap on plain MEM_COMMIT
*/
!TEST(MEM_RESERVE, type)) {
/* i#1175: NtAllocateVirtualMemory can modify prot on existing pages */
size_t size;
/* the app's in/out base and size are read with safe_read; if either read
* fails, or base is NULL, no pre-alloc check is made
*/
if (safe_read(pbase, sizeof(base), &base) &&
safe_read(psize, sizeof(size), &size) &&
base != NULL &&
!app_memory_pre_alloc(dcontext, base, size, osprot_to_memprot(prot), false)) {
SET_RETURN_VAL(dcontext, STATUS_CONFLICTING_ADDRESSES);
return false; /* do not execute system call */
}
}
#ifdef PROGRAM_SHEPHERDING
if (is_phandle_me(process_handle) && TEST(MEM_COMMIT, type) &&
TESTALL(PAGE_EXECUTE_READWRITE, prot)) {
/* executable_if_alloc policy says we only add a region to the future
* list if it is committed rwx with no prior reservation.
* - if a base is passed and MEM_RESERVE is not set, there must be a prior
* reservation
* - if a base is passed and MEM_RESERVE is set, do a query to see if
* reservation existed before
* - if no base is passed, there was no reservation
*/
/* unfortunately no way to avoid syscall to check readability
* (unless have try...except)
*/
if (safe_read(pbase, sizeof(base), &base)) {
/* records in the dcontext whether this commit has no prior reservation */
dcontext->alloc_no_reserve =
(base == NULL ||
(TEST(MEM_RESERVE, type) && !get_memory_info(base, NULL, NULL, NULL)));
/* FIXME: can one MEM_RESERVE an address previously
* MEM_RESERVEd - at least on XP that's not allowed */
}
} else if (TEST(ASLR_STACK, DYNAMO_OPTION(aslr)) &&
!is_phandle_me(process_handle) &&
TEST(MEM_RESERVE, type)
&& is_newly_created_process(process_handle)) {
/* pre-processing of remote NtAllocateVirtualMemory reservation */
/* Case 9173: ignore allocations with a requested base. These may come
* after we've inserted our pad (is_newly_created_process() isn't
* perfect), but may also come before, and we do not want to cause
* interop issues. We could instead try to adjust our pad to not cause
* their alloc to fail, but may end up eliminating any security
* advantage anyway.
*/
if (safe_read(pbase, sizeof(base), &base)) {
if (base == NULL) {
/* FIXME: make the above check stronger */
ASSERT_CURIOSITY(prot == PAGE_READWRITE);
/* this is just a reservation, so can be anything */
/* currently not following child flags, so maybe is almost always */
/* NOTE - on vista we should only ever get here if someone is using
* the legacy NtCreateProcess native api (vs NtCreateUserProcess) or
* the app is injecting memory into a new process before it's started
* initializing itself. */
ASSERT_CURIOSITY(get_os_version() < WINDOWS_VERSION_VISTA);
aslr_maybe_pad_stack(dcontext, process_handle);
} else {
/* debug-only warning when a based alloc targets a child other than the
* one recorded in aslr_context.last_child_padded
*/
DODEBUG({
if (process_id_from_handle(process_handle) !=
dcontext->aslr_context.last_child_padded) {
SYSLOG_INTERNAL_WARNING_ONCE("aslr stack: allowing alloc prior "
"to pad");
}
});
}
}
}
#endif /* PROGRAM_SHEPHERDING */
return true;
}
/* NtFreeVirtualMemory
 *
 * Pre-syscall handler.  Reads the app's base and size with safe_read, returns
 * early (optionally flagging the syscall as expected-to-fail in debug builds)
 * for argument combinations that look invalid or that target another process,
 * and otherwise page-aligns the region and reports it via
 * app_memory_deallocation() before the system call runs.
 */
static void
presys_FreeVirtualMemory(dcontext_t *dcontext, reg_t *param_base)
{
priv_mcontext_t *mc = get_mcontext(dcontext);
HANDLE process_handle = (HANDLE) sys_param(dcontext, param_base, 0);
void **pbase = (void **) sys_param(dcontext, param_base, 1);
size_t *psize = (size_t *) sys_param(dcontext, param_base, 2);
uint type = (uint) sys_param(dcontext, param_base, 3);
app_pc base;
size_t size;
/* check for common argument problems, apps tend to screw this call
* up a lot (who cares about a memory leak, esp. at process exit) */
/* ref case 3536, 545, 4046 */
if (!safe_read(pbase, sizeof(base), &base) || base == NULL ||
!safe_read(psize, sizeof(size), &size) ||
!(type == MEM_RELEASE || type == MEM_DECOMMIT)) {
/* we expect the system call to fail */
DODEBUG(dcontext->expect_last_syscall_to_fail = true;);
return;
}
if (!is_phandle_me(process_handle)) {
IPC_ALERT("ERROR: FreeVirtualMemory %s "PFX" "PIFX" on another process",
type == MEM_DECOMMIT ? "MEM_DECOMMIT" : "MEM_RELEASE",
base, size);
return;
}
if ((type == MEM_DECOMMIT && size == 0) || (type == MEM_RELEASE)) {
app_pc real_base;
/* whole region being freed, we must look up size, ignore psize
* msdn and Nebbet claim that you need *psize == 0 for MEM_RELEASE
* but that doesn't seem to be true on all platforms */
/* 2K+: if base is anywhere on the first page of region this succeeds,
* and doesn't otherwise.
* NT: base must be the actual base.
*/
LOG(THREAD, LOG_SYSCALLS|LOG_VMAREAS, 1,
"syscall: NtFreeVirtualMemory type=%s region base="PFX" size="PIFX"\n",
type == MEM_DECOMMIT ? "MEM_DECOMMIT" : "MEM_RELEASE",
base, size);
/* size is overwritten here with the looked-up allocation size */
size = get_allocation_size(base, &real_base);
ASSERT(ALIGNED(real_base, PAGE_SIZE));
/* if region has already been freed */
if (((app_pc) ALIGN_BACKWARD(base, PAGE_SIZE) != real_base) ||
(get_os_version() == WINDOWS_VERSION_NT && base != real_base)) {
/* we expect the system call to fail
* with (NTSTATUS) 0xc000009f -
* "Virtual memory cannot be freed as base address is not
* the base of the region and a region size of zero was
* specified"
*/
LOG(THREAD, LOG_SYSCALLS|LOG_VMAREAS, 1,
"syscall: NtFreeVirtualMemory base="PFX", size="PIFX" invalid base\n",
base, size);
DODEBUG(dcontext->expect_last_syscall_to_fail = true;);
return;
}
/* make sure we use correct region base address, */
/* otherwise we'll free an extra page */
base = real_base;
ASSERT(real_base != NULL && "already freed");
}
/* NOTE: the early return inside this block exists only in debug builds */
DODEBUG({
/* FIXME: this shouldn't be DODEBUG since we need to handle syscall failure */
if (type == MEM_DECOMMIT && size != 0) {
size_t real_size = get_allocation_size(base, NULL);
if ((app_pc)ALIGN_BACKWARD(base, PAGE_SIZE) + real_size < base + size) {
/* we expect the system call to fail with
* (NTSTATUS) 0xc000001a - "Virtual memory cannot be freed."
*/
DODEBUG(dcontext->expect_last_syscall_to_fail = true;);
LOG(THREAD, LOG_SYSCALLS|LOG_VMAREAS, 1,
"syscall: NtFreeVirtualMemory base="PFX", size="PIFX
" too large should fail \n", base, size);
return;
}
}
});
LOG(THREAD, LOG_SYSCALLS|LOG_VMAREAS, 1,
"syscall: NtFreeVirtualMemory base="PFX" size="PIFX"\n",
base, size);
DOLOG(1, LOG_SYSCALLS|LOG_VMAREAS, {
char buf[MAXIMUM_PATH];
get_module_name(base, buf, sizeof(buf));
if (buf[0] != '\0') {
LOG(THREAD, LOG_SYSCALLS|LOG_VMAREAS, 2,
"\tNtFreeVirtualMemory called on module %s\n", buf);
ASSERT_CURIOSITY(false && "NtFreeVirtualMemory called on module");
/* should switch to PE name and then can do this at loglevel 0 */
}
});
DOLOG(1, LOG_MEMSTATS, {
/* snapshots are heavyweight, so do rarely */
if (size > SNAPSHOT_THRESHOLD)
mem_stats_snapshot();
});
/* from here on base and size are expected to be page-aligned */
align_page_boundary(dcontext, &base, &size);
ASSERT_BUG_NUM(4511, ALIGNED(base, PAGE_SIZE) && ALIGNED(size, PAGE_SIZE));
/* ref case 5518 - we need to keep track if the thread stack is freed */
if (type == MEM_RELEASE) {
check_for_stack_free(dcontext, base, size);
}
if (type == MEM_RELEASE &&
TEST(ASLR_HEAP_FILL, DYNAMO_OPTION(aslr))) {
/* We free our allocation before the application
* reservation is released. Not a critical failure if the
* application free fails but we have freed our pad. Also
* avoids fragmentation if a racy allocation.
*/
aslr_pre_process_free_virtual_memory(dcontext, base, size);
/* note we handle the untracked stack free in
* os_thread_stack_exit() */
}
app_memory_deallocation(dcontext, base, size,
false /* don't own thread_initexit_lock */,
false /* not image */);
}
/* NtProtectVirtualMemory
 *
 * Pre-syscall handler.  For requests on this process the region is widened to
 * page boundaries and passed to app_memory_protection_change(); any result
 * other than DO_APP_MEM_PROT_CHANGE means the system call is skipped and its
 * return value (and, for the pretend/subset cases, the app's OUT arguments)
 * are filled in here instead.  Requests on other processes are only logged.
 */
static bool /* returns whether to execute syscall */
presys_ProtectVirtualMemory(dcontext_t *dcontext, reg_t *param_base)
{
priv_mcontext_t *mc = get_mcontext(dcontext);
HANDLE process_handle = (HANDLE) sys_param(dcontext, param_base, 0);
void **pbase = (void **) sys_param(dcontext, param_base, 1);
size_t *psize = (size_t *) sys_param(dcontext, param_base, 2);
uint prot = (uint) sys_param(dcontext, param_base, 3);
uint *oldprot = (uint *) sys_param(dcontext, param_base, 4);
app_pc base;
size_t size;
uint old_memprot = MEMPROT_NONE; /* for SUBSET_APP_MEM_PROT_CHANGE
* or PRETEND_APP_MEM_PROT_CHANGE */
uint subset_memprot = MEMPROT_NONE; /* for SUBSET_APP_MEM_PROT_CHANGE */
if (!safe_read(pbase, sizeof(base), &base) ||
!safe_read(psize, sizeof(size), &size)) {
/* we expect the system call to fail */
DODEBUG(dcontext->expect_last_syscall_to_fail = true;);
return true;
}
LOG(THREAD, LOG_SYSCALLS|LOG_VMAREAS, 1,
"syscall: NtProtectVirtualMemory process="PFX" base="PFX" size="
PIFX" prot=%s 0x%x\n", process_handle, base, size, prot_string(prot), prot);
if (is_phandle_me(process_handle)) {
uint res;
/* go to page boundaries, since windows lets you pass non-aligned
* values, unlike Linux
*/
/* FIXME: use align_page_boundary(dcontext, &base, &size) instead */
if (!ALIGNED(base, PAGE_SIZE) || !ALIGNED(base+size, PAGE_SIZE)) {
/* need to cover all pages between base and base + size */
size = ALIGN_FORWARD(base+size, PAGE_SIZE) - PAGE_START(base);
base = (app_pc) PAGE_START(base);
LOG(THREAD, LOG_SYSCALLS|LOG_VMAREAS, 2,
"\tpage boundaries => base="PFX" size="PIFX"\n", base, size);
}
DOLOG(1, LOG_SYSCALLS|LOG_VMAREAS, {
char module_name[MAX_MODNAME_INTERNAL];
if (os_get_module_name_buf(base, module_name,
BUFFER_SIZE_ELEMENTS(module_name)) > 0) {
LOG(THREAD, LOG_SYSCALLS|LOG_VMAREAS, 2,
"\tNtProtectVirtualMemory called on module %s\n", module_name);
}
});
#ifdef DGC_DIAGNOSTICS
DOLOG(1, LOG_VMAREAS, {
dump_callstack(POST_SYSCALL_PC(dcontext),
(app_pc) mc->xbp, THREAD,
DUMP_NOT_XML);
});
#endif
/* res selects among: let the syscall run, fail it, pretend it succeeded,
* or apply only a subset of the requested protection
*/
res = app_memory_protection_change(dcontext, base, size,
osprot_to_memprot(prot),
&subset_memprot,
&old_memprot);
if (res != DO_APP_MEM_PROT_CHANGE) {
/* from experimentation it seems to return
* STATUS_CONFLICTING_ADDRESSES
* rather than STATUS_NOT_COMMITTED for invalid memory
*/
if (res == FAIL_APP_MEM_PROT_CHANGE) {
SET_RETURN_VAL(dcontext, STATUS_CONFLICTING_ADDRESSES);
} else if (res == PRETEND_APP_MEM_PROT_CHANGE ||
res == SUBSET_APP_MEM_PROT_CHANGE) {
/*
* FIXME: is alternative of letting it go through and undoing in
* post-handler simpler and safer (here we have to emulate kernel
* behavior), if we remove +w flag to avoid other-thread issues?
*/
uint pretend_oldprot;
uint old_osprot = PAGE_NOACCESS;
SET_RETURN_VAL(dcontext, STATUS_SUCCESS);
if (res == SUBSET_APP_MEM_PROT_CHANGE) {
uint subset_osprot =
osprot_replace_memprot(prot, subset_memprot);
/* we explicitly make our system call. Although in this
* case we could change the application arguments as
* well, in general it is not nice to the application
* to change IN arguments.
*/
bool ok =
nt_remote_protect_virtual_memory(process_handle,
base, size,
subset_osprot, &old_osprot);
/* using app's handle in case it has different rights than current thread */
ASSERT_CURIOSITY(process_handle == NT_CURRENT_PROCESS);
ASSERT_CURIOSITY(ok);
/* we'll keep going anyways as if it would have worked */
} else {
ASSERT_NOT_TESTED();
ASSERT(res == PRETEND_APP_MEM_PROT_CHANGE);
/* pretend it worked but don't execute system call */
old_osprot = get_current_protection(base);
}
/* Today we base on the current actual flags
* (old_osprot), and preserve WRITECOPY and other
* unlikely original flags.
*
* We should be using our value for what the correct
* view of the application memory should be. case
* 10437 we should be able to transparently carry the
* original protection flags across multiple calls to
* NtProtectVirtualMemory.
*/
pretend_oldprot = osprot_replace_memprot(old_osprot,
old_memprot);
/* have to set OUT vars properly */
/* size and base were already aligned up above */
ASSERT(ALIGNED(size, PAGE_SIZE));
ASSERT(ALIGNED(base, PAGE_SIZE));
LOG(THREAD, LOG_SYSCALLS|LOG_VMAREAS, 1,
"skipping NtProtectVirtualMemory, returning base="PFX", size="
PIFX", oldprot=%s 0x%x\n",
base, size, prot_string(pretend_oldprot), pretend_oldprot);
/* FIXME: we really should be _probing_ these writes
* to make sure not targeting DR addresses when
* PROTECT_FROM_APP
*/
/* the results of these safe_write calls are not checked */
safe_write(oldprot, sizeof(pretend_oldprot), &pretend_oldprot);
safe_write(pbase, sizeof(base), &base);
safe_write(psize, sizeof(size), &size);
} else {
ASSERT_NOT_REACHED();
}
return false; /* do not execute system call */
} else {
/* FIXME i#143: we still need to tweak the returned oldprot (in
* post-syscall) for writable areas we've made read-only
*/
/* FIXME: ASSERT here that have not modified size unless using, e.g. fix_unsafe_hooker */
}
} else {
/* FIXME: should we try to alert any dynamo running the other process?
*/
LOG(THREAD, LOG_SYSCALLS|LOG_VMAREAS, 1,
"WARNING: ProtectVirtualMemory called on process "PFX" %d\n",
process_handle, process_id_from_handle(process_handle));
/* this actually happens (e.g., in calc.exe's winhlp popups)
* so don't die here with IPC_ALERT
*/
}
return true;
}
/* NtMapViewOfSection
 *
 * Debug builds note section handles we have no file name for; the only real
 * pre-processing is for ASLR.
 */
static void
presys_MapViewOfSection(dcontext_t *dcontext, reg_t *param_base)
{
    DODEBUG({
        priv_mcontext_t *mc = get_mcontext(dcontext);
        HANDLE section_handle = (HANDLE) sys_param(dcontext, param_base, 0);
        /* Trying to make sure we're tracking properly all section
         * handles.
         *
         * Unfortunately SHELL32!SHChangeRegistration_Create seems
         * to be using sections to communicate with explorer.exe
         * and sends a message via sending a duplicate section
         * handle, and likely receives a message back in a
         * similarly duplicated handle from the other process.
         * Hard to match that particular call so cannot keep a
         * CURIOSITY here.
         *
         * Note we also wouldn't like some global handle being used by
         * different threads as well, or any other unusually nested
         * use of NtCreateSection/NtOpenSection before NtMapViewOfSection.
         *
         * For non-image sections accessed via OpenSection rather than
         * CreateSection, we do NOT have the file name here, but we can get it
         * once we have a mapping via MemorySectionName: plus we don't care
         * about non-images.  But, we don't have a test for image here, so we
         * leave this LOG note.
         */
        const char *file = section_to_file_lookup(section_handle);
        if (file != NULL) {
            /* the lookup result is only needed for the check: release it */
            dr_strfree(file HEAPACCT(ACCT_VMAREAS));
        } else if (section_handle !=
                   dcontext->aslr_context.randomized_section_handle) {
            LOG(THREAD, LOG_SYSCALLS|LOG_VMAREAS,
                2, "syscall: NtMapViewOfSection unusual section mapping\n");
        }
    });
    /* no pre-processing needed except for ASLR */
    if (TESTANY(ASLR_DLL|ASLR_MAPPED, DYNAMO_OPTION(aslr)))
        aslr_pre_process_mapview(dcontext);
}
/* NtUnmapViewOfSection{,Ex}
 *
 * Pre-syscall handler.  sysnum distinguishes the Ex variant, whose third
 * argument is only sanity-checked.  For an unmap in this process of a
 * MEM_IMAGE or MEM_MAPPED region, the base is corrected to the allocation
 * base and the region is reported via process_mmap() before the system call
 * runs.
 */
static void
presys_UnmapViewOfSection(dcontext_t *dcontext, reg_t *param_base, int sysnum)
{
/* This is what actually removes a dll from memory */
priv_mcontext_t *mc = get_mcontext(dcontext);
HANDLE process_handle = (HANDLE) sys_param(dcontext, param_base, 0);
app_pc base = (app_pc) sys_param(dcontext, param_base, 1);
app_pc real_base;
/* size is looked up from the app-supplied base, before base is corrected below */
size_t size = get_allocation_size(base, &real_base);
MEMORY_BASIC_INFORMATION mbi;
if (sysnum == syscalls[SYS_UnmapViewOfSectionEx]) {
ptr_int_t arg3 = (ptr_int_t) sys_param(dcontext, param_base, 2);
/* FIXME i#899: new Win8 syscall w/ 3rd arg that's 0 by default.
* We want to know when we see non-zero so we have some code to study.
*/
ASSERT_CURIOSITY(arg3 == 0 && "i#899: unknown new param");
}
LOG(THREAD, LOG_SYSCALLS|LOG_VMAREAS, 1,
"syscall: NtUnmapViewOfSection "PFX" size="PIFX"\n", base, size);
if (!is_phandle_me(process_handle)) {
IPC_ALERT("ERROR: UnmapViewOfSection on another process");
return;
}
/* check for args we expect to fail, ref case 545, 3697, on east coast
* xp server shell32 dllmain process attach calls kernel32
* CreateActCtxW which ends up calling this with an unaligned pointer
* into private memory (which is suspiciously just a few bytes under
* the base address of a recently freed mapped region) */
/* Don't worry about the query_virtual_memory cost, we are already
* doing a ton of them for the get_allocation_size and process_mmap
* calls */
if (query_virtual_memory(base, &mbi, sizeof(mbi)) != sizeof(mbi) ||
(mbi.Type != MEM_IMAGE && mbi.Type != MEM_MAPPED)) {
DODEBUG(dcontext->expect_last_syscall_to_fail = true;);
return;
}
/* people don't always call with the actual base address (see east
* coast xp server (sp1) whose uxtheme.dll CThemeSignature::
* CalculateHash always calls this with base+0x130, is hardcoded in
* the assembly). OS doesn't seem to care as the syscall still
* succeeds. */
if (base != mbi.AllocationBase) {
LOG(THREAD, LOG_SYSCALLS|LOG_VMAREAS, 1,
"syscall: NtUnmapViewOfSection real base is "PFX"\n",
mbi.AllocationBase);
base = mbi.AllocationBase;
}
DOLOG(1, LOG_MEMSTATS, {
/* snapshots are heavyweight, so do rarely */
if (size > SNAPSHOT_THRESHOLD)
mem_stats_snapshot();
});
RSTATS_INC(num_app_munmaps);
/* we have to mark before any policy processing gets started */
/* FIXME: we could also allow MEM_MAPPED areas here, since .B
* policies may in fact allow such to be executable areas, but
* since we can keep track of only one, focusing on MEM_IMAGE only
*/
if (DYNAMO_OPTION(unloaded_target_exception) &&
mbi.Type == MEM_IMAGE) {
mark_unload_start(base, size);
}
if (TESTANY(ASLR_DLL|ASLR_MAPPED, DYNAMO_OPTION(aslr))) {
aslr_pre_process_unmapview(dcontext, base, size);
}
process_mmap(dcontext, base, size, false/*unmap*/, NULL);
}
/* NtFlushInstructionCache
 *
 * This syscall is from the days when Windows ran on multiple
 * architectures, but many apps still use it.  A flush in this process is
 * forwarded to app_memory_flush() (PROGRAM_SHEPHERDING builds only); a flush
 * in another process is merely logged.
 */
static void
presys_FlushInstructionCache(dcontext_t *dcontext, reg_t *param_base)
{
    priv_mcontext_t *mc = get_mcontext(dcontext);
    HANDLE process_handle = (HANDLE) sys_param(dcontext, param_base, 0);
    app_pc base = (app_pc) sys_param(dcontext, param_base, 1);
    size_t size = (size_t) sys_param(dcontext, param_base, 2);

    LOG(THREAD, LOG_SYSCALLS|LOG_VMAREAS, 1,
        "syscall: NtFlushInstructionCache "PFX" size="PIFX"\n", base, size);

    /* base can be NULL, in which case size is meaningless
     * loader calls w/ NULL & 0 on rebasing -- means entire icache?
     */
    if (base == NULL)
        return;

    if (!is_phandle_me(process_handle)) {
        /* FIXME: should we try to alert any dynamo running the other process?
         * no reason to ASSERT here, not critical like alloc/dealloc in other
         * process
         */
        LOG(THREAD, LOG_SYSCALLS|LOG_VMAREAS, 2,
            "WARNING: NtFlushInstructionCache on another process\n");
        return;
    }

#ifdef DGC_DIAGNOSTICS
    DOLOG(1, LOG_VMAREAS, {
        dump_callstack(POST_SYSCALL_PC(dcontext),
                       (app_pc) mc->xbp, THREAD,
                       DUMP_NOT_XML);
    });
#endif
#ifdef PROGRAM_SHEPHERDING
    /* flush using the region's current protection, converted to memprot bits */
    app_memory_flush(dcontext, base, size,
                     osprot_to_memprot(get_current_protection(base)));
#endif
}
/* NtCreateSection
 *
 * Pre-syscall handler: logging only.  In debug builds the section's object
 * name, when present, is converted from unicode and logged.
 */
static void
presys_CreateSection(dcontext_t *dcontext, reg_t *param_base)
{
    /* a section is an object that can be mmapped */
    priv_mcontext_t *mc = get_mcontext(dcontext);
    HANDLE *section_handle = (HANDLE*) sys_param(dcontext, param_base, 0);
    uint access_mask = (uint) sys_param(dcontext, param_base, 1);
    POBJECT_ATTRIBUTES obj = (POBJECT_ATTRIBUTES) sys_param(dcontext, param_base, 2);
    void *size = (void *) sys_param(dcontext, param_base, 3);
    uint protect = (uint) sys_param(dcontext, param_base, 4);
    uint attributes = (uint) sys_param(dcontext, param_base, 5);
    HANDLE file_handle = (HANDLE) sys_param(dcontext, param_base, 6);
    LOG(THREAD, LOG_SYSCALLS|LOG_VMAREAS, 2,
        "syscall: NtCreateSection protect 0x%x, attributes 0x%x, file "PIFX"\n",
        protect, attributes, file_handle);
    DODEBUG({
        if (obj != NULL && obj->ObjectName != NULL) {
            DEBUG_DECLARE(char buf[MAXIMUM_PATH];)
            /* convert name from unicode to ansi */
            wchar_t *name = obj->ObjectName->Buffer;
            /* The name is not always null-terminated, so bound the conversion
             * by the string's Length (in bytes) via an explicit precision,
             * capped so the result plus terminator always fits in buf.
             */
            int len = (int) MIN(obj->ObjectName->Length/sizeof(name[0]),
                                BUFFER_SIZE_ELEMENTS(buf) - 1);
            _snprintf(buf, BUFFER_SIZE_ELEMENTS(buf), "%.*S", len, name);
            NULL_TERMINATE_BUFFER(buf);
            LOG(THREAD, LOG_SYSCALLS|LOG_VMAREAS, 2,
                "syscall: NtCreateSection %s\n", buf);
        } else {
            LOG(THREAD, LOG_SYSCALLS|LOG_VMAREAS, 2,
                "syscall: NtCreateSection\n");
        }
    });
}
/* NtClose
 *
 * When module file names are being tracked, drops any section-to-file entry
 * recorded for the handle being closed.
 */
static void
presys_Close(dcontext_t *dcontext, reg_t *param_base)
{
    HANDLE closing;

    if (!DYNAMO_OPTION(track_module_filenames))
        return;

    closing = (HANDLE) sys_param(dcontext, param_base, 0);
    /* only handles we had recorded as sections are worth a log entry */
    if (!section_to_file_remove(closing))
        return;

    LOG(THREAD, LOG_SYSCALLS|LOG_VMAREAS, 2,
        "syscall: NtClose of section handle "PFX"\n", closing);
}
#ifdef DEBUG
/* NtOpenFile
 *
 * Debug-only pre-syscall handler: logs the name of the file being opened when
 * the object attributes carry one, and a bare note otherwise.
 */
static void
presys_OpenFile(dcontext_t *dcontext, reg_t *param_base)
{
    priv_mcontext_t *mc = get_mcontext(dcontext);
    HANDLE *file_handle = (HANDLE*) sys_param(dcontext, param_base, 0);
    uint access_mask = (uint) sys_param(dcontext, param_base, 1);
    POBJECT_ATTRIBUTES obj = (POBJECT_ATTRIBUTES) sys_param(dcontext, param_base, 2);
    void *status = (void *) sys_param(dcontext, param_base, 3);
    uint share = (uint) sys_param(dcontext, param_base, 4);
    uint options = (uint) sys_param(dcontext, param_base, 5);
    /* ObjectName is checked as well as obj, matching presys_CreateSection */
    if (obj != NULL && obj->ObjectName != NULL) {
        /* convert name from unicode to ansi */
        char buf[MAXIMUM_PATH];
        wchar_t *name = obj->ObjectName->Buffer;
        /* not always null-terminated: bound the conversion by the string's
         * Length (in bytes) via an explicit precision, capped so that the
         * result plus terminator always fits in buf.  Passing the length as
         * the _snprintf count instead would leave buf unterminated right
         * after the converted characters.
         */
        int len = (int) MIN(obj->ObjectName->Length/sizeof(name[0]),
                            BUFFER_SIZE_ELEMENTS(buf) - 1);
        _snprintf(buf, BUFFER_SIZE_ELEMENTS(buf), "%.*S", len, name);
        NULL_TERMINATE_BUFFER(buf);
        LOG(THREAD, LOG_SYSCALLS|LOG_VMAREAS, 2,
            "syscall: NtOpenFile %s\n", buf);
    } else {
        LOG(THREAD, LOG_SYSCALLS|LOG_VMAREAS, 2,
            "syscall: NtOpenFile\n");
    }
}
#endif
/* Maps a raw system call number to its normalized form.  Here the raw number
 * is returned unchanged; gateway and dcontext_live are not consulted.
 */
int
os_normalized_sysnum(int num_raw, instr_t *gateway, dcontext_t *dcontext_live)
{
return num_raw;
}
/* WARNING: flush_fragments_and_remove_region assumes that pre and post system
* call handlers do not examine or modify fcache or its fragments in any
* way except for calling flush_fragments_and_remove_region!
*/
bool
pre_system_call(dcontext_t *dcontext)
{
bool execute_syscall = true;
priv_mcontext_t *mc = get_mcontext(dcontext);
int sysnum = (int) mc->xax;
reg_t *param_base = pre_system_call_param_base(mc);
where_am_i_t old_whereami = dcontext->whereami;
dcontext->whereami = WHERE_SYSCALL_HANDLER;
IF_X64(ASSERT_TRUNCATE(sysnum, int, mc->xax));
DODEBUG(dcontext->expect_last_syscall_to_fail = false;);
KSTART(pre_syscall);
RSTATS_INC(pre_syscall);
DOSTATS({
if (ignorable_system_call(sysnum, NULL, dcontext))
STATS_INC(pre_syscall_ignorable);
});
LOG(THREAD, LOG_SYSCALLS, 2, "system call: sysnum = "PFX", param_base = "PFX"\n",
sysnum, param_base);
#ifdef DEBUG
DOLOG(2, LOG_SYSCALLS, {
dump_mcontext(mc, THREAD, false/*not xml*/);
});
/* we can't pass other than a numeric literal anymore */
LOG(THREAD, LOG_SYSCALLS, 3, "\tparam 0: "PFX"\n", sys_param(dcontext, param_base, 0));
LOG(THREAD, LOG_SYSCALLS, 3, "\tparam 1: "PFX"\n", sys_param(dcontext, param_base, 1));
LOG(THREAD, LOG_SYSCALLS, 3, "\tparam 2: "PFX"\n", sys_param(dcontext, param_base, 2));
LOG(THREAD, LOG_SYSCALLS, 3, "\tparam 3: "PFX"\n", sys_param(dcontext, param_base, 3));
LOG(THREAD, LOG_SYSCALLS, 3, "\tparam 4: "PFX"\n", sys_param(dcontext, param_base, 4));
LOG(THREAD, LOG_SYSCALLS, 3, "\tparam 5: "PFX"\n", sys_param(dcontext, param_base, 5));
LOG(THREAD, LOG_SYSCALLS, 3, "\tparam 6: "PFX"\n", sys_param(dcontext, param_base, 6));
LOG(THREAD, LOG_SYSCALLS, 3, "\tparam 7: "PFX"\n", sys_param(dcontext, param_base, 7));
LOG(THREAD, LOG_SYSCALLS, 3, "\tparam 8: "PFX"\n", sys_param(dcontext, param_base, 8));
DOLOG(3, LOG_SYSCALLS, {
/* ebp isn't in mcontext right now, so pass ebp */
dump_callstack(POST_SYSCALL_PC(dcontext), (app_pc) mc->xbp, THREAD,
DUMP_NOT_XML);
});
#endif
/* Save key register values for post_system_call (they get clobbered
* in the syscall itself).
* FIXME: our new stateless asynch handling means that these values
* are wrong when we finally return to an interrupted syscall, so post-processing
* looks at the wrong system call!
* Fortunately it always looks at NtContinue, and we haven't yet implemented
* NtContinue failure.
* We need fields analogous to asynch_target: asynch_sys_num and
* asynch_param_base. Unlike callbacks, only one outstanding return-to point
* can exist. Let's do this when we go and make our syscall failure handling
* more robust. (This is case 1501.)
*/
dcontext->sys_num = sysnum;
dcontext->sys_param_base = param_base;
#ifdef X64
/* save params that are in registers */
dcontext->sys_param0 = sys_param(dcontext, param_base, 0);
dcontext->sys_param1 = sys_param(dcontext, param_base, 1);
dcontext->sys_param2 = sys_param(dcontext, param_base, 2);
dcontext->sys_param3 = sys_param(dcontext, param_base, 3);
#endif
if (sysnum == syscalls[SYS_Continue]) {
CONTEXT *cxt = (CONTEXT *) sys_param(dcontext, param_base, 0);
/* FIXME : we are going to read and write to cxt, which may be unsafe */
int flag = (int) sys_param(dcontext, param_base, 1);
LOG(THREAD, LOG_SYSCALLS|LOG_ASYNCH, IF_DGCDIAG_ELSE(1, 2),
"syscall: NtContinue cxt->Xip="PFX" flag="PFX"\n",
cxt->CXT_XIP, flag);
intercept_nt_continue(cxt, flag);
}
else if (sysnum == syscalls[SYS_CallbackReturn]) {
presys_CallbackReturn(dcontext, param_base);
}
else if (sysnum == syscalls[SYS_SetContextThread]) {
execute_syscall = presys_SetContextThread(dcontext, param_base);
}
else if (sysnum == syscalls[SYS_CreateProcess]) {
presys_CreateProcess(dcontext, param_base, false/*!Ex*/);
}
else if (sysnum == syscalls[SYS_CreateProcessEx]) {
presys_CreateProcess(dcontext, param_base, true/*Ex*/);
}
#ifdef DEBUG
else if (sysnum == syscalls[SYS_CreateUserProcess]) {
presys_CreateUserProcess(dcontext, param_base);
}
#endif
else if (sysnum == syscalls[SYS_CreateThread]) {
presys_CreateThread(dcontext, param_base);
}
else if (sysnum == syscalls[SYS_CreateThreadEx]) {
presys_CreateThreadEx(dcontext, param_base);
}
else if (sysnum == syscalls[SYS_CreateWorkerFactory]) {
presys_CreateWorkerFactory(dcontext, param_base);
}
else if (sysnum == syscalls[SYS_SuspendThread]) {
HANDLE thread_handle= (HANDLE) sys_param(dcontext, param_base, 0);
thread_id_t tid = thread_id_from_handle(thread_handle);
LOG(THREAD, LOG_SYSCALLS|LOG_THREADS, IF_DGCDIAG_ELSE(1, 2),
"syscall: NtSuspendThread tid=%d\n", tid);
if (SELF_PROTECT_ON_CXT_SWITCH) {
/* This thread must make it back out of the cache for post-syscall
* processing, regardless of what locks target thread holds at
* suspension point, so we have to turn off our cxt switch hooks
* (see case 4942)
*/
dcontext->ignore_enterexit = true;
}
}
else if (sysnum == syscalls[SYS_ResumeThread]) {
presys_ResumeThread(dcontext, param_base);
}
#ifdef DEBUG
else if (sysnum == syscalls[SYS_AlertResumeThread]) {
HANDLE thread_handle= (HANDLE) sys_param(dcontext, param_base, 0);
thread_id_t tid = thread_id_from_handle(thread_handle);
LOG(THREAD, LOG_SYSCALLS|LOG_THREADS, IF_DGCDIAG_ELSE(1, 2),
"syscall: NtAlertResumeThread tid=%d\n", tid);
}
#endif
else if (sysnum == syscalls[SYS_TerminateProcess]) {
execute_syscall = presys_TerminateProcess(dcontext, param_base);
}
else