blob: a7d86289a21c07e6a0ddd1c786098b17576b2d7d [file] [log] [blame]
/* **********************************************************
* Copyright (c) 2010-2014 Google, Inc. All rights reserved.
* Copyright (c) 2003-2010 VMware, Inc. All rights reserved.
* **********************************************************/
/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/* Copyright (c) 2003-2007 Determina Corp. */
/*
* ntdll.c
* Routines for calling Windows system calls via the ntdll.dll wrappers.
*
* This file is used by the main library, the preinject library, and the
* standalone injector.
*/
#include "configure.h"
#ifdef NOT_DYNAMORIO_CORE
# define ASSERT(x)
# define ASSERT_CURIOSITY(x)
# define ASSERT_NOT_REACHED()
# define ASSERT_NOT_IMPLEMENTED(x)
# define DODEBUG(x)
# define DOCHECK(n, x)
# define DEBUG_DECLARE(x)
# pragma warning(disable : 4210) //nonstandard extension used : function given file scope
# pragma warning( disable : 4204) //nonstandard extension used : non-constant aggregate initializer
# define INVALID_FILE INVALID_HANDLE_VALUE
# define snprintf _snprintf
# include <stdio.h> /* _snprintf */
#else
/* we include globals.h mainly for ASSERT, even though we're
* used by preinject.
* preinject just defines its own internal_error!
*/
# include "../globals.h"
# include "../module_shared.h"
#endif
/* We have to hack away things we use here that won't work for non-core */
#if defined(NOT_DYNAMORIO_CORE_PROPER) || defined(NOT_DYNAMORIO_CORE)
# undef ASSERT_OWN_NO_LOCKS
# define ASSERT_OWN_NO_LOCKS() /* who cares if not the core */
#endif
#include "ntdll.h"
#ifndef NOT_DYNAMORIO_CORE
# include "os_private.h"
#endif
#include <wchar.h> /* _snwprintf */
/* WARNING: these routines use the Native API, an undocumented API
* exported by ntdll.dll.
* It could change without warning with a new version of Windows.
*/
/* FIXME : combine NTPRINT with NTLOG */
/* must turn on VERBOSE in inject_shared.c as well since we're now
* using display_verbose_message() -- FIXME: link them automatically */
#if defined(NOT_DYNAMORIO_CORE_PROPER) || defined(NOT_DYNAMORIO_CORE)
# define VERBOSE 0
#else
# define VERBOSE 0
#endif
#if VERBOSE
/* in inject_shared.c: must turn on VERBOSE=1 there as well */
void display_verbose_message(char *format, ...);
# define NTPRINT(...) display_verbose_message(__VA_ARGS__)
#else
# define NTPRINT(...)
#endif
/* i#437 support ymm */
/* NOTE(review): defined here and zero-initialized; nothing in the visible part of
 * this file reads or writes it, so its consumers must live elsewhere -- confirm.
 */
uint context_xstate = 0;
/* needed for injector and preinject, to avoid them requiring asm and syscalls */
#if defined(NOT_DYNAMORIO_CORE_PROPER) || defined(NOT_DYNAMORIO_CORE)
/* use ntdll wrappers for simplicity, to keep ntdll.c standalone */
# define GET_SYSCALL(name, ...) GET_NTDLL(Nt##name, (__VA_ARGS__))
# define GET_RAW_SYSCALL GET_SYSCALL
# define NT_SYSCALL(name, ...) Nt##name (__VA_ARGS__)
# define NTLOG(...)
#else
# define NTLOG LOG
/* Our own syscall wrapper to avoid relying on ntdll, for 4 reasons:
* 1) Maximum interoperability w/ ntdll hookers
* 2) Security by avoiding being disabled via ntdll being messed up
* 3) Early injection: although ntdll is already in the address space,
* this way we don't need the loader
* 4) Easier trampolines on ntdll syscall wrappers for handling native code
* (don't have to worry about DR syscalls going through the trampolines)
*
* For now we only use our own wrapper for syscalls in the app-relevant array.
* When we add the rest we can:
* 1) leave out of array, and dynamically determine
* (ideally using our own version of GetProcAddress)
* 2) add to array, then everything's consistent
* 3) eliminate array completely and always dynamically determine
* But, relying on dynamic determination means we won't work if there's a hook
* already placed there (losing a big advantage of our own wrappers), and
* dynamically determining doesn't give us that much more independence --
* we still need to manually verify each new ntdll for other types of
* syscall changes (new syscalls we care about, semantic changes, etc.)
*/
/* Decides which of dynamorio_syscall_{int2e,sysenter,syscall,wow64} to use.
 * NOTE: despite the _t suffix this is NOT a typedef: it declares a file-static
 * variable of an anonymous enum type.  syscalls_init() assigns it and
 * NT_RAW_SYSCALL reads it to pick the dispatch routine.
 */
static enum {
DR_SYSCALL_INT2E,
DR_SYSCALL_SYSENTER,
DR_SYSCALL_SYSCALL,
DR_SYSCALL_WOW64,
} dr_which_syscall_t;
/* For x64 "raw syscalls", i.e., those we call directly w/o invoking the
 * ntdll wrapper routine, we play some games with types to work more
 * easily w/ the x64 calling convention:
 *
 * GET_RAW_SYSCALL(name, arg1, ...) expands to three declarations:
 *   - the ntdll wrapper Nt<name> (via GET_NTDLL),
 *   - <name>_type: function type taking the syscall number followed by the
 *     arguments in their natural order,
 *   - <name>_dr_type: function type taking the syscall enum followed by the
 *     arguments with arg1 moved to the END.
 * The last line has no trailing semicolon: the user of the macro supplies it.
 */
# define GET_RAW_SYSCALL(name, arg1, ...) \
GET_NTDLL(Nt##name, (arg1, __VA_ARGS__)); \
typedef NTSTATUS name##_type (int sysnum, arg1, __VA_ARGS__); \
typedef NTSTATUS name##_dr_type (int sys_enum, __VA_ARGS__, arg1)
/* GET_SYSCALL(name, ...) declares the ntdll wrapper Nt<name> and <name>_type only
 * (no arg1-last variant).
 */
# define GET_SYSCALL(name, ...) \
GET_NTDLL(Nt##name, (__VA_ARGS__)); \
typedef NTSTATUS name##_type (int sysnum, __VA_ARGS__)
/* FIXME - since it doesn't vary we could have a variable to store the dr
* syscall routine to use, but would be yet another function pointer in
* our data segment... */
/* We use the wrappers till the native_exec Nt hooks go in (at which point
* the options have been read) so that we can have sygate compatibility as a
* runtime option. */
/* For X64 syscall we need the 1st arg last to preserve the rest in their
* proper registers. If we ever support 0-arg syscalls here we'll
* need a separate macro for those.
* Any syscall called using this macro must be declared with GET_RAW_SYSCALL
* rather than GET_SYSCALL to get the types to match up.
*/
/* i#1011: We usually use NT_SYSCALL to invoke a system call. However,
* for system calls that do not exist in older Windows, e.g. NtOpenKeyEx,
* we use NT_RAW_SYSCALL to avoid static link and build failure.
*/
/* Dispatch order, keyed on the dr_which_syscall_t variable:
 *   1) DR_SYSCALL_WOW64: dynamorio_syscall_wow64_noedx if
 *      !syscall_uses_edx_param_base(), else dynamorio_syscall_wow64; both are
 *      passed SYS_<name> (the enum index) followed by the args in order.
 *   2) X64 only, DR_SYSCALL_SYSCALL: dynamorio_syscall_syscall, cast to
 *      <name>_dr_type, passed SYS_<name> with arg1 moved last.
 *   3) otherwise: the sysenter routine (DR_SYSCALL_SYSENTER) or the int2e
 *      routine, each with a sygate variant selected by the dr_sygate_sysenter /
 *      dr_sygate_int options; these are passed syscalls[SYS_<name>] (the
 *      actual syscall number) followed by the args in order.
 */
# define NT_RAW_SYSCALL(name, arg1, ...) \
((dr_which_syscall_t == DR_SYSCALL_WOW64) ? \
(!syscall_uses_edx_param_base() ? \
((name##_type *) dynamorio_syscall_wow64_noedx)(SYS_##name, arg1, __VA_ARGS__): \
(((name##_type *) dynamorio_syscall_wow64) (SYS_##name, arg1, __VA_ARGS__))): \
((IF_X64_ELSE(dr_which_syscall_t == DR_SYSCALL_SYSCALL, false)) ? \
((name##_dr_type *) IF_X64_ELSE(dynamorio_syscall_syscall, NULL)) \
(SYS_##name, __VA_ARGS__, arg1) : \
(((name##_type *) ((dr_which_syscall_t == DR_SYSCALL_SYSENTER) ? \
(DYNAMO_OPTION(dr_sygate_sysenter) ? \
dynamorio_syscall_sygate_sysenter : \
dynamorio_syscall_sysenter) : \
(DYNAMO_OPTION(dr_sygate_int) ? \
dynamorio_syscall_sygate_int2e : \
dynamorio_syscall_int2e))) \
(syscalls[SYS_##name], arg1, __VA_ARGS__))))
/* Calls the ntdll wrapper Nt<name> while nt_wrappers_intercepted is true;
 * otherwise issues the system call through NT_RAW_SYSCALL.  Requires at least
 * one argument (arg1).
 */
# define NT_SYSCALL(name, arg1, ...) \
(nt_wrappers_intercepted ? \
Nt##name(arg1, __VA_ARGS__) : \
NT_RAW_SYSCALL(name, arg1, __VA_ARGS__))
/* check syscall numbers without using any heap */
/* SYSNUM_OFFS is the byte offset, from the start of an ntdll wrapper, at which
 * CHECK_SYSNUM_AT reads the 4-byte syscall number.
 */
# ifdef X64
# define SYSNUM_OFFS 4
# else
# define SYSNUM_OFFS 1
# endif
/* Asserts that the wrapper at pc is non-NULL and either holds syscalls[idx] at
 * SYSNUM_OFFS, starts with a relative jmp/call (a presumed hook), or is the
 * special-cased SYS_TestAlert byte pattern.
 * NOTE: the expansion already ends in a semicolon, and pc/idx are evaluated
 * more than once, so pass side-effect-free arguments.
 */
# define CHECK_SYSNUM_AT(pc, idx) \
ASSERT(pc != NULL && \
(*((int *)((pc) + SYSNUM_OFFS)) == syscalls[idx] || ALLOW_HOOKER(pc) || \
(idx == SYS_TestAlert && *(uint*)(pc) == 0xe9505050))); /* xref 9288 */
/* assuming relative CTI's are the only ones used by hookers */
# define ALLOW_HOOKER(pc) (*(unsigned char*)(pc) == JMP_REL32_OPCODE || \
*(unsigned char*)(pc) == CALL_REL32_OPCODE)
/* FIXME: we'll evaluate pc multiple times in the above macro */
static void tls_exit(void);
#endif /* !NOT_DYNAMORIO_CORE_PROPER */
/****************************************************************************
* Defines only needed internally to this file
*/
/* TlsSlots offset is hardcoded into kernel32!TlsGetValue as 0xe10 on all
* 32-bit platforms we've seen, 0x1480 for 64-bit:
* ntdll_init() asserts that offsetof(TEB, TlsSlots) matches this value.
*/
#ifdef X64
# define TEB_TLS64_OFFSET 0x1480
#else
# define TEB_TLS64_OFFSET 0xe10
#endif
/***************************************************************************
* declarations for ntdll exports shared by several routines in this file
*/
GET_NTDLL(NtQueryInformationProcess, (IN HANDLE ProcessHandle,
IN PROCESSINFOCLASS ProcessInformationClass,
OUT PVOID ProcessInformation,
IN ULONG ProcessInformationLength,
OUT PULONG ReturnLength OPTIONAL));
GET_NTDLL(NtQueryInformationFile, (IN HANDLE FileHandle,
OUT PIO_STATUS_BLOCK IoStatusBlock,
OUT PVOID FileInformation,
IN ULONG FileInformationLength,
IN FILE_INFORMATION_CLASS FileInformationClass));
GET_NTDLL(NtQuerySection, (IN HANDLE SectionHandle,
IN SECTION_INFORMATION_CLASS SectionInformationClass,
OUT PVOID SectionInformation,
IN ULONG SectionInformationLength,
OUT PULONG ResultLength OPTIONAL));
GET_NTDLL(NtQueryInformationToken, (IN HANDLE TokenHandle,
IN TOKEN_INFORMATION_CLASS TokenInformationClass,
OUT PVOID TokenInformation,
IN ULONG TokenInformationLength,
OUT PULONG ReturnLength));
/* routines that we may hook if specified in
* syscall_requires_action[], all new routines can use GET_SYSCALL
* instead of GET_NTDLL if we provide the syscall numbers - see
* comments in GET_SYSCALL definition.
*/
GET_RAW_SYSCALL(QueryVirtualMemory,
IN HANDLE ProcessHandle,
IN const void *BaseAddress,
IN MEMORY_INFORMATION_CLASS MemoryInformationClass,
OUT PVOID MemoryInformation,
IN SIZE_T MemoryInformationLength,
OUT PSIZE_T ReturnLength OPTIONAL);
GET_RAW_SYSCALL(UnmapViewOfSection,
IN HANDLE ProcessHandle,
IN PVOID BaseAddress);
GET_RAW_SYSCALL(CreateSection,
OUT PHANDLE SectionHandle,
IN ACCESS_MASK DesiredAccess,
IN POBJECT_ATTRIBUTES ObjectAttributes,
IN PLARGE_INTEGER SectionSize OPTIONAL,
IN ULONG Protect,
IN ULONG Attributes,
IN HANDLE FileHandle);
GET_RAW_SYSCALL(OpenSection,
OUT PHANDLE SectionHandle,
IN ACCESS_MASK DesiredAccess,
IN POBJECT_ATTRIBUTES ObjectAttributes);
GET_RAW_SYSCALL(AllocateVirtualMemory,
IN HANDLE ProcessHandle,
IN OUT PVOID *BaseAddress,
IN ULONG ZeroBits,
IN OUT PSIZE_T AllocationSize,
IN ULONG AllocationType,
IN ULONG Protect);
GET_RAW_SYSCALL(FreeVirtualMemory,
IN HANDLE ProcessHandle,
IN OUT PVOID *BaseAddress,
IN OUT PSIZE_T FreeSize,
IN ULONG FreeType);
GET_RAW_SYSCALL(ProtectVirtualMemory,
IN HANDLE ProcessHandle,
IN OUT PVOID *BaseAddress,
IN OUT PSIZE_T ProtectSize,
IN ULONG NewProtect,
OUT PULONG OldProtect);
GET_RAW_SYSCALL(QueryInformationThread,
IN HANDLE ThreadHandle,
IN THREADINFOCLASS ThreadInformationClass,
OUT PVOID ThreadInformation,
IN ULONG ThreadInformationLength,
OUT PULONG ReturnLength OPTIONAL);
/* CreateFile is defined CreateFileW (Unicode) or CreateFileA (ANSI),
* undefine here for system call.
*/
#undef CreateFile
GET_RAW_SYSCALL(CreateFile,
OUT PHANDLE FileHandle,
IN ACCESS_MASK DesiredAccess,
IN POBJECT_ATTRIBUTES ObjectAttributes,
OUT PIO_STATUS_BLOCK IoStatusBlock,
IN PLARGE_INTEGER AllocationSize OPTIONAL,
IN ULONG FileAttributes,
IN ULONG ShareAccess,
IN ULONG CreateDisposition,
IN ULONG CreateOptions,
IN PVOID EaBuffer OPTIONAL,
IN ULONG EaLength);
GET_RAW_SYSCALL(CreateKey,
OUT PHANDLE KeyHandle,
IN ACCESS_MASK DesiredAccess,
IN POBJECT_ATTRIBUTES ObjectAttributes,
IN ULONG TitleIndex,
IN PUNICODE_STRING Class OPTIONAL,
IN ULONG CreateOptions,
OUT PULONG Disposition OPTIONAL);
GET_RAW_SYSCALL(OpenKey,
OUT PHANDLE KeyHandle,
IN ACCESS_MASK DesiredAccess,
IN POBJECT_ATTRIBUTES ObjectAttributes);
GET_RAW_SYSCALL(SetInformationFile,
IN HANDLE FileHandle,
OUT PIO_STATUS_BLOCK IoStatusBlock,
IN PVOID FileInformation,
IN ULONG FileInformationLength,
IN FILE_INFORMATION_CLASS FileInformationClass);
/* the same structure as _CONTEXT_CHUNK in winnt.h:
 * an (offset, length) pair describing one region of an extended context.
 */
typedef struct _context_chunk_t {
LONG offset;
DWORD length;
} context_chunk_t;
/* the same structure as _CONTEXT_EX in winnt.h:
 * three chunks describing the whole extended context, the legacy CONTEXT
 * portion, and the XSTATE portion.
 */
typedef struct _context_ex_t {
context_chunk_t all;
context_chunk_t legacy;
context_chunk_t xstate;
} context_ex_t;
/* XXX: the functions below could be statically linked if every supported version
 * of ntdll exported them; that still needs to be checked, so we look them up
 * with get_proc_address instead.
 * The pointers start out NULL and are filled in by
 * nt_get_context_extended_functions(), and only when YMM_ENABLED().
 */
typedef int (WINAPI *ntdll_RtlGetExtendedContextLength_t)(DWORD, int *);
typedef int (WINAPI *ntdll_RtlInitializeExtendedContext_t)
(PVOID, DWORD, context_ex_t **);
typedef CONTEXT* (WINAPI *ntdll_RtlLocateLegacyContext_t)(context_ex_t *, DWORD);
ntdll_RtlGetExtendedContextLength_t ntdll_RtlGetExtendedContextLength = NULL;
ntdll_RtlInitializeExtendedContext_t ntdll_RtlInitializeExtendedContext = NULL;
ntdll_RtlLocateLegacyContext_t ntdll_RtlLocateLegacyContext = NULL;
#if !defined(NOT_DYNAMORIO_CORE_PROPER) && !defined(NOT_DYNAMORIO_CORE)
/* Nt* routines that are not available on all versions of Windows */
/* NtGetNextThread is resolved at runtime by nt_init_dynamic_syscall_wrappers()
 * via get_proc_address() rather than being statically linked.
 * NOTE(review): presumably left NULL when the export is absent, so callers
 * should check before use -- confirm get_proc_address()'s failure value.
 */
typedef NTSTATUS (WINAPI *NtGetNextThread_t)(__in HANDLE ProcessHandle,
__in HANDLE ThreadHandle,
__in ACCESS_MASK DesiredAccess,
__in ULONG HandleAttributes,
__in ULONG Flags,
__out PHANDLE NewThreadHandle);
NtGetNextThread_t NtGetNextThread;
#endif
/***************************************************************************
* Implementation
*/
#if !defined(NOT_DYNAMORIO_CORE_PROPER) && !defined(NOT_DYNAMORIO_CORE)
/* for Sygate 5441 compatibility hack, we need a tls slot for NT_SYSCALL when
* using sysenter system calls.
* Allocated by syscalls_init_options_read() only if the dr_sygate_sysenter
* option is set. */
uint sysenter_tls_offset = 0xffffffff; /* something that will fault */
/* will be set to false once the options are read but before the native_exec
* Nt* hooks are put in. Till then lets NT_SYSCALL know it's safe to call via
* the wrappers for Sygate compatibility before the option string is read in.
* While true, NT_SYSCALL calls the ntdll Nt* wrapper; once false it uses
* NT_RAW_SYSCALL. */
static bool nt_wrappers_intercepted = true;
/* Invoked once the runtime options are available: reserves the TLS slot that
 * the Sygate sysenter path needs (only when dr_sygate_sysenter is enabled) and
 * then switches NT_SYSCALL from the ntdll wrappers over to our raw syscalls.
 */
void
syscalls_init_options_read()
{
    if (DYNAMO_OPTION(dr_sygate_sysenter)) {
        /* slot offset is stored into sysenter_tls_offset */
        tls_alloc(false /* don't grab lock */, &sysenter_tls_offset);
    }

    /* from here on NT_SYSCALL goes through NT_RAW_SYSCALL */
    nt_wrappers_intercepted = false;
}
/* Called very early, prior to any system call use by us, making error
* reporting problematic once we have all syscalls requiring this!
* See windows_version_init() comments.
* The other problem w/ error reporting is that other code assumes
* that things are initialized -- that's all fixed now, with stats, dcontext,
* etc. checked for NULL in all the right places.
*
* Determines the system call method by inspecting the bytes of
* ntdll!NtYieldExecution, records it in dr_which_syscall_t and via
* set_syscall_method(), and (debug checks only) verifies the static syscall
* number table against the ntdll wrappers.
* Returns false if windows_version_init() fails, true otherwise.
*/
bool
syscalls_init()
{
/* Determine which syscall routine to use
* We don't have heap available yet (no syscalls yet!) so
* we can't decode easily.
* FIXME: for app syscalls, we wait until we see one so we know
* the method being used -- should we move that decision up, since
* we're checking here for DR?
*/
/* pick a syscall that is unlikely to be hooked, ref case 5217 Sygate
* requires all int system call to occur in ntdll.dll or sysfer.dll
* so we borrow the int 2e from NtYieldExecution for system calls!
* (both our own and the apps via shared_syscall). The Nt* wrappers
* are stdcall so NtYieldExecution is convenient since it has zero
* args and is unlikely to be hooked. Ref case 5441, Sygate also sometimes
* verifies the top of the stack for sysenter system calls in a similar
* fashion (must be in ntdll/sysfer). For that we again borrow out of
* NtYieldExecution (this time just the ret) to fix up our stack. */
GET_NTDLL(NtYieldExecution, (VOID));
/* N.B.: if we change which syscall, for WOW64 the wrapper can change */
app_pc pc = (app_pc) NtYieldExecution;
/* offset 9 == the 10th byte of the wrapper; see the byte table below */
app_pc int_target = pc + 9;
/* 2-byte read of the 10th and 11th bytes; the constants compared against below
* list the 11th byte first (e.g. bytes CD 2E are matched as 0x2ecd) */
ushort check = *((ushort *)(int_target));
HMODULE ntdllh = get_ntdll_base();
if (!windows_version_init())
return false;
ASSERT(syscalls != NULL);
/* FIXME : ref case 5463, we should follow through to actual system call
* for sysenter cases to be sure os isn't actually using int because of
* old hardware not supporting sysenter */
/* check 10th and 11th bytes:
* int 2e: {2k}
* 77F97BFA: B8 BA 00 00 00 mov eax,0BAh
* 77F97BFF: 8D 54 24 04 lea edx,[esp+4]
* 77F97C03: CD 2E int 2Eh
* ret (stdcall)
* sysenter: {xpsp[0,1] 2k3sp0}
* 0x77f7eb23 b8 77 00 00 00 mov $0x00000077 -> %eax
* 0x77f7eb28 ba 00 03 fe 7f mov $0x7ffe0300 -> %edx
* 0x77f7eb2d ff d2 call %edx
* ret (stdcall)
* sysenter: {xpsp2 2k3sp1}
* 0x77f7eb23 b8 77 00 00 00 mov $0x00000077 -> %eax
* 0x77f7eb28 ba 00 03 fe 7f mov $0x7ffe0300 -> %edx
* 0x77f7eb2d ff 12 call [%edx]
* ret (stdcall)
* wow64 xp64 (case 3922):
* 7d61ce3f b843000000 mov eax,0x43
* 7d61ce44 b901000000 mov ecx,0x1
* 7d61ce49 8d542404 lea edx,[esp+0x4]
* 7d61ce4d 64ff15c0000000 call dword ptr fs:[000000c0]
* 7d61ce54 c3 ret
* x64 syscall (PR 215398):
* 00000000`78ef16c0 4c8bd1 mov r10,rcx
* 00000000`78ef16c3 b843000000 mov eax,43h
* 00000000`78ef16c8 0f05 syscall
* 00000000`78ef16ca c3 ret
* win8 sysenter w/ co-located "inlined" callee:
* 77d7422c b801000000 mov eax,1
* 77d74231 e801000000 call ntdll!NtYieldExecution+0xb (77d74237)
* 77d74236 c3 ret
* 77d74237 8bd4 mov edx,esp
* 77d74239 0f34 sysenter
* 77d7423b c3 ret
* win8 wow64 syscall (has no ecx):
* 777311bc b844000100 mov eax,10044h
* 777311c1 64ff15c0000000 call dword ptr fs:[0C0h]
* 777311c8 c3 ret
*/
/* bytes CD 2E: the "int 2Eh" instruction itself */
if (check == 0x2ecd) {
dr_which_syscall_t = DR_SYSCALL_INT2E;
set_syscall_method(SYSCALL_METHOD_INT);
int_syscall_address = int_target;
/* ASSERT is simple ret (i.e. 0 args) */
ASSERT(*(byte *)(int_target + 2) == 0xc3 /* ret 0 */);
/* bytes 00 8D: end of "mov ecx,1" then start of lea (xp64 wow64);
* bytes 00 00: inside the fs:[0C0h] call operand (win8 wow64) */
} else if (check == 0x8d00 ||
check == 0x0000/* win8 */) {
ASSERT(is_wow64_process(NT_CURRENT_PROCESS));
dr_which_syscall_t = DR_SYSCALL_WOW64;
set_syscall_method(SYSCALL_METHOD_WOW64);
if (check == 0x8d00) /* xp through win7 */
wow64_index = (int *) windows_XP_wow64_index;
DOCHECK(1, {
/* locate the "call fs:[0xc0]" relative to int_target for each layout */
int call_start_offs = (check == 0x8d00) ? 5 : -4;
ASSERT(*((uint *)(int_target+call_start_offs)) == 0xc015ff64);
ASSERT(*((uint *)(int_target+call_start_offs+3)) == WOW64_TIB_OFFSET);
});
DOCHECK(1, {
/* We assume syscalls go through teb->WOW32Reserved */
TEB *teb = get_own_teb();
ASSERT(teb != NULL && teb->WOW32Reserved != NULL);
});
#ifdef X64 /* PR 205898 covers 32-bit syscall support */
/* bytes 05 C3: second byte of "syscall" (0f 05) followed by ret */
} else if (check == 0xc305) {
dr_which_syscall_t = DR_SYSCALL_SYSCALL;
set_syscall_method(SYSCALL_METHOD_SYSCALL);
/* ASSERT is syscall */
ASSERT(*(byte *)(int_target - 1) == 0x0f);
#endif
/* bytes 7F FF: last byte of the 0x7ffe0300 immediate, then the ff of the
* indirect call */
} else if (check == 0xff7f) {
/* verify is call %edx or call [%edx] followed by ret 0 [0xc3] */
ASSERT(*((ushort *)(int_target+2)) == 0xc3d2 ||
*((ushort *)(int_target+2)) == 0xc312);
/* Double check use_ki_syscall_routines() matches type of ind call used */
ASSERT((!use_ki_syscall_routines() && *((ushort *)(int_target+1)) == 0xd2ff) ||
(use_ki_syscall_routines() && *((ushort *)(int_target+1)) == 0x12ff));
/* verify VSYSCALL_BOOTSTRAP_ADDR */
IF_X64(ASSERT_NOT_IMPLEMENTED(false));
ASSERT(*((uint *)(int_target-3)) == (uint)(ptr_uint_t)VSYSCALL_BOOTSTRAP_ADDR);
sysenter_ret_address = (app_pc)int_target+3; /* save addr of ret */
#ifdef CLIENT_INTERFACE
/* i#537: we do not support XPSP{0,1} wrt showing the skipped ret,
* which requires looking at the vsyscall code.
*/
KiFastSystemCallRet_address = (app_pc)
get_proc_address(ntdllh, "KiFastSystemCallRet");
#endif
set_syscall_method(SYSCALL_METHOD_SYSENTER);
dr_which_syscall_t = DR_SYSCALL_SYSENTER;
} else {
/* win8: call followed by ret */
ASSERT(check == 0xc300 || check == 0xc200);
IF_X64(ASSERT_NOT_IMPLEMENTED(false));
/* kernel returns control to KiFastSystemCallRet, not local sysenter, of course */
sysenter_ret_address = (app_pc) get_proc_address(ntdllh, "KiFastSystemCallRet");
ASSERT(sysenter_ret_address != NULL);
#ifdef CLIENT_INTERFACE
KiFastSystemCallRet_address = (app_pc)
get_proc_address(ntdllh, "KiFastSystemCallRet");
#endif
set_syscall_method(SYSCALL_METHOD_SYSENTER);
dr_which_syscall_t = DR_SYSCALL_SYSENTER;
}
/* Prime use_ki_syscall_routines() */
use_ki_syscall_routines();
/* quick sanity check that the syscall numbers we care about are what's
* in our static array. we still do our later full-decode sanity checks.
*/
DOCHECK(1, {
int i;
ASSERT(ntdllh != NULL);
for (i = 0; i < SYS_MAX; i++) {
/* skip syscalls marked as absent in the table */
if (syscalls[i] == SYSCALL_NOT_PRESENT)
continue;
/* note that this check allows a hooker so we'll need a
* better way of determining syscall numbers
*/
CHECK_SYSNUM_AT((byte *) get_proc_address(ntdllh, syscall_names[i]), i);
}
});
return true;
}
/* Returns true if machine is using the Ki*SysCall routines (indirection via vsyscall
* page), false otherwise.
*
* XXX: on win8, KiFastSystemCallRet is used, but KiFastSystemCall is never
* executed even though it exists. This routine returns true there (we have not
* yet set up the versions so can't just call get_os_version()).
*/
bool
use_ki_syscall_routines()
{
/* FIXME - two ways to do this. We could use the byte matching above in
* syscalls_init to match call edx vs call [edx] or we could check for the
* existence of the Ki*SystemCall* routines. We do the latter and have
* syscalls_init assert that the two methods agree. */
/* We use KiFastSystemCall, but KiIntSystemCall and KiFastSystemCallRet would
* work just as well. */
static generic_func_t ki_fastsyscall_addr = (generic_func_t)PTR_UINT_MINUS_1;
if (ki_fastsyscall_addr == (generic_func_t)PTR_UINT_MINUS_1) {
ki_fastsyscall_addr = get_proc_address(get_ntdll_base(), "KiFastSystemCall");
ASSERT(ki_fastsyscall_addr != (generic_func_t)PTR_UINT_MINUS_1);
}
return (ki_fastsyscall_addr != NULL);
}
/* Resolves the ntdll extended-context routines from the module at base and
 * stores them in the ntdll_Rtl* function pointers.  Does nothing unless
 * YMM_ENABLED().
 */
static void
nt_get_context_extended_functions(app_pc base)
{
    /* indicates OS support, not just processor support */
    if (!YMM_ENABLED())
        return;

    ntdll_RtlGetExtendedContextLength = (ntdll_RtlGetExtendedContextLength_t)
        get_proc_address(base, "RtlGetExtendedContextLength");
    ASSERT(ntdll_RtlGetExtendedContextLength != NULL);

    ntdll_RtlInitializeExtendedContext = (ntdll_RtlInitializeExtendedContext_t)
        get_proc_address(base, "RtlInitializeExtendedContext");
    ASSERT(ntdll_RtlInitializeExtendedContext != NULL);

    ntdll_RtlLocateLegacyContext = (ntdll_RtlLocateLegacyContext_t)
        get_proc_address(base, "RtlLocateLegacyContext");
    ASSERT(ntdll_RtlLocateLegacyContext != NULL);
}
/* Looks up, in the module at base, the Nt* wrappers that are not exported by
 * every Windows version and stores them in their global function pointers.
 */
static void
nt_init_dynamic_syscall_wrappers(app_pc base)
{
    generic_func_t get_next_thread = get_proc_address(base, "NtGetNextThread");

    NtGetNextThread = (NtGetNextThread_t) get_next_thread;
}
#endif /* !NOT_DYNAMORIO_CORE_PROPER */
/* One-time initialization for this file: sanity-checks the hardcoded TlsSlots
 * offset and, in the core proper, resolves the dynamically looked-up ntdll
 * routines.
 */
void
ntdll_init()
{
    /* FIXME: decode kernel32!TlsGetValue and get the real offset
     * from there?
     */
    ASSERT(TEB_TLS64_OFFSET == offsetof(TEB, TlsSlots));
#if !defined(NOT_DYNAMORIO_CORE_PROPER) && !defined(NOT_DYNAMORIO_CORE)
    /* optional Nt* wrappers first, then the extended-context helpers */
    nt_init_dynamic_syscall_wrappers((app_pc) get_ntdll_base());
    nt_get_context_extended_functions((app_pc) get_ntdll_base());
#endif
}
/* Cleanup counterpart to ntdll_init().
 * Note that this function is called even on the release fast exit path
 * (via os_exit) and thus should only do necessary cleanup without ifdef
 * DEBUG, but also be careful about ifdef DEBUG since Detach wants to remove
 * as much of us as possible.
 */
void
ntdll_exit(void)
{
#if !defined(NOT_DYNAMORIO_CORE_PROPER) && !defined(NOT_DYNAMORIO_CORE)
    /* only the core proper has TLS state to release */
    tls_exit();
#endif
}
/* Fills *info with the ThreadBasicInformation of thread handle h.
 * *info is zeroed first, so it holds zeros if the query fails.
 * Returns the NTSTATUS of the underlying NtQueryInformationThread call.
 * Export this if needed elsewhere.
 */
static NTSTATUS
query_thread_info(HANDLE h, THREAD_BASIC_INFORMATION *info)
{
    NTSTATUS status;
    ULONG returned_len;

    memset(info, 0, sizeof(*info));
    status = NT_SYSCALL(QueryInformationThread, h, ThreadBasicInformation,
                        info, sizeof(*info), &returned_len);
    /* on success the kernel must have filled the whole structure */
    ASSERT(!NT_SUCCESS(status) || returned_len == sizeof(*info));
    return status;
}
/* Retrieves a segment descriptor for thread hthread.  The caller must have
 * set entry->Selector to the selector of interest before calling.
 * Returns the NTSTATUS of the underlying NtQueryInformationThread call.
 */
NTSTATUS
query_seg_descriptor(HANDLE hthread, DESCRIPTOR_TABLE_ENTRY *entry)
{
    ULONG returned_len;
    NTSTATUS status = NT_SYSCALL(QueryInformationThread, hthread,
                                 ThreadDescriptorTableEntry,
                                 entry, sizeof(*entry), &returned_len);

    /* This call only writes the LDT_ENTRY portion of the table entry */
    ASSERT(!NT_SUCCESS(status) || returned_len == sizeof(LDT_ENTRY));
    return status;
}
/* Retrieves the win32 start address of thread hthread into the pointer-sized
 * buffer that start_addr points at.
 * NOTE: According to Nebbet, the value retrieved with
 * ThreadQuerySetWin32StartAddress is invalid if the thread has called
 * ZwReplyWaitReplyPort or ZwReplyWaitReceivePort.
 * Returns the NTSTATUS of the underlying NtQueryInformationThread call.
 */
NTSTATUS
query_win32_start_addr(HANDLE hthread, PVOID start_addr)
{
    ULONG returned_len;
    NTSTATUS status = NT_SYSCALL(QueryInformationThread, hthread,
                                 ThreadQuerySetWin32StartAddress,
                                 start_addr, sizeof(app_pc), &returned_len);

    /* on success exactly one pointer is written */
    ASSERT(!NT_SUCCESS(status) || returned_len == sizeof(PVOID));
    return status;
}
/* Thin wrapper around the NtQuerySystemInformation system call: asks for
 * info_class and has the kernel write up to info_size bytes into info.
 * The number of bytes actually written is not reported to the caller.
 * Returns the NTSTATUS of the call.
 */
NTSTATUS
query_system_info(IN SYSTEM_INFORMATION_CLASS info_class,
                  IN int info_size,
                  OUT PVOID info)
{
    ULONG bytes_received = 0;
    GET_NTDLL(NtQuerySystemInformation, (IN SYSTEM_INFORMATION_CLASS info_class,
                                         OUT PVOID info,
                                         IN ULONG info_size,
                                         OUT PULONG bytes_received));

    return NtQuerySystemInformation(info_class, info, info_size, &bytes_received);
}
/* since not exporting get_own_teb() */
#ifndef NOT_DYNAMORIO_CORE
thread_id_t
get_thread_id()
{
return (thread_id_t) get_own_teb()->ClientId.UniqueThread;
}
process_id_t
get_process_id()
{
return (process_id_t) get_own_teb()->ClientId.UniqueProcess;
}
int
get_last_error()
{
return get_own_teb()->LastErrorValue;
}
void
set_last_error(int error)
{
get_own_teb()->LastErrorValue = error;
}
#endif /* !NOT_DYNAMORIO_CORE */
HANDLE
get_stderr_handle()
{
HANDLE herr = get_own_peb()->ProcessParameters->StdErrorHandle;
if (herr == NULL)
return INVALID_HANDLE_VALUE;
return herr;
}
HANDLE
get_stdout_handle()
{
HANDLE hout = get_own_peb()->ProcessParameters->StdOutputHandle;
if (hout == NULL)
return INVALID_HANDLE_VALUE;
return hout;
}
HANDLE
get_stdin_handle()
{
HANDLE hin = get_own_peb()->ProcessParameters->StdInputHandle;
if (hin == NULL)
return INVALID_HANDLE_VALUE;
return hin;
}
/* Checks whether the thread behind hthread has exited by waiting on the handle
 * with a very short timeout.
 * Returns THREAD_EXITED if the handle is signaled, THREAD_NOT_EXITED if the wait
 * timed out, and THREAD_EXIT_ERROR if hthread is not a thread handle, if a
 * SYNCHRONIZE-capable duplicate could not be obtained, or if the wait failed.
 */
thread_exited_status_t
is_thread_exited(HANDLE hthread)
{
    LARGE_INTEGER short_wait;
    wait_status_t wait_res;

    /* Keep the timeout small, just want to check if signaled.  Don't want to
     * wait at all really, but no way to specify that.  Note negative => relative
     * time offset (so is a 1 millisecond timeout). */
    short_wait.QuadPart = -((int)1 * TIMER_UNITS_PER_MILLISECOND);

    if (thread_id_from_handle(hthread) == (thread_id_t)PTR_UINT_MINUS_1) {
        /* not a thread handle */
        ASSERT(false && "Not a valid thread handle.");
        return THREAD_EXIT_ERROR;
    }

    if (TEST(SYNCHRONIZE, nt_get_handle_access_rights(hthread))) {
        /* the handle can be waited on directly */
        wait_res = nt_wait_event_with_timeout(hthread, &short_wait);
    } else {
        /* Note that our own thread handles will have SYNCHRONIZE since, like
         * THREAD_TERMINATE, that seems to be a right the thread can always get
         * for itself (prob. due to how stacks are freed).  So only a potential
         * issue with app handles for which we try to dup with the required
         * rights.  xref 9529 */
        HANDLE dup_handle = INVALID_HANDLE_VALUE;
        NTSTATUS dup_status = duplicate_handle(NT_CURRENT_PROCESS, hthread,
                                               NT_CURRENT_PROCESS, &dup_handle,
                                               SYNCHRONIZE, 0, 0);
        if (!NT_SUCCESS(dup_status)) {
            ASSERT_CURIOSITY(false && "Unable to check if thread has exited.");
            return THREAD_EXIT_ERROR;
        }
        wait_res = nt_wait_event_with_timeout(dup_handle, &short_wait);
        close_handle(dup_handle);
    }

    if (wait_res == WAIT_SIGNALED)
        return THREAD_EXITED;
    if (wait_res == WAIT_TIMEDOUT)
        return THREAD_NOT_EXITED;

    /* anything else is expected to be a wait failure */
    ASSERT(wait_res == WAIT_ERROR);
    ASSERT_CURIOSITY(false && "is_thread_exited() unknown error");
    return THREAD_EXIT_ERROR;
}
/* Maps a thread handle to its thread id by querying the thread's basic
 * information.  The other ways to get thread info, like OpenThread and
 * Toolhelp, don't let you go from handle to id (remember handles can be
 * duplicated and there's no way to tell equivalence), plus are only on win2k.
 * Returns POINTER_MAX on failure.
 */
thread_id_t
thread_id_from_handle(HANDLE h)
{
    THREAD_BASIC_INFORMATION tbi;
    NTSTATUS status = query_thread_info(h, &tbi);

    if (NT_SUCCESS(status))
        return (thread_id_t) tbi.ClientId.UniqueThread;
    return POINTER_MAX;
}
/* Fills *info with the ProcessBasicInformation of process handle h, using the
 * ntdll NtQueryInformationProcess wrapper.
 * *info is zeroed first, so it holds zeros if the query fails.
 * Returns the NTSTATUS of the call.  Export this if needed elsewhere.
 */
static NTSTATUS
query_process_info(HANDLE h, PROCESS_BASIC_INFORMATION *info)
{
    NTSTATUS status;
    ULONG returned_len;

    memset(info, 0, sizeof(*info));
    status = NtQueryInformationProcess(h, ProcessBasicInformation,
                                       info, sizeof(*info), &returned_len);
    /* on success the kernel must have filled the whole structure */
    ASSERT(!NT_SUCCESS(status) || returned_len == sizeof(*info));
    return status;
}
/* Maps a process handle to its process id.
 * Returns POINTER_MAX on failure.
 */
process_id_t
process_id_from_handle(HANDLE h)
{
    PROCESS_BASIC_INFORMATION pbi;
    NTSTATUS status = query_process_info(h, &pbi);

    if (NT_SUCCESS(status))
        return (process_id_t) pbi.UniqueProcessId;
    return POINTER_MAX;
}
/* Maps a thread handle to the id of the process that owns the thread.
 * Returns POINTER_MAX on failure.
 */
process_id_t
process_id_from_thread_handle(HANDLE h)
{
    THREAD_BASIC_INFORMATION tbi;
    NTSTATUS status = query_thread_info(h, &tbi);

    if (NT_SUCCESS(status))
        return (process_id_t) tbi.ClientId.UniqueProcess;
    return POINTER_MAX;
}
/* Opens the process with id pid, requesting PROCESS_ALL_ACCESS.
 * Returns the new handle, or INVALID_HANDLE_VALUE if the open fails.
 */
HANDLE
process_handle_from_id(process_id_t pid)
{
    NTSTATUS status;
    HANDLE proc_handle;
    OBJECT_ATTRIBUTES attrs;
    CLIENT_ID client;

    InitializeObjectAttributes(&attrs, NULL, OBJ_CASE_INSENSITIVE, NULL, NULL);
    /* only the process field of the client id is specified */
    memset(&client, 0, sizeof(client));
    client.UniqueProcess = (HANDLE) pid;

    status = nt_raw_OpenProcess(&proc_handle, PROCESS_ALL_ACCESS, &attrs, &client);
    if (!NT_SUCCESS(status)) {
        NTPRINT("nt_open_process failed: %x\n", status);
        return INVALID_HANDLE_VALUE;
    }
    return proc_handle;
}
#if !defined(NOT_DYNAMORIO_CORE_PROPER) && !defined(NOT_DYNAMORIO_CORE)
/* Opens the thread with id tid, requesting THREAD_ALL_ACCESS.
 * Returns the new handle, or INVALID_HANDLE_VALUE if the open fails.
 */
HANDLE
thread_handle_from_id(thread_id_t tid)
{
    NTSTATUS status;
    HANDLE thread_handle;
    OBJECT_ATTRIBUTES attrs;
    CLIENT_ID client;

    InitializeObjectAttributes(&attrs, NULL, OBJ_CASE_INSENSITIVE, NULL, NULL);
    /* only the thread field of the client id is specified */
    memset(&client, 0, sizeof(client));
    client.UniqueThread = (HANDLE) tid;

    status = nt_raw_OpenThread(&thread_handle, THREAD_ALL_ACCESS, &attrs, &client);
    if (!NT_SUCCESS(status)) {
        NTPRINT("nt_open_thread failed: %x\n", status);
        return INVALID_HANDLE_VALUE;
    }
    return thread_handle;
}
#endif
/* PEB:
 * for a running thread this is stored at fs:[30h]
 * it's always at 0x7FFDF000 according to InsideWin2k p.290
 * but that's out of date, is randomized within 0x7ffd... on XPsp2
 * so use query_process_info to get it.
 * Returns the PEB address of the process behind handle h, or NULL if the
 * query fails.
 */
PEB *
get_peb(HANDLE h)
{
    PROCESS_BASIC_INFORMATION pbi;
    NTSTATUS status = query_process_info(h, &pbi);

    if (NT_SUCCESS(status))
        return pbi.PebBaseAddress;
    return NULL;
}
/* Returns the PEB of the current process.  The result of the first lookup is
 * remembered in a function-local static and reused on later calls.
 */
PEB *
get_own_peb()
{
    /* alt. we could use get_own_teb->PEBptr, but since we're remembering the
     * results of the first lookup doesn't really gain us much */
    static PEB *cached_peb;

    if (cached_peb != NULL)
        return cached_peb;

    cached_peb = get_peb(NT_CURRENT_PROCESS);
    ASSERT(cached_peb != NULL);
    return cached_peb;
}
/****************************************************************************/
#ifndef NOT_DYNAMORIO_CORE
/* Returns the TEB address of the thread behind handle h, or NULL if the thread
 * query fails.  Going through the thread-information query avoids needing the
 * CXT_ macros and SELF_TIB_OFFSET from os_exports.h.
 */
TEB *
get_teb(HANDLE h)
{
    THREAD_BASIC_INFORMATION tbi;
    NTSTATUS status = query_thread_info(h, &tbi);

    if (NT_SUCCESS(status))
        return (TEB *) tbi.TebBaseAddress;
    return NULL;
}
/* Cached base address of ntdll.dll; NULL until first resolved (or set early via
 * set_ntdll_base()).
 */
static app_pc ntdll_base;

/* Returns the base address of ntdll.dll, looking it up by module name on the
 * first call and returning the cached value afterwards.
 */
void *
get_ntdll_base(void)
{
    if (ntdll_base != NULL)
        return ntdll_base;

#ifndef NOT_DYNAMORIO_CORE_PROPER
    ASSERT(!dr_earliest_injected); /* Ldr not initialized yet */
#endif
    ntdll_base = (app_pc) get_module_handle(L"ntdll.dll");
    ASSERT(ntdll_base != NULL);
    return ntdll_base;
}
#if !defined(NOT_DYNAMORIO_CORE_PROPER) && !defined(NOT_DYNAMORIO_CORE)
/* For early injection we can't use get_module_handle() to find ntdll, so the
 * injector supplies the base here.  Has no effect once a base has already been
 * recorded.
 */
void
set_ntdll_base(app_pc base)
{
    if (ntdll_base != NULL)
        return;
    ntdll_base = base;
}
/* Returns true iff pc lies within [ntdll base, ntdll base + allocation size).
 * The end address is computed on first use and cached.
 * get_allocation_size() is in os.c.
 */
bool
is_in_ntdll(app_pc pc)
{
    static app_pc cached_end;
    app_pc start = get_ntdll_base();

    if (cached_end == NULL) {
        cached_end = start + get_allocation_size(start, NULL);
        ASSERT(cached_end > start);
    }
    if (pc < start)
        return false;
    return (pc < cached_end);
}
/* Sanity-checks the offsets and legacy length stored in a CONTEXT_EX header
 * against sizeof(CONTEXT).  flags holds the ContextFlags of the associated
 * CONTEXT and is consulted only for the non-x64 short-legacy-area case.
 */
static bool
context_check_extended_sizes(context_ex_t *cxt_ex, uint flags)
{
    bool legacy_length_ok;
    /* Both the "all" and "legacy" areas must start sizeof(CONTEXT) bytes back */
    if (cxt_ex->all.offset != -(LONG)sizeof(CONTEXT))
        return false;
    if (cxt_ex->legacy.offset != -(LONG)sizeof(CONTEXT))
        return false;
    legacy_length_ok =
        (cxt_ex->legacy.length == (DWORD)sizeof(CONTEXT)
         /* We won't allocate space for ExtendedRegisters if not saving xmm */
         IF_NOT_X64(|| (!TESTALL(CONTEXT_XMM_FLAG, flags) &&
                        cxt_ex->legacy.length ==
                        (DWORD)offsetof(CONTEXT, ExtendedRegisters))));
    return legacy_length_ok;
}
/* Get the ymm saved area from the CONTEXT extended area.
 * Returns NULL if the extended area is not initialized: that is, if the
 * CONTEXT_EX header following cxt cannot be read safely or if its size
 * fields fail context_check_extended_sizes().
 */
byte *
context_ymmh_saved_area(CONTEXT *cxt)
{
    /* i#437: ymm are inside XSTATE construct which should be
     * laid out like this: {CONTEXT, CONTEXT_EX, XSTATE}.
     * The gap between CONTEXT_EX and XSTATE varies due to
     * alignment, should read CONTEXT_EX fields to get it.
     */
    ptr_uint_t p = (ptr_uint_t)cxt;
    context_ex_t our_cxt_ex;
    context_ex_t *cxt_ex = (context_ex_t *)(p + sizeof(*cxt));
    ASSERT(proc_avx_enabled());
    /* Verify the CONTEXT_EX is correct, working only on our safely-read copy.
     * If the header is unreadable we must not dereference cxt_ex below, so
     * we report the area as unavailable.
     */
    if (!safe_read(cxt_ex, sizeof(*cxt_ex), &our_cxt_ex)) {
        ASSERT_CURIOSITY(false && "fail to read CONTEXT_EX");
        return NULL;
    }
    if (!context_check_extended_sizes(&our_cxt_ex, cxt->ContextFlags)) {
        ASSERT_CURIOSITY(false && "CONTEXT_EX is not setup correctly");
        return NULL;
    }
    /* XXX: XSTATE has xsave format minus first 512 bytes, so ymm0
     * should be at offset 64.
     * Should we use kernel32!LocateXStateFeature() or
     * ntdll!RtlLocateExtendedFeature() to locate,
     * or cpuid to find Ext_Save_Area_2?
     * Currently, use hardcode XSTATE_HEADER_SIZE.
     * mcontext_to_context() also uses this to get back to the header.
     */
    p = p + sizeof(*cxt) + our_cxt_ex.xstate.offset + XSTATE_HEADER_SIZE;
    return (byte *)p;
}
/* routines for conversion between CONTEXT and priv_mcontext_t */
/* assumes our segment registers are the same as the app and that
* we never touch floating-point state and debug registers.
* Note that this code will not compile for non-core (no proc_has_feature())
* but is not currently used there.
*/
/* Copies register state from a Windows CONTEXT into a priv_mcontext_t.
 * All we need is CONTEXT_INTEGER and non-segment CONTEXT_CONTROL,
 * and for PR 264138 we need the XMM registers.
 */
void
context_to_mcontext(priv_mcontext_t *mcontext, CONTEXT *cxt)
{
    int slot;
    byte *ymmh_base;
    /* i#437: cxt might come from kernel where XSTATE is not set */
    /* FIXME: This opens us up to a bug in DR where DR requests a CONTEXT but
     * forgets to set XSTATE even though app has used it and we then mess up
     * the app's ymm state.  Any way we can detect that?
     * One way is to pass a flag to indicate if the context is from kernel or
     * set by DR, but it requires update a chain of calls.
     */
    ASSERT(TESTALL(CONTEXT_DR_STATE_NO_YMM, cxt->ContextFlags));

    /* CONTEXT_CONTROL without the segments */
    mcontext->pc = (app_pc) cxt->CXT_XIP; /* including XIP */
    mcontext->xflags = cxt->CXT_XFLAGS;
    mcontext->xsp = cxt->CXT_XSP;
    mcontext->xbp = cxt->CXT_XBP;

    /* CONTEXT_INTEGER */
    mcontext->xdi = cxt->CXT_XDI;
    mcontext->xsi = cxt->CXT_XSI;
    mcontext->xdx = cxt->CXT_XDX;
    mcontext->xcx = cxt->CXT_XCX;
    mcontext->xbx = cxt->CXT_XBX;
    mcontext->xax = cxt->CXT_XAX;
#ifdef X64
    mcontext->r15 = cxt->R15;
    mcontext->r14 = cxt->R14;
    mcontext->r13 = cxt->R13;
    mcontext->r12 = cxt->R12;
    mcontext->r11 = cxt->R11;
    mcontext->r10 = cxt->R10;
    mcontext->r9 = cxt->R9;
    mcontext->r8 = cxt->R8;
#endif

    if (CONTEXT_PRESERVE_XMM) { /* no harm done if no sse support */
        /* CONTEXT_FLOATING_POINT or CONTEXT_EXTENDED_REGISTERS */
        for (slot = 0; slot < NUM_XMM_SLOTS; slot++)
            memcpy(&mcontext->ymm[slot], CXT_XMM(cxt, slot), XMM_REG_SIZE);
    }

    /* If XSTATE is NOT set, the app has NOT used any ymm state and
     * thus it's fine if we do not copy the upper ymm halves.
     */
    if (!CONTEXT_PRESERVE_YMM || !TESTALL(CONTEXT_XSTATE, cxt->ContextFlags))
        return;
    ymmh_base = context_ymmh_saved_area(cxt);
    if (ymmh_base == NULL)
        return;
    /* Upper 128 bits of each ymm come from the XSTATE ymmh area */
    for (slot = 0; slot < NUM_XMM_SLOTS; slot++) {
        memcpy(&mcontext->ymm[slot].u32[4],
               &YMMH_AREA(ymmh_base, slot).u32[0],
               YMMH_REG_SIZE);
    }
}
/* Fills in the CONTEXT pointed at by cxt from the register values in mcontext.
 * cxt->ContextFlags must already be set by the caller: it is asserted to
 * include CONTEXT_DR_STATE_NO_YMM, and CONTEXT_XSTATE in it selects whether
 * the ymmh area is written.
 *
 * If set_cur_seg is true, cs and ss (part of CONTEXT_CONTROL) are set to
 * the current values.
 * If mcontext_to_context is used to set another thread's context,
 * the caller must initialize the cs/ss value properly and set
 * set_cur_seg to false
 */
void
mcontext_to_context(CONTEXT *cxt, priv_mcontext_t *mcontext, bool set_cur_seg)
{
/* xref comment in context_to_mcontext */
ASSERT(TESTALL(CONTEXT_DR_STATE_NO_YMM, cxt->ContextFlags));
if (set_cur_seg) {
/* i#1033: initialize CONTEXT_CONTROL segments for current thread */
get_segments_cs_ss(&cxt->SegCs, &cxt->SegSs);
}
/* CONTEXT_INTEGER */
cxt->CXT_XAX = mcontext->xax;
cxt->CXT_XBX = mcontext->xbx;
cxt->CXT_XCX = mcontext->xcx;
cxt->CXT_XDX = mcontext->xdx;
cxt->CXT_XSI = mcontext->xsi;
cxt->CXT_XDI = mcontext->xdi;
#ifdef X64
cxt->R8 = mcontext->r8;
cxt->R9 = mcontext->r9;
cxt->R10 = mcontext->r10;
cxt->R11 = mcontext->r11;
cxt->R12 = mcontext->r12;
cxt->R13 = mcontext->r13;
cxt->R14 = mcontext->r14;
cxt->R15 = mcontext->r15;
#endif
if (CONTEXT_PRESERVE_XMM) { /* no harm done if no sse support */
/* CONTEXT_FLOATING_POINT or CONTEXT_EXTENDED_REGISTERS */
int i;
/* We can't set just xmm and not the rest of the fp state
 * so we fill in w/ the current (unchanged by DR) values
 * (i#462, i#457)
 */
byte fpstate_buf[MAX_FP_STATE_SIZE];
/* proc_save_fpstate() is handed a 16-byte-aligned pointer into the buffer */
byte *fpstate = (byte*)ALIGN_FORWARD(fpstate_buf, 16);
size_t written = proc_save_fpstate(fpstate);
#ifdef X64
ASSERT(sizeof(cxt->FltSave) == written);
memcpy(&cxt->FltSave, fpstate, written);
/* We also have to set the x64-only duplicate top-level MxCsr field (i#1081) */
cxt->MxCsr = cxt->FltSave.MxCsr;
#else
ASSERT(MAXIMUM_SUPPORTED_EXTENSION == written);
memcpy(&cxt->ExtendedRegisters, fpstate, written);
#endif
/* Now update w/ the xmm values from mcontext */
for (i = 0; i < NUM_XMM_SLOTS; i++)
memcpy(CXT_XMM(cxt, i), &mcontext->ymm[i], XMM_REG_SIZE);
}
/* The ymmh area is only written when the caller asked for CONTEXT_XSTATE
 * and context_ymmh_saved_area() was able to locate it.
 */
if (CONTEXT_PRESERVE_YMM && TESTALL(CONTEXT_XSTATE, cxt->ContextFlags)) {
byte *ymmh_area = context_ymmh_saved_area(cxt);
if (ymmh_area != NULL) {
/* The XSTATE header sits XSTATE_HEADER_SIZE bytes before the ymmh area */
uint64 *header_bv = (uint64 *) (ymmh_area - XSTATE_HEADER_SIZE);
uint bv_high, bv_low;
int i;
#ifndef X64
/* In 32-bit Windows mcontext, we do not preserve xmm/ymm 6 and 7,
 * which are callee saved registers, so we must fill them.
 */
dr_ymm_t ymms[2];
dr_ymm_t *ymm_ptr = ymms;
/* ecx is the base register used by the raw-encoded stores below */
__asm { mov ecx, ymm_ptr}
/* Some supported (old) compilers do not support/understand AVX
 * instructions, so we use RAW bit here instead.
 */
/* NOTE(review): HEX() is defined here but not used in the visible code */
# define HEX(n) 0##n##h
# define RAW(n) __asm _emit 0x##n
/* c5 fc 11 71 00 vmovups %ymm6 -> 0x00(%XCX)
 * c5 fc 11 79 20 vmovups %ymm7 -> 0x20(%XCX)
 */
RAW(c5) RAW(fc) RAW(11) RAW(71) RAW(00);
RAW(c5) RAW(fc) RAW(11) RAW(79) RAW(20);
/* XMM6/7 has been copied above, so only copy ymmh here */
memcpy(&YMMH_AREA(ymmh_area, 6).u32[0], &ymms[0].u32[4], YMMH_REG_SIZE);
memcpy(&YMMH_AREA(ymmh_area, 7).u32[0], &ymms[1].u32[4], YMMH_REG_SIZE);
#endif
/* Upper halves of the ymm registers tracked in mcontext */
for (i = 0; i < NUM_XMM_SLOTS; i++) {
memcpy(&YMMH_AREA(ymmh_area, i).u32[0],
&mcontext->ymm[i].u32[4],
YMMH_REG_SIZE);
}
/* The only un-reserved part of the AVX header saved by OP_xsave is
 * the XSTATE_BV byte.
 */
dr_xgetbv(&bv_high, &bv_low);
/* Store the 64-bit xgetbv result as the first quadword of the header */
*header_bv = (((uint64)bv_high)<<32) | bv_low;
}
}
/* CONTEXT_CONTROL without the segments */
cxt->CXT_XBP = mcontext->xbp;
cxt->CXT_XSP = mcontext->xsp;
IF_X64(ASSERT_TRUNCATE(cxt->CXT_XFLAGS, uint, mcontext->xflags));
cxt->CXT_XFLAGS = (uint) mcontext->xflags;
cxt->CXT_XIP = (ptr_uint_t)mcontext->pc; /* including XIP */
}
#endif /* core proper */
#endif /* !NOT_DYNAMORIO_CORE */
/****************************************************************************/
/****************************************************************************/
#if !defined(NOT_DYNAMORIO_CORE_PROPER) && !defined(NOT_DYNAMORIO_CORE)
/* avoid needing x86_code.c from x86.asm from get_own_context_helper(),
 * unstatic for use by GET_OWN_CONTEXT macro */
/* Fills cxt with the caller-supplied cs and ss selector values and with the
 * register values from mc (via mcontext_to_context() with set_cur_seg false).
 * cxt->ContextFlags is the same on return as on entry.
 */
void
get_own_context_integer_control(CONTEXT *cxt, reg_t cs, reg_t ss,
priv_mcontext_t *mc)
{
/* We could change the parameter types to cxt_seg_t, but the args
 * passed by get_own_context_helper() in x86.asm are best simply
 * widened in passing
 */
/* Remember the caller's flags so they can be put back below */
DEBUG_DECLARE(uint origflags = cxt->ContextFlags;)
IF_X64(ASSERT_TRUNCATE(cxt->SegCs, short, cs));
cxt->SegCs = (WORD) cs; /* FIXME : need to sanitize? */
IF_X64(ASSERT_TRUNCATE(cxt->SegSs, short, ss));
cxt->SegSs = (WORD) ss;
/* avoid assert in mcontext_to_context about not having xmm flags.
 * get rid of this once we implement PR 266070. */
DODEBUG({ cxt->ContextFlags = CONTEXT_DR_STATE_NO_YMM; });
mcontext_to_context(cxt, mc, false /* !set_cur_seg */);
/* Restore the flags that were temporarily overridden for the assert */
DODEBUG({ cxt->ContextFlags = origflags; });
}
/* don't call this directly, use GET_OWN_CONTEXT macro instead (it fills
 * in CONTEXT_INTEGER and CONTEXT_CONTROL values).
 * This routine itself only fills in the ds/es/fs/gs selectors, and only when
 * CONTEXT_SEGMENTS is requested in cxt->ContextFlags.
 */
void
get_own_context(CONTEXT *cxt)
{
if (TEST(CONTEXT_SEGMENTS, cxt->ContextFlags)) {
get_segments_defg(&cxt->SegDs, &cxt->SegEs, &cxt->SegFs, &cxt->SegGs);
}
/* FIXME : do we want CONTEXT_DEBUG_REGISTERS or CONTEXT_FLOATING_POINT
 * or CONTEXT_EXTENDED_REGISTERS at some point?
 * Especially in light of PR 264138. However, no current uses need
 * to get our own xmm registers.
 */
/* Any requested flag beyond segments, integer, and control is unsupported */
ASSERT_NOT_IMPLEMENTED((cxt->ContextFlags &
~(CONTEXT_SEGMENTS|CONTEXT_INTEGER|CONTEXT_CONTROL))
== 0);
}
/***************************************************************************
* TLS
*/
/* Lock that protects the alt_tls_*_taken arrays */
DECLARE_CXTSWPROT_VAR(static mutex_t alt_tls_lock, INIT_LOCK_FREE(alt_tls_lock));
/* Number of pointer-sized slots that fit between the TEB's SpareBytes1 and
 * TxFsContext fields
 */
#define TLS_SPAREBYTES_SLOTS \
((offsetof(TEB, TxFsContext) - offsetof(TEB, SpareBytes1))/sizeof(void*))
/* One in-use flag per SpareBytes1 slot */
static bool alt_tls_spare_taken[TLS_SPAREBYTES_SLOTS];
#ifdef X64
/* Number of pointer-sized slots tracked beyond the TEB (64-bit only) */
# define TLS_POSTTEB_SLOTS 64
/* One in-use flag per post-TEB slot */
static bool alt_tls_post_taken[TLS_POSTTEB_SLOTS];
/* Use the slots at the end of the 2nd page */
# define TLS_POSTTEB_BASE_OFFS (PAGE_SIZE*2 - TLS_POSTTEB_SLOTS*sizeof(void*))
#endif
/* Cleanup for the alternate-TLS bookkeeping: deletes alt_tls_lock, but only
 * in DEBUG builds.
 */
static void
tls_exit(void)
{
#ifdef DEBUG
DELETE_LOCK(alt_tls_lock);
#endif
}
/* Caller must synchronize.
 *
 * Searches taken[0..taken_sz) for num_slots consecutive free entries, where
 * entry i corresponds to TEB offset base_offs + i*sizeof(void*).  If
 * alignment is non-zero the first entry's TEB offset must be aligned to it.
 * On success marks the entries taken, stores the TEB offset of the first
 * entry in *teb_offs, and returns true.  Returns false (leaving *teb_offs
 * untouched) if no suitable run exists.
 */
static bool
alt_tls_acquire_helper(bool *taken, size_t taken_sz, size_t base_offs,
                       uint *teb_offs /* OUT */, int num_slots, uint alignment)
{
    bool res = false;
    uint i, start = 0;
    int slots_found = 0;
    for (i = 0; i < taken_sz; i++) {
        size_t offs = base_offs + i*sizeof(void*);
        if (slots_found == 0 && !taken[i] &&
            (alignment == 0 || ALIGNED(offs, alignment))) {
            /* A run can only begin on a free, suitably aligned slot */
            start = i;
            slots_found++;
        } else if (slots_found > 0) {
            if (!taken[i])
                slots_found++;
            else
                slots_found = 0; /* start over */
        }
        if (slots_found >= num_slots)
            break;
    }
    if (slots_found >= num_slots) {
        ASSERT_TRUNCATE(uint, uint, base_offs + start*sizeof(void*));
        *teb_offs = (uint)(base_offs + start*sizeof(void*));
        for (i = start; i < start + num_slots; i++) {
            ASSERT(!taken[i]);
            taken[i] = true;
        }
        /* The check below covers the whole acquired region and does not
         * depend on the loop index, so it is performed once rather than once
         * per slot.
         */
        DOCHECK(1, {
            /* Try to check for anyone else using these slots.  The TEB pages
             * are zeroed before use.  This is only a curiosity, as we don't
             * zero on a release and thus a release-and-re-alloc can hit this.
             */
            TEB *teb = get_own_teb();
            ASSERT_CURIOSITY(is_region_memset_to_char((byte *)teb + *teb_offs,
                                                      num_slots * sizeof(void*), 0));
        });
        res = true;
    }
    return res;
}
/* Acquires num_slots consecutive alternate TLS slots inside the TEB region,
 * with the first slot's TEB offset aligned to alignment (0 for none).
 * On success returns true and stores the TEB offset in *teb_offs.
 * Takes alt_tls_lock around the search.  Requires the alt_teb_tls option.
 */
static bool
alt_tls_acquire(uint *teb_offs /* OUT */, int num_slots, uint alignment)
{
bool res = false;
ASSERT(DYNAMO_OPTION(alt_teb_tls));
/* Strategy: first, use TEB->SpareBytes1. The only known user of that field
 * is WINE, although Vista stole some of the space there for the TxFsContext
 * slot, and maybe now that Win8 has just about used up the TEB single page
 * for 32-bit future versions will take more?
 *
 * Second, on 64-bit, use space beyond the TEB on the 2nd TEB page.
 */
mutex_lock(&alt_tls_lock);
res = alt_tls_acquire_helper(alt_tls_spare_taken, TLS_SPAREBYTES_SLOTS,
offsetof(TEB, SpareBytes1), teb_offs, num_slots,
alignment);
#ifdef X64
/* Fall back to the post-TEB slots only if SpareBytes1 had no room */
if (!res) {
ASSERT_NOT_TESTED();
ASSERT(TLS_POSTTEB_BASE_OFFS > sizeof(TEB));
res = alt_tls_acquire_helper(alt_tls_post_taken, TLS_POSTTEB_SLOTS,
TLS_POSTTEB_BASE_OFFS, teb_offs, num_slots,
alignment);
}
#endif
mutex_unlock(&alt_tls_lock);
return res;
}
/* Caller must synchronize.
 *
 * Clears the in-use flags in taken[] for the num_slots entries that begin at
 * TEB offset teb_offs, where taken[0] corresponds to TEB offset base_offs.
 * Always returns true.
 */
static bool
alt_tls_release_helper(bool *taken, uint base_offs, uint teb_offs, int num_slots)
{
    uint first = (teb_offs - base_offs) / sizeof(void*);
    uint past_last = first + num_slots;
    uint idx = first;
    while (idx < past_last) {
        ASSERT(taken[idx]);
        /* XXX: I'd like to zero the slots out for all threads but there's
         * no simple way to do that
         */
        taken[idx] = false;
        idx++;
    }
    return true;
}
/* Releases num_slots alternate TLS slots starting at TEB offset teb_offs.
 * Returns true if teb_offs fell inside one of the alternate regions
 * (SpareBytes1, or on 64-bit the post-TEB slots) and the slots were released;
 * returns false if teb_offs is not in any alternate region.
 * Requires the alt_teb_tls option.
 */
static bool
alt_tls_release(uint teb_offs, int num_slots)
{
bool res = false;
size_t base_offs = offsetof(TEB, SpareBytes1);
ASSERT(DYNAMO_OPTION(alt_teb_tls));
/* First candidate: the SpareBytes1 region */
if (teb_offs >= base_offs &&
teb_offs < base_offs + TLS_SPAREBYTES_SLOTS*sizeof(void*)) {
mutex_lock(&alt_tls_lock);
res = alt_tls_release_helper(alt_tls_spare_taken, (uint) base_offs, teb_offs,
num_slots);
mutex_unlock(&alt_tls_lock);
}
#ifdef X64
/* Second candidate (64-bit only): the slots beyond the TEB */
if (!res) {
if (teb_offs >= TLS_POSTTEB_BASE_OFFS &&
teb_offs < TLS_POSTTEB_BASE_OFFS + TLS_POSTTEB_SLOTS*sizeof(void*)) {
mutex_lock(&alt_tls_lock);
res = alt_tls_release_helper(alt_tls_post_taken, TLS_POSTTEB_BASE_OFFS,
teb_offs, num_slots);
mutex_unlock(&alt_tls_lock);
}
}
#endif
return res;
}
static inline uint
tls_segment_offs(int slot)
{
return (uint) (offsetof(TEB, TlsSlots) + slot * sizeof(uint *));
}
/* Returns the first block sequence of num_slots found either bottom
 * up or top_down, that has the selected slot aligned to given alignment.
 * Returns -1 on failure to find properly aligned sequence.
 *
 * rtl_bitmap is accessed as an array of 32-bit words in which a set bit
 * means the slot is taken; bitmap_size is the number of slots (bits).
 * align_which_slot is the 0-based index, relative to the start of the
 * sequence, of the slot whose TEB offset must be aligned; it may equal
 * num_requested_slots to align the end of the sequence.  An alignment of 0
 * means no alignment requirement.  The returned value is the index of the
 * lowest slot in the sequence.
 *
 * Note that if we only want the whole sequence to fit in a cache line, callers
 * should try either align_which_slot for either first or last.
 */
int
bitmap_find_free_sequence(byte *rtl_bitmap, int bitmap_size,
                          int num_requested_slots, bool top_down,
                          int align_which_slot, /* 0 based index */
                          uint alignment)
{
    /* note: bitmap_find_set_block_sequence() works similarly on our
     * internal bitmap_t which starts initialized to 0
     */
    uint *p = (uint*)rtl_bitmap; /* we access in 32-bit words */
    int start, open_end;
    int step; /* +/- 1 */
    int i;
    int contig = 0;
    int result;
    ASSERT(ALIGNED(rtl_bitmap, sizeof(uint))); /* they promised */
    ASSERT_CURIOSITY(bitmap_size == 64/*TLS*/ || bitmap_size == 128/*FLS*/);
    ASSERT(num_requested_slots < bitmap_size);
    ASSERT_CURIOSITY(alignment < 256);
    ASSERT(align_which_slot >= 0 && /* including after last */
           align_which_slot <= num_requested_slots);
    if (top_down) {
        start = bitmap_size - 1;
        open_end = -1; /* 0 included */
        step = -1;
    } else {
        start = 0;
        open_end = bitmap_size;
        step = +1;
    }
    for (i = start; i != open_end; i += step) {
        /* Unsigned constant: shifting a signed 1 into bit 31 is undefined */
        uint taken = p[i/32] & (1U << (i % 32));
        NTPRINT("tls slot %d is %d\n", i, taken);
        if (!taken) {
            if (contig == 0) {
                /* check whether first element will be aligned */
                /* don't bother starting if not */
                /* FIXME: could add an argument which slot should be aligned here */
                int proposed_align_slot = /* first */
                    (top_down ? i - (num_requested_slots - 1) :
                     i)
                    + align_which_slot;
                /* ALIGNED doesn't work for 0 so we have to special-case it */
                bool aligned = (alignment == 0 ||
                                ALIGNED(tls_segment_offs(proposed_align_slot), alignment));
                NTPRINT("\t => @ "PFX", pivot "PFX" %saligned to 0x%x\n",
                        tls_segment_offs(i),
                        tls_segment_offs(proposed_align_slot),
                        aligned ? "" : "not ", alignment);
                if (aligned)
                    contig++;
                else
                    contig = 0; /* try at next */
            } else
                contig++;
            NTPRINT("\t => %d contig @ "PFX"\n", contig, tls_segment_offs(i));
            ASSERT(contig <= num_requested_slots);
            if (contig == num_requested_slots)
                break;
        } else {
            contig = 0; /* start over! */
        }
    }
    if (contig < num_requested_slots) {
        result = -1; /* failure */
    } else {
        /* i is the last slot visited: the lowest one when walking top down,
         * the highest one when walking bottom up
         */
        result = top_down ? i : i - (num_requested_slots - 1);
        ASSERT(i >= 0 && i < bitmap_size);
        /* ALIGNED doesn't work for 0 so we have to special-case it */
        ASSERT(alignment == 0 ||
               ALIGNED(tls_segment_offs(result + align_which_slot), alignment));
    }
    return result;
}
/* Sets the bits for slots [first_slot, last_slot_open_end) in rtl_bitmap,
 * marking them as taken.  rtl_bitmap is accessed as an array of 32-bit words
 * and bitmap_size is its size in bits.
 */
void
bitmap_mark_taken_sequence(byte *rtl_bitmap, int bitmap_size,
                           int first_slot, int last_slot_open_end)
{
    int i;
    uint *p = (uint*)rtl_bitmap; /* we access in 32-bit words */
    ASSERT(ALIGNED(rtl_bitmap, sizeof(uint))); /* they promised */
    ASSERT(first_slot >= 0 && last_slot_open_end <= bitmap_size);
    /* Unsigned constant: shifting a signed 1 into bit 31 is undefined */
    for (i = first_slot; i < last_slot_open_end; i++)
        p[i/32] |= (1U << (i % 32));
}
/* Clears the bits for slots [first_slot, first_slot + num_slots) in
 * rtl_bitmap, marking them as free.  rtl_bitmap is accessed as an array of
 * 32-bit words and bitmap_size is its size in bits.
 */
void
bitmap_mark_freed_sequence(byte *rtl_bitmap, int bitmap_size,
                           int first_slot, int num_slots)
{
    int i;
    uint *p = (uint*)rtl_bitmap; /* we access in 32-bit words */
    /* Same sanity checks as bitmap_mark_taken_sequence() */
    ASSERT(ALIGNED(rtl_bitmap, sizeof(uint)));
    ASSERT(first_slot >= 0 && first_slot + num_slots <= bitmap_size);
    /* Unsigned constant: shifting a signed 1 into bit 31 is undefined */
    for (i = first_slot; i < first_slot + num_slots; i++)
        p[i/32] &= ~(1U << (i % 32));
}
/* Our version of kernel32's TlsAlloc
 * If synch is false, assumes that the peb lock does not need to be obtained,
 * which may be safer than acquiring the lock, though when there's only a single
 * thread it shouldn't make any difference (it's a recursive lock).
 *
 * Looks for num_slots consecutive free slots in the PEB's TlsBitmap, honoring
 * alignment (0 for none) and tls_flags (TLS_FLAG_BITMAP_TOP_DOWN,
 * TLS_FLAG_CACHE_LINE_START, TLS_FLAG_BITMAP_FILL), marks them taken, and, if
 * teb_offs is non-NULL, stores the TEB offset of the first slot there.
 * Returns true on success.  A result starting at slot 0 is treated like a
 * failure; in that case, if the alt_teb_tls option is on, alt_tls_acquire()
 * is tried instead.
 */
static bool
tls_alloc_helper(int synch, uint *teb_offs /* OUT */, int num_slots,
                 uint alignment, uint tls_flags)
{
    PEB *peb = get_own_peb();
    int start;
    RTL_BITMAP local_bitmap;
    bool using_local_bitmap = false;
    NTSTATUS res;
    if (synch) {
        /* FIXME: I read somewhere they are removing more PEB pointers in Vista or earlier.. */
        /* TlsAlloc calls RtlAcquirePebLock which calls RtlEnterCriticalSection */
        res = RtlEnterCriticalSection(peb->FastPebLock);
        if (!NT_SUCCESS(res))
            return false;
    }
    /* we align the fs offset and assume that the fs base is page-aligned */
    ASSERT(alignment < PAGE_SIZE);
    /* Transparency notes: we doubt any app relies on a particular slot to be available.
     * These are dynamic TLS slots, after all, used only for dlls, who don't know
     * which other dlls may be in the address space.  The app is going to use
     * static TLS.  Furthermore, NT only has 64 slots available, so it's unlikely
     * an app uses up all the available TLS slots (though we have to have one that's
     * in the TEB itself, meaning one of the first 64).
     * We walk backward in an attempt to not disrupt the dynamic sequence if only a
     * few are in use.
     */
    /* case 6770: SQL Server 2005 broke most of the above assumptions:
     * - it allocates 38 TLS entries and expects them to all be in
     * TLS64 furthermore it assumes that 38 consecutive calls to
     * TlsAlloc() return consecutive TLS slots.  Therefore we should
     * have to make sure we do not leave any slots in a shorter
     * earlier sequence available.  Although SQL can't handle going
     * into the TlsExpansionBitMap
     */
    if (peb->TlsBitmap == NULL) {
        /* Not initialized yet so use a temp struct to point at the real bits.
         * FIXME i#812: ensure our bits here don't get zeroed when ntdll is initialized
         */
        ASSERT(dr_earliest_injected);
        using_local_bitmap = true;
        peb->TlsBitmap = &local_bitmap;
        local_bitmap.SizeOfBitMap = 64;
        local_bitmap.BitMapBuffer = (void *) &peb->TlsBitmapBits;
    } else
        ASSERT(peb->TlsBitmap != NULL);
    /* TlsBitmap always points to next field, TlsBitmapBits, but we'll only
     * use the pointer for generality
     */
    ASSERT(&peb->TlsBitmapBits == (void*)peb->TlsBitmap->BitMapBuffer);
    DOCHECK(1, {
        int first_available = bitmap_find_free_sequence(peb->TlsBitmap->BitMapBuffer,
                                                        peb->TlsBitmap->SizeOfBitMap,
                                                        1, /* single */
                                                        false, /* bottom up */
                                                        0, 0 /* no alignment */);
        /* On XP ntdll seems to grab slot 0 of the TlsBitmap before loading
         * kernel32, see if early injection gets us before that */
        /* On Win2k usually first_available == 0, but not in some
         * runall tests, so can't assert on the exact value */
        ASSERT_CURIOSITY(first_available >= 0);
    });
    /* only when filling need to find a first_empty in release */
    /* TLS_FLAG_BITMAP_FILL - should first find a single slot
     * available, then look for whole sequence, then should go through
     * and mark ALL entries inbetween.  Of course we know we can't go
     * beyond index 63 in either request.
     */
    if (TEST(TLS_FLAG_BITMAP_FILL, tls_flags)) {
        int first_to_fill = bitmap_find_free_sequence(peb->TlsBitmap->BitMapBuffer,
                                                      peb->TlsBitmap->SizeOfBitMap,
                                                      1, /* single */
                                                      false, /* bottom up */
                                                      0, 0 /* no alignment */);
        ASSERT_NOT_TESTED();
        /* we only fill from the front - and taking all up to the top isn't nice */
        ASSERT(!TEST(TLS_FLAG_BITMAP_TOP_DOWN, tls_flags));
        ASSERT_NOT_IMPLEMENTED(false);
        /* FIXME: need to save first slot, so we can free the
         * filled slots on exit */
    }
    /* TLS_FLAG_BITMAP_TOP_DOWN will take a slot at end if possible
     * for better transparency, also for better reproducibility */
    /* TLS_FLAG_CACHE_LINE_START - will align the first entry,
     * otherwise align either first or last since we only care to fit
     * on a line */
    /* FIXME: align at specific element - not necessary since not
     * aligning at all works well for our current choice
     */
    /* Note the TLS64 is at fs:[0xe10-0xf10)
     * 0xf00 is a cache line start for either 32 or 64 byte
     *
     * If we want to have commonly used items on the same cache line,
     * but also could care about starting at its beginning (not
     * expected to matter for data but should measure).
     *
     * If we only needed 4 slots 0xf00 would be at a cache line start
     * and satisfy all requirements.
     *
     * If we can get not so important items to cross the line, then
     * we can have 0xf00 as the balancing item, and the previous 8
     * slots will be in one whole cache line on both 32 and 64 byte.
     * If we keep it at that then we don't really need alignment hint
     * at all - grabbing last is good enough.
     *
     * Only on P4 we can fit more than 8 entries on the same cache
     * line if presumed to all be hot, then we have to use 0xec0 as
     * start and leave empty the 0xf00 line.  On P3 however we can
     * use 0xee0 - only in DEBUG=+HASHTABLE_STATISTICS we use one
     * extra slot that ends up at 0xec0.  The minor point for P4 is
     * then whether we use the first 12 or the last 12 slots in the
     * cache line.
     */
    /* FIXME: cache line front, otherwise should retry when either
     * start or end is fine, and choose closest to desired end of
     * bitmap */
    start = bitmap_find_free_sequence(peb->TlsBitmap->BitMapBuffer,
                                      peb->TlsBitmap->SizeOfBitMap,
                                      num_slots,
                                      TEST(TLS_FLAG_BITMAP_TOP_DOWN, tls_flags),
                                      0 /* align first element */,
                                      alignment);
    if (!TEST(TLS_FLAG_CACHE_LINE_START, tls_flags)) {
        /* try either way, worthwhile only if we fit into an alignment unit */
        int end_aligned =
            bitmap_find_free_sequence(peb->TlsBitmap->BitMapBuffer,
                                      peb->TlsBitmap->SizeOfBitMap,
                                      num_slots,
                                      TEST(TLS_FLAG_BITMAP_TOP_DOWN, tls_flags),
                                      /* align the end of last
                                       * element, so open ended */ num_slots,
                                      alignment);
        if (start < 0) {
            ASSERT_NOT_TESTED();
            start = end_aligned;
        } else {
            if (TEST(TLS_FLAG_BITMAP_TOP_DOWN, tls_flags)) {
                /* prefer latest start */
                if (start < end_aligned) {
                    start = end_aligned;
                    ASSERT_NOT_TESTED();
                }
            } else {
                /* bottom up, prefer earlier start.  A failed end-aligned
                 * search returns -1, which must not be mistaken for an
                 * earlier slot and overwrite a valid start.
                 */
                if (end_aligned >= 0 && start > end_aligned) {
                    start = end_aligned;
                }
            }
        }
    }
    if (start < 0) {
        NTPRINT("Failed to find %d slots aligned at %d\n", num_slots, alignment);
        goto tls_alloc_exit;
    }
    bitmap_mark_taken_sequence(peb->TlsBitmap->BitMapBuffer,
                               peb->TlsBitmap->SizeOfBitMap,
                               start,
                               /* FIXME: TLS_FLAG_BITMAP_FILL should use first_to_fill */
                               start + num_slots);
    if (teb_offs != NULL) {
        *teb_offs = tls_segment_offs(start);
        /* mostly safe since using the small TLS map (of 64 entries)
         * and that is on TEB so reachable with a short */
        /* to avoid ASSERT_TRUNCATE in os_tls_offset() checking here */
        ASSERT_TRUNCATE(ushort, ushort, *teb_offs);
        NTPRINT("Taking %d tls slot(s) %d-%d at offset 0x%x\n", num_slots, start, start + num_slots, *teb_offs);
    }
    DOCHECK(1, {
        int first_available =
            bitmap_find_free_sequence(peb->TlsBitmap->BitMapBuffer,
                                      peb->TlsBitmap->SizeOfBitMap,
                                      1, /* single */
                                      false, /* bottom up */
                                      0, 0 /* no alignment */);
        ASSERT_CURIOSITY(first_available >= 0);
        /* SQL2005 assumes that first available slot means start of a
         * sequence of 38 blanks that fit in TLS64.  Unfortunately
         * can't assert this for all processes, since even for make
         * progrun (notepad on XP SP2, late injection) 16 bits are
         * already taken by others.  Worse, exactly in SQL server on
         * Win2k, at the time we are started there is room, but later
         * loaded DLLs use it.  Case 6859 on other attempts to catch
         * the problem.
         */
    });
 tls_alloc_exit:
    /* Undo the temporary bitmap pointer before anyone else can see it */
    if (using_local_bitmap)
        peb->TlsBitmap = NULL;
    if (synch) {
        res = RtlLeaveCriticalSection(peb->FastPebLock);
        if (!NT_SUCCESS(res))
            return false;
    }
    /* ntdll seems to grab slot 0 of the TlsBitmap before loading
     * kernel32, see if early injection gets us before that if we go
     * bottom up, FIXME: if hit change interface, since 0 is returned
     * on error
     */
    ASSERT_CURIOSITY(start != 0);
    if (start <= 0 && DYNAMO_OPTION(alt_teb_tls)) {
        /* i#1163: fall back on other space in TEB */
        return alt_tls_acquire(teb_offs, num_slots, alignment);
    }
    return (start > 0);
}
/* Allocates a single TLS slot with no alignment requirement, using the
 * tls_flags option.  On success returns true and, if teb_offs is non-NULL,
 * stores the slot's TEB offset there.  synch selects whether the PEB lock is
 * acquired (see tls_alloc_helper()).
 */
bool
tls_alloc(int synch, uint *teb_offs /* OUT */)
{
return tls_alloc_helper(synch, teb_offs, 1, 0/* any alignment */,
/* same top down or bottom up choice as tls_calloc */
DYNAMO_OPTION(tls_flags));
}
/* Allocates num tls slots aligned with particular alignment
 * Alignment must be sub-page
 * On success returns true and, if teb_offs is non-NULL, stores the TEB offset
 * of the first slot there.  synch selects whether the PEB lock is acquired
 * (see tls_alloc_helper()).
 */
bool
tls_calloc(int synch, uint *teb_offs /* OUT */, int num, uint alignment)
{
return tls_alloc_helper(synch, teb_offs, num, alignment,
DYNAMO_OPTION(tls_flags));
}
/* Frees num consecutive TLS slots whose first slot is at TEB offset teb_offs.
 * If the alt_teb_tls option is on and the offset belongs to one of the
 * alternate TEB regions, the slots are released there.  Otherwise the
 * corresponding bits in the PEB's TlsBitmap are cleared.
 * If synch is true the PEB lock is try-acquired first; if it cannot be
 * acquired the slots are not freed and false is returned.
 * Returns true on success.
 */
static bool
tls_free_helper(int synch, uint teb_offs, int num)
{
    PEB *peb = get_own_peb();
    int i, start;
    int slot;
    uint *p;
    NTSTATUS res;
    GET_NTDLL(RtlTryEnterCriticalSection, (IN OUT RTL_CRITICAL_SECTION *crit));
    if (DYNAMO_OPTION(alt_teb_tls) &&
        alt_tls_release(teb_offs, num))
        return true;
    if (synch) {
        /* TlsFree calls RtlAcquirePebLock which calls RtlEnterCriticalSection
         * I'm worried about synch problems so I'm going to just do a Try
         * and if it fails I simply will not free the slot, not too bad of a leak.
         * On a detach a suspended thread might be holding this lock, or a thread
         * killed due to an attack might have held it.  We could, on failure to
         * get the lock, xchg and read back what we write and try to fix up the bits,
         * with the worst case being the app hasn't written but has read and thus
         * our free won't go through, but in the past we just called TlsFree and
         * never had a lock problem so I'm going to assume Try will work the vast
         * majority of the time and the times it doesn't we can eat the leak.
         */
        res = RtlTryEnterCriticalSection(peb->FastPebLock);
        /* RtlTryEnterCriticalSection reports a boolean: non-zero when the
         * lock was acquired and zero when another thread owns it.  It is not
         * an NTSTATUS, so NT_SUCCESS() (which accepts 0) cannot be used to
         * detect failure here.
         */
        ASSERT_CURIOSITY(res != 0);
        if (res == 0)
            return false;
    }
    ASSERT(peb->TlsBitmap != NULL);
    /* TlsBitmap always points to next field, TlsBitmapBits, but we'll only
     * use the pointer for generality
     */
    p = (uint *) peb->TlsBitmap->BitMapBuffer;
    start = (teb_offs - offsetof(TEB, TlsSlots)) / sizeof(uint *);
    for (slot = 0, i = start; slot < num; slot++, i++) {
        NTPRINT("Freeing tls slot %d at offset 0x%x -> index %d\n", slot, teb_offs, i);
        /* In case we aren't synched, zero the tls field before we release it,
         * (of course that only takes care of one of many possible races if we
         * aren't synched). */
        /* This will zero this tls index for all threads (see disassembly of
         * FreeTls in kernel32, wine srcs).  Strange interface using a
         * thread handle, would be more sensical as a process info class (esp.
         * with respect to permissions).  Note that in the wine srcs at least
         * this syscall will only accept NT_CURRENT_THREAD as the handle.  Xref
         * case 8143 for why we need to zero the tls slot for all threads. */
        /* XXX i#1156: we can't zero on win8 where we write the
         * termination syscall args into our TLS slots (i#565, r1630).
         * We always synch there though.
         */
        if (!synch || doing_detach) {
            res = nt_raw_SetInformationThread(NT_CURRENT_THREAD,
                                              ThreadZeroTlsCell,
                                              &i, sizeof(i));
            ASSERT(NT_SUCCESS(res));
        }
        /* Unsigned constant: shifting a signed 1 into bit 31 is undefined */
        p[i/32] &= ~(1U << (i % 32));
    }
    bitmap_mark_freed_sequence(peb->TlsBitmap->BitMapBuffer,
                               peb->TlsBitmap->SizeOfBitMap,
                               start, num);
    if (synch) {
        res = RtlLeaveCriticalSection(peb->FastPebLock);
        ASSERT(NT_SUCCESS(res));
        if (!NT_SUCCESS(res))
            return false;
    }
    return true;
}
/* Frees the single TLS slot at TEB offset teb_offs; see tls_free_helper()
 * for the meaning of synch and of the return value.
 */
bool
tls_free(int synch, uint teb_offs)
{
return tls_free_helper(synch, teb_offs, 1);
}
/* Frees num consecutive TLS slots starting at TEB offset teb_offs; see
 * tls_free_helper() for the meaning of synch and of the return value.
 */
bool
tls_cfree(int synch, uint teb_offs, int num)
{
return tls_free_helper(synch, teb_offs, num);
}
#endif /* !NOT_DYNAMORIO_CORE_PROPER */
/***************************************************************************/
/* Queries the ProcessVmCounters information of process h into *info.
 * Returns true if the query succeeded.
 */
bool
get_process_mem_stats(HANDLE h, VM_COUNTERS *info)
{
    ULONG returned_len;
    NTSTATUS status = NtQueryInformationProcess(h, ProcessVmCounters, info,
                                                sizeof(VM_COUNTERS), &returned_len);
    /* On success the kernel should have written exactly one VM_COUNTERS */
    ASSERT(!NT_SUCCESS(status) || returned_len == sizeof(VM_COUNTERS));
    return NT_SUCCESS(status);
}
/* Get process quota limits information: queries ProcessQuotaLimits for
 * process h into *qlimits.
 * Note returns raw NTSTATUS.
 */
NTSTATUS
get_process_mem_quota(HANDLE h, QUOTA_LIMITS *qlimits)
{
    ULONG returned_len;
    NTSTATUS status = NtQueryInformationProcess(h, ProcessQuotaLimits, qlimits,
                                                sizeof(QUOTA_LIMITS), &returned_len);
    /* On success the kernel should have written exactly one QUOTA_LIMITS */
    ASSERT(!NT_SUCCESS(status) || returned_len == sizeof(QUOTA_LIMITS));
    return status;
}
/* Get process handle count: queries ProcessHandleCount for process ph into
 * *handle_count.
 * Note returns raw NTSTATUS.
 */
NTSTATUS
get_process_handle_count(HANDLE ph, ULONG *handle_count)
{
    ULONG returned_len;
    NTSTATUS status = NtQueryInformationProcess(ph, ProcessHandleCount, handle_count,
                                                sizeof(ULONG), &returned_len);
    /* On success the kernel should have written exactly one ULONG */
    ASSERT(!NT_SUCCESS(status) || returned_len == sizeof(ULONG));
    return status;
}
/* Returns the CPU load of process h as an integer percentage: its combined
 * user and kernel time divided by the time elapsed since its creation.
 * Returns -1 if the process times cannot be queried or if the elapsed time
 * is not positive.
 */
int
get_process_load(HANDLE h)
{
    KERNEL_USER_TIMES times;
    LONGLONG cpu_time;
    LONGLONG elapsed_time;
    ULONG len = 0;
    NTSTATUS res = NtQueryInformationProcess((HANDLE) h, ProcessTimes,
                                             &times, sizeof(times), &len);
    if (!NT_SUCCESS(res))
        return -1;
    /* return length not trustworthy, according to Nebbett, so we don't test it */
    elapsed_time = query_time_100ns() - times.CreateTime.QuadPart;
    if (elapsed_time <= 0)
        return -1;
    /* we want %CPU == (scheduled time) / (wall clock time) */
    cpu_time = times.UserTime.QuadPart + times.KernelTime.QuadPart;
    return (int) ((100 * cpu_time) / elapsed_time);
}
/* Returns whether process h is a WOW64 process, based on the
 * ProcessWow64Information query.
 * Returns 0 for both known false and error
 * FIXME: do we still have the restriction of not returning a bool for ntdll.c
 * routines?!?
 */
bool
is_wow64_process(HANDLE h)
{
    /* since this is called a lot we remember the result for the current process */
    static bool self_init = false;
    static bool self_is_wow64 = false;
    ptr_uint_t is_wow64;
    NTSTATUS res;
    ULONG len = 0;
    bool answer;

    /* Fast path: cached answer for our own process */
    if (self_init && h == NT_CURRENT_PROCESS)
        return self_is_wow64;

    res = NtQueryInformationProcess((HANDLE) h, ProcessWow64Information,
                                    &is_wow64, sizeof(is_wow64), &len);
    if (!NT_SUCCESS(res) || len != sizeof(is_wow64)) {
#if !defined(NOT_DYNAMORIO_CORE_PROPER) && !defined(NOT_DYNAMORIO_CORE)
        /* PR 233191: we expect failure on NT but nowhere else */
        ASSERT(res == STATUS_INVALID_INFO_CLASS &&
               get_os_version() == WINDOWS_VERSION_NT);
#endif
        is_wow64 = 0;
    }
    answer = (is_wow64 != 0);
    if (h == NT_CURRENT_PROCESS) {
#if !defined(NOT_DYNAMORIO_CORE_PROPER) && !defined(NOT_DYNAMORIO_CORE)
        ASSERT(!dynamo_initialized); /* .data should be writable */
#endif
        self_is_wow64 = answer;
        self_init = true;
    }
    return answer;
}
/* Queries the ProcessDeviceMap information class for process into *map.
 * Note returns raw NTSTATUS.
 */
NTSTATUS
nt_get_drive_map(HANDLE process, PROCESS_DEVICEMAP_INFORMATION *map OUT)
{
    /* the returned length is not examined */
    ULONG returned_len = 0;
    NTSTATUS status = NtQueryInformationProcess(process, ProcessDeviceMap,
                                                map, sizeof(*map), &returned_len);
    return status;
}
/* Allocates virtual memory in process via NtAllocateVirtualMemory.
 *
 * base:   IN/OUT.  *base is used as a hint if present and must be PAGE_SIZE
 *         aligned on entry; it is passed by address to the syscall.
 * size:   requested size; will be bumped up to a PAGE_SIZE multiple.
 * prot:   protection flags passed through to the syscall.
 * commit: allocation type passed through to the syscall.
 *
 * Note returns raw NTSTATUS.  On STATUS_CONFLICTING_ADDRESSES the caller
 * decides whether to retry.
 */
NTSTATUS
nt_remote_allocate_virtual_memory(HANDLE process, void **base, size_t size,
                                  uint prot, memory_commit_status_t commit)
{
    NTSTATUS res;
    SIZE_T sz = size;
    ASSERT(ALIGNED(*base, PAGE_SIZE) && "base argument not initialized at PAGE_SIZE");
    res = NT_SYSCALL(AllocateVirtualMemory, process, base, 0 /* zero bits */,
                     &sz, commit, prot);
    if (res == STATUS_CONFLICTING_ADDRESSES) {
        /* res is a 32-bit NTSTATUS, so it must not be printed with the
         * pointer-sized PFX format
         */
        NTPRINT("NtAllocateVirtualMemory: conflict at base "PFX
                ", res=0x%08x\n", *base, res);
        /* Let caller decide whether to retry or not. */
    }
    /* FIXME: alert caller if sz > size? only happens if size not PAGE_SIZE multiple */
    /* sizes are size_t/SIZE_T: use SZFMT rather than %d */
    NTPRINT("NtAllocateVirtualMemory: asked for "SZFMT" bytes, got "SZFMT
            " bytes at "PFX"\n", size, sz, *base);
    ASSERT(sz >= size);
    return res;
}
/* Releases (MEM_RELEASE) the memory at base in process, e.g. memory obtained
 * from nt_remote_allocate_virtual_memory().
 * Note returns raw NTSTATUS.
 */
NTSTATUS
nt_remote_free_virtual_memory(HANDLE process, void *base)
{
    /* has to be 0 for MEM_RELEASE */
    SIZE_T sz = 0;
    NTSTATUS res = NT_SYSCALL(FreeVirtualMemory, process, &base, &sz, MEM_RELEASE);
    NTPRINT("NtRemoteFreeVirtualMemory: freed "SZFMT" bytes\n", sz);
    return res;
}
/* Current-process version of nt_remote_allocate_virtual_memory():
 * uses base hint if present; will bump size up to PAGE_SIZE multiple.
 * Note returns raw NTSTATUS.
 */
NTSTATUS
nt_allocate_virtual_memory(void **base, size_t size, uint prot, memory_commit_status_t commit)
{
    NTSTATUS status = nt_remote_allocate_virtual_memory(NT_CURRENT_PROCESS, base,
                                                        size, prot, commit);
    return status;
}
/* commit memory previously reserved with nt_allocate_virtual_memory()
* Note returns raw NTSTATUS.
*/
NTSTATUS
nt_commit_virtual_memory(void *base, size_t size, uint prot)
{
NTSTATUS res;
/* copies of the inputs, compared below against the values the syscall
 * leaves in base and size (DEBUG_DECLARE presumably limits these to
 * debug builds -- TODO confirm)
 */
DEBUG_DECLARE(void *original_base = base;)
DEBUG_DECLARE(size_t original_size = size;)
/* base and size are passed by address, so the syscall may update them */
res = NT_SYSCALL(AllocateVirtualMemory, NT_CURRENT_PROCESS, &base, 0, (SIZE_T*)&size,
MEM_COMMIT, /* should be already reserved */ prot);
/* the region is expected to come back exactly as requested */
ASSERT(base == original_base);
ASSERT(size == original_size);
ASSERT_CURIOSITY(NT_SUCCESS(res));
return res;
}
/* Decommit memory previously committed with nt_commit_virtual_memory() or
 * nt_allocate_virtual_memory().  Still available for committing again.
 *
 * base: start of the region to decommit in the current process.
 * size: number of bytes to decommit; the syscall's OUT size is asserted to
 *       equal it.
 *
 * Note returns raw NTSTATUS.
 */
NTSTATUS
nt_decommit_virtual_memory(void *base, size_t size)
{
    NTSTATUS res;
    SIZE_T sz = size; /* copied to compare with OUT value */
    res = NT_SYSCALL(FreeVirtualMemory, NT_CURRENT_PROCESS, &base, &sz, MEM_DECOMMIT);
    ASSERT(sz == size);
    /* sz is a SIZE_T: use SZFMT rather than %d */
    NTPRINT("NtFreeVirtualMemory: decommitted "SZFMT" bytes [res=%d]\n", sz, res);
    ASSERT_CURIOSITY(NT_SUCCESS(res));
    return res;
}
/* Releases (MEM_RELEASE) the memory at base in the current process, e.g.
 * memory obtained from nt_allocate_virtual_memory().
 * Note returns raw NTSTATUS.
 */
NTSTATUS
nt_free_virtual_memory(void *base)
{
    /* has to be 0 for MEM_RELEASE */
    SIZE_T sz = 0;
    NTSTATUS res = NT_SYSCALL(FreeVirtualMemory, NT_CURRENT_PROCESS, &base, &sz,
                              MEM_RELEASE);
    NTPRINT("NtFreeVirtualMemory: freed "SZFMT" bytes\n", sz);
    ASSERT_CURIOSITY(NT_SUCCESS(res));
    return res;
}
/* FIXME: change name to nt_protect_virtual_memory() and use
* nt_remote_protect_virtual_memory(), or maybe just change callers to
* pass NT_CURRENT_PROCESS to nt_remote_protect_virtual_memory()
* instead to avoid the extra function call, especially with self-protection on
*/
/* Invokes NtProtectVirtualMemory on the current process for size bytes
 * starting at base with protection prot.  old_prot is handed to the syscall
 * (presumably it receives the previous protection -- confirm against the
 * NtProtectVirtualMemory contract).
 * Returns whether the syscall succeeded.
 */
bool
protect_virtual_memory(void *base, size_t size, uint prot, uint *old_prot)
{
NTSTATUS res;
/* base and sz are passed by address, so the syscall may update both */
SIZE_T sz = size;
res = NT_SYSCALL(ProtectVirtualMemory, NT_CURRENT_PROCESS, &base, &sz, prot,
(ULONG*)old_prot);
/* note: base here is the value left by the syscall, not the caller's original */
NTPRINT("NtProtectVirtualMemory: "PFX"-"PFX" 0x%x => 0x%x\n",
base, (byte *)base + size, prot, res);
/* the size reported back is expected to be size rounded up to a page multiple */
ASSERT(sz == ALIGN_FORWARD(size, PAGE_SIZE));
return NT_SUCCESS(res);
}
/* Invokes NtProtectVirtualMemory on process for size bytes starting at base
 * with protection prot.  old_prot is handed to the syscall as its final
 * argument.  base and the size are passed by address, so the values printed
 * and asserted on below are the ones the syscall left behind.
 * Returns whether the syscall succeeded.
 */
bool
nt_remote_protect_virtual_memory(HANDLE process,
                                 void *base, size_t size, uint prot, uint *old_prot)
{
    NTSTATUS res;
    SIZE_T sz = size;
    res = NT_SYSCALL(ProtectVirtualMemory, process, &base, &sz, prot, (ULONG*)old_prot);
    NTPRINT("NtProtectVirtualMemory: process "PFX" "PFX"-"PFX" 0x%x => 0x%x\n",
            process, base, (byte *)base + size, prot, res);
    ASSERT(ALIGNED(base, PAGE_SIZE) && "base argument not initialized at PAGE_SIZE");
    /* sizes are size_t/SIZE_T: use SZFMT rather than %d */
    NTPRINT("NtProtectVirtualMemory: intended to change "SZFMT" bytes, "
            "modified "SZFMT" bytes at "PFX"\n", size, sz, base);
    ASSERT(sz >= size);
    return NT_SUCCESS(res);
}
/* Queries MemoryBasicInformation for address pc in process into *mbi, which
 * is zeroed first.  mbilen must be sizeof(MEMORY_BASIC_INFORMATION); got is
 * passed to the syscall as its returned-length argument.
 * Note returns raw NTSTATUS.
 */
NTSTATUS
nt_remote_query_virtual_memory(HANDLE process, const byte *pc,
                               MEMORY_BASIC_INFORMATION *mbi, size_t mbilen, size_t *got)
{
    NTSTATUS status;
    ASSERT(mbilen == sizeof(MEMORY_BASIC_INFORMATION));
    memset(mbi, 0, sizeof(*mbi));
    status = NT_SYSCALL(QueryVirtualMemory, process, pc, MemoryBasicInformation,
                        mbi, mbilen, (PSIZE_T)got);
    return status;
}
/* We use this instead of VirtualQuery b/c there are problems using
* win32 API routines inside of the app using them
*/
/* We make our signature look like VirtualQuery */
size_t
query_virtual_memory(const byte *pc, MEMORY_BASIC_INFORMATION *mbi, size_t mbilen)
{
    size_t got;
    NTSTATUS res =
        nt_remote_query_virtual_memory(NT_CURRENT_PROCESS, pc, mbi, mbilen, &got);
    ASSERT(!NT_SUCCESS(res) || got == sizeof(MEMORY_BASIC_INFORMATION));
#if !defined(NOT_DYNAMORIO_CORE_PROPER) && !defined(NOT_DYNAMORIO_CORE)
    /* for stress testing a fake driver access */
    if (INTERNAL_OPTION(stress_fake_userva) != 0 &&
        pc > (app_pc)INTERNAL_OPTION(stress_fake_userva))
        return 0;
#endif
    /* only 0 and sizeof(MEMORY_BASIC_INFORMATION) should be expected by callers */
    return NT_SUCCESS(res) ? got : 0;
}
/* Queries MemorySectionName for address pc in the current process.
 *
 * buf is used first as scratch space for a MEMORY_SECTION_NAME header
 * followed by the inlined name buffer; on success the name is then moved to
 * the start of buf and null-terminated.
 * buf_bytes is the total size of buf in bytes and must be able to hold at
 * least the MEMORY_SECTION_NAME header; otherwise STATUS_BUFFER_TOO_SMALL is
 * returned without touching buf.
 *
 * Note returns raw NTSTATUS.
 */
NTSTATUS
get_mapped_file_name(const byte *pc, PWSTR buf, USHORT buf_bytes)
{
    NTSTATUS res;
    SIZE_T got;
    /* name.SectionFileName.Buffer MUST be inlined: even if Buffer is initialized
     * to point elsewhere, the kernel modifies it. The size passed in must include
     * the struct and the post-inlined buffer.
     */
    MEMORY_SECTION_NAME *name = (MEMORY_SECTION_NAME *) buf;
    /* avoid writing the header past the end of buf and underflowing the
     * MaximumLength computation below
     */
    if (buf_bytes < sizeof(*name))
        return STATUS_BUFFER_TOO_SMALL;
    name->SectionFileName.Length = 0;
    name->SectionFileName.MaximumLength = (USHORT)(buf_bytes - sizeof(*name));
    /* The inlined buffer starts right after the header.  buf is a PWSTR, so
     * "buf + sizeof(*name)" would advance by sizeof(*name) wide chars rather
     * than bytes.
     */
    name->SectionFileName.Buffer = (PWSTR)(name + 1);
    res = NT_SYSCALL(QueryVirtualMemory, NT_CURRENT_PROCESS, pc, MemorySectionName,
                     name, buf_bytes, &got);
    if (NT_SUCCESS(res)) {
        /* save since we'll be clobbering the fields */
        int len = name->SectionFileName.Length;
        /* source and destination overlap within buf, hence memmove */
        memmove(buf, name->SectionFileName.Buffer, len);
        buf[len/sizeof(wchar_t)] = L'\0';
    }
    return res;
}
/* Thin wrapper around ntdll's NtReadVirtualMemory: reads buffer_length bytes
 * at base in process into buffer; bytes_read is passed as the returned-length
 * argument.
 * Note returns raw NTSTATUS.
 */
NTSTATUS
nt_raw_read_virtual_memory(HANDLE process, const void *base, void *buffer,
                           size_t buffer_length, size_t *bytes_read)
{
    GET_NTDLL(NtReadVirtualMemory, (IN HANDLE ProcessHandle,
                                    IN const void *BaseAddress,
                                    OUT PVOID Buffer,
                                    IN SIZE_T BufferLength,
                                    OUT PSIZE_T ReturnLength OPTIONAL));
    return NtReadVirtualMemory(process, base, buffer,
                               buffer_length, (SIZE_T*)bytes_read);
}
/* Boolean version of nt_raw_read_virtual_memory(): returns whether the read
 * succeeded.
 */
bool
nt_read_virtual_memory(HANDLE process, const void *base, void *buffer,
                       size_t buffer_length, size_t *bytes_read)
{
    NTSTATUS status = nt_raw_read_virtual_memory(process, base, buffer,
                                                 buffer_length, bytes_read);
    return NT_SUCCESS(status);
}
/* Thin wrapper around the WriteVirtualMemory system call: writes
 * buffer_length bytes from buffer to base in process; bytes_written is passed
 * as the returned-length argument.
 * Note returns raw NTSTATUS.
 */
NTSTATUS
nt_raw_write_virtual_memory(HANDLE process, void *base, const void *buffer,
                            size_t buffer_length, size_t *bytes_written)
{
    GET_RAW_SYSCALL(WriteVirtualMemory, IN HANDLE ProcessHandle,
                    IN PVOID BaseAddress,
                    IN const void *Buffer,
                    IN SIZE_T BufferLength,
                    OUT PSIZE_T ReturnLength OPTIONAL);
    return NT_SYSCALL(WriteVirtualMemory, process, base, buffer,
                      buffer_length, (SIZE_T*)bytes_written);
}
/* Boolean version of nt_raw_write_virtual_memory(): returns whether the
 * write succeeded.
 */
bool
nt_write_virtual_memory(HANDLE process, void *base, const void *buffer,
                        size_t buffer_length, size_t *bytes_written)
{
    NTSTATUS status = nt_raw_write_virtual_memory(process, base, buffer,
                                                  buffer_length, bytes_written);
    return NT_SUCCESS(status);
}
/* There are no Win32 API routines to do this, so we use NtContinue */
/* Invokes the Continue system call on cxt with TestAlert == 0.
 * Not expected to return: reaching the end of this routine trips
 * ASSERT_NOT_REACHED.
 */
void
nt_continue(CONTEXT *cxt)
{
GET_RAW_SYSCALL(Continue, IN PCONTEXT Context, IN BOOLEAN TestAlert);
NT_SYSCALL(Continue, cxt, 0/* don't change APC status */);
/* should not get here */
ASSERT_NOT_REACHED();
}
/* Invokes the GetContextThread system call for hthread, filling in *cxt.
 * On x64, cxt must be 16-byte aligned.
 * Note returns raw NTSTATUS.
 */
NTSTATUS
nt_get_context(HANDLE hthread, CONTEXT *cxt)
{
    NTSTATUS status;
    GET_RAW_SYSCALL(GetContextThread, IN HANDLE ThreadHandle,
                    OUT PCONTEXT Context);
    /* PR 263338: we get STATUS_DATATYPE_MISALIGNMENT if not aligned */
    IF_X64(ASSERT(ALIGNED(cxt, 16)));
    status = NT_SYSCALL(GetContextThread, hthread, cxt);
    /* Don't assert here -- let the caller do so if it expects a particular value.
     * If we asserted here when an ldmp is being generated, we could prevent
     * generation of the ldmp if there is a handle privilege problem between
     * the calling thread and hthread.
     */
    return status;
}
/* WARNING: any time we set a thread's context we must make sure we can
* handle two cases:
* 1) the thread was at a syscall and now we won't recognize it as such
* (case 6113) (not to mention that the kernel will finish the
* syscall and clobber eax and ecx+edx after setting to cxt: case 5074)
* 2) the thread just hit a fault but the kernel has not yet copied the
* faulting context to the user mode structures for the handler
* (case 7393)
*/
/* Invokes the SetContextThread system call for hthread with *cxt.
 * On x64, cxt must be 16-byte aligned.
 * Note returns raw NTSTATUS.
 */
NTSTATUS
nt_set_context(HANDLE hthread, CONTEXT *cxt)
{
GET_RAW_SYSCALL(SetContextThread, IN HANDLE ThreadHandle,
IN PCONTEXT Context);
/* PR 263338: we get STATUS_DATATYPE_MISALIGNMENT if not aligned */
IF_X64(ASSERT(ALIGNED(cxt, 16)));
return NT_SYSCALL(SetContextThread, hthread, cxt);
}
/* Invokes the SuspendThread system call on hthread.  previous_suspend_count
 * is passed through as the optional OUT previous-count argument.
 * Returns whether the syscall succeeded.
 */
bool
nt_thread_suspend(HANDLE hthread, int *previous_suspend_count)
{
    GET_RAW_SYSCALL(SuspendThread, IN HANDLE ThreadHandle,
                    OUT PULONG PreviousSuspendCount OPTIONAL);
    /* Don't assert here -- let the caller do so if it expects a particular value.
     * If we asserted here when an ldmp is being generated, we could prevent
     * generation of the ldmp if there is a handle privilege problem between
     * the calling thread and hthread.
     */
    return NT_SUCCESS(NT_SYSCALL(SuspendThread, hthread,
                                 (ULONG *)previous_suspend_count));
}
/* Invokes the ResumeThread system call on hthread.  previous_suspend_count
 * is passed through as the optional OUT previous-count argument.
 * Returns whether the syscall succeeded.
 */
bool
nt_thread_resume(HANDLE hthread, int *previous_suspend_count)
{
    GET_RAW_SYSCALL(ResumeThread, IN HANDLE ThreadHandle,
                    OUT PULONG PreviousSuspendCount OPTIONAL);
    return NT_SUCCESS(NT_SYSCALL(ResumeThread, hthread,
                                 (ULONG *)previous_suspend_count));
}
#if !defined(NOT_DYNAMORIO_CORE_PROPER) && !defined(NOT_DYNAMORIO_CORE)
/* Calls NtGetNextThread(hprocess, cur_thread, access, 0, 0, next_thread) when
 * that routine is available; returns STATUS_NOT_IMPLEMENTED when the
 * NtGetNextThread pointer is NULL.
 * Note returns raw NTSTATUS.
 */
NTSTATUS
nt_thread_iterator_next(HANDLE hprocess, HANDLE cur_thread, HANDLE *next_thread,
                        ACCESS_MASK access)
{
    if (NtGetNextThread != NULL)
        return NtGetNextThread(hprocess, cur_thread, access, 0, 0, next_thread);
    return STATUS_NOT_IMPLEMENTED;
}
#endif
/* Invokes the TerminateThread system call on hthread with exit_code.
 * hthread must not be 0.  Returns whether the syscall succeeded.
 */
bool
nt_terminate_thread(HANDLE hthread, NTSTATUS exit_code)
{
NTSTATUS res;
GET_RAW_SYSCALL(TerminateThread, IN HANDLE ThreadHandle OPTIONAL,
IN NTSTATUS ExitStatus);
/* hthread == 0 means current thread, match kernel32 TerminateThread which
* disallows null to avoid bugs in our code (we should always be passing
* a valid handle or NT_CURRENT_THREAD) */
ASSERT(hthread != (HANDLE)0);
res = NT_SYSCALL(TerminateThread, hthread, exit_code);
/* if we are still running here after asking to terminate ourselves,
 * the termination did not take effect
 */
ASSERT(hthread != NT_CURRENT_THREAD && "terminate current thread failed");
return NT_SUCCESS(res);
}
/* Invokes the TerminateProcess system call on hprocess with exit_code.
 * hprocess must not be 0.  Returns whether the syscall succeeded.
 */
bool
nt_terminate_process(HANDLE hprocess, NTSTATUS exit_code)
{
NTSTATUS res;
GET_RAW_SYSCALL(TerminateProcess, IN HANDLE ProcessHandle OPTIONAL,
IN NTSTATUS ExitStatus);
/* hprocess == 0 has special meaning (terminate all threads but this one),
* kernel32!TerminateProcess disallows it and we currently don't use
* that functionality */
ASSERT(hprocess != (HANDLE)0);
res = NT_SYSCALL(TerminateProcess, hprocess, exit_code);
/* if we are still running here after asking to terminate our own process,
 * the termination did not take effect
 */
ASSERT(hprocess != NT_CURRENT_PROCESS && "terminate current process failed");
return NT_SUCCESS(res);
}
/* Unchecked version of nt_terminate_process(): passes hprocess and exit_code
 * straight to the TerminateProcess system call with no asserts.
 * Note returns raw NTSTATUS.
 */
NTSTATUS
nt_terminate_process_for_app(HANDLE hprocess, NTSTATUS exit_code)
{
    NTSTATUS status;
    GET_RAW_SYSCALL(TerminateProcess, IN HANDLE ProcessHandle OPTIONAL,
                    IN NTSTATUS ExitStatus);
    /* we allow any argument or result values */
    status = NT_SYSCALL(TerminateProcess, hprocess, exit_code);
    return status;
}
/* Queries the ThreadAmILastThread information class for hthread into *amI.
 * Returns whether the query succeeded; *amI is only meaningful on success.
 */
bool
am_I_sole_thread(HANDLE hthread, int *amI /*OUT*/)
{
    /* the returned length is not examined */
    ULONG returned_len;
    NTSTATUS status = NT_SYSCALL(QueryInformationThread, hthread, ThreadAmILastThread,
                                 amI, sizeof(*amI), &returned_len);
    return NT_SUCCESS(status);
}
/* checks current thread, and turns errors into false */
bool
check_sole_thread()
{
    int amI;
    /* a failed query counts as "not the sole thread" */
    return am_I_sole_thread(NT_CURRENT_THREAD, &amI) && amI != 0;
}
/* Creates an unnamed SynchronizationTimer with TIMER_ALL_ACCESS and arms it
 * with due_time and period (no APC routine, Resume == false).
 *
 * Returns the timer handle, or NULL if the timer could not be created.
 * A failure to set the timer is only flagged by an assert; the handle is
 * still returned in that case.
 */
HANDLE
nt_create_and_set_timer(PLARGE_INTEGER due_time, LONG period)
{
    NTSTATUS res;
    /* initialized so that a failed create never hands back stack garbage */
    HANDLE htimer = NULL;
    enum {
        NotificationTimer,
        SynchronizationTimer
    };
    GET_NTDLL(NtCreateTimer, (OUT PHANDLE TimerHandle,
                              IN ACCESS_MASK DesiredAccess,
                              IN POBJECT_ATTRIBUTES ObjectAttributes,
                              IN DWORD TimerType /* TIMER_TYPE */
                              ));
    GET_NTDLL(NtSetTimer, (IN HANDLE TimerHandle,
                           IN PLARGE_INTEGER DueTime,
                           IN PVOID TimerApcRoutine, /* PTIMER_APC_ROUTINE */
                           IN PVOID TimerContext,
                           IN BOOLEAN Resume,
                           IN LONG Period,
                           OUT PBOOLEAN PreviousState));
    res = NtCreateTimer(&htimer, TIMER_ALL_ACCESS, NULL /* no name */,
                        SynchronizationTimer);
    ASSERT(NT_SUCCESS(res));
    if (!NT_SUCCESS(res)) {
        /* do not pass an invalid handle on to NtSetTimer or to the caller */
        return NULL;
    }
    res = NtSetTimer(htimer, due_time, NULL, NULL, false, period, NULL);
    ASSERT(NT_SUCCESS(res));
    return htimer;
}
/* Calls NtDelayExecution with Alertable == false and the given interval.
 * Returns whether the call succeeded.
 */
bool
nt_sleep(PLARGE_INTEGER due_time)
{
    GET_NTDLL(NtDelayExecution, (IN BOOLEAN Alertable,
                                 IN PLARGE_INTEGER Interval
                                 ));
    /* non alertable sleep */
    return NT_SUCCESS(NtDelayExecution(false, due_time));
}
/* Calls ntdll's NtYieldExecution; its return value is ignored. */
void
nt_yield()
{
GET_NTDLL(NtYieldExecution, (VOID));
NtYieldExecution();
}
/* Returns the BaseAddress field of the SectionBasicInformation for section
 * handle h.  The info struct is zeroed beforehand, so a failed query (which
 * asserts) yields NULL.
 */
void *
get_section_address(HANDLE h)
{
    SECTION_BASIC_INFORMATION info;
    ULONG got;
    NTSTATUS res;
    memset(&info, 0, sizeof(info));
    res = NtQuerySection(h, SectionBasicInformation, &info, sizeof(info), &got);
    ASSERT(NT_SUCCESS(res) && got == sizeof(SECTION_BASIC_INFORMATION));
    return info.BaseAddress;
}
/* returns true if attributes can be read and sets them,
* otherwise the values are not modified
*/
bool
get_section_attributes(HANDLE h, uint *section_attributes /* OUT */,
                       LARGE_INTEGER* section_size /* OPTIONAL OUT */)
{
    SECTION_BASIC_INFORMATION info;
    ULONG got;
    NTSTATUS res;
    memset(&info, 0, sizeof(info));
    ASSERT(section_attributes != NULL);
    res = NtQuerySection(h, SectionBasicInformation, &info, sizeof(info), &got);
    if (!NT_SUCCESS(res)) {
        /* Unfortunately, we are often passed section handles that are
         * created as GrantedAccess 0xe: None,
         * MapWrite,MapRead,MapExecute which cannot be queried
         */
        return false;
    }
    ASSERT(got == sizeof(SECTION_BASIC_INFORMATION));
    *section_attributes = info.Attributes;
    /* the size is only reported when the caller asked for it */
    if (section_size != NULL)
        *section_size = info.Size;
    return true;
}
/* Invokes the Close system call on handle h.
 * Note returns raw NTSTATUS.
 */
NTSTATUS
nt_raw_close(HANDLE h)
{
GET_RAW_SYSCALL(Close,
IN HANDLE Handle);
return NT_SYSCALL(Close, h);
}
/* Boolean version of nt_raw_close(): returns whether h was closed
 * successfully.
 */
bool
close_handle(HANDLE h)
{
    NTSTATUS status = nt_raw_close(h);
    return NT_SUCCESS(status);
}
/* Note returns raw NTSTATUS */
NTSTATUS
duplicate_handle(HANDLE source_process, HANDLE source, HANDLE target_process,
                 HANDLE *target, ACCESS_MASK access, uint attributes,
                 uint options)
{
    GET_RAW_SYSCALL(DuplicateObject,
                    IN HANDLE SourceProcessHandle,
                    IN HANDLE SourceHandle,
                    IN HANDLE TargetProcessHandle,
                    OUT PHANDLE TargetHandle OPTIONAL,
                    IN ACCESS_MASK DesiredAcess,
                    IN ULONG Atrributes,
                    IN ULONG options_t);
    /* all arguments are forwarded unchanged to the DuplicateObject syscall */
    return NT_SYSCALL(DuplicateObject, source_process, source, target_process, target,
                      access, attributes, options);
}
/* File-scope declaration of ntdll's NtQueryObject, which is called by
 * nt_get_handle_access_rights() and nt_get_object_name().
 */
GET_NTDLL(NtQueryObject, (IN HANDLE ObjectHandle,
IN OBJECT_INFORMATION_CLASS ObjectInformationClass,
OUT PVOID ObjectInformation,
IN ULONG ObjectInformationLength,
OUT PULONG ReturnLength OPTIONAL));
/* Returns the GrantedAccess field of the ObjectBasicInformation for handle.
 * A failed query is only flagged by asserts.
 */
ACCESS_MASK
nt_get_handle_access_rights(HANDLE handle)
{
    OBJECT_BASIC_INFORMATION obj_info;
    ULONG needed_length;
    NTSTATUS res = NtQueryObject(handle, ObjectBasicInformation, &obj_info,
                                 sizeof(obj_info), &needed_length);
    ASSERT(needed_length == sizeof(obj_info));
    ASSERT(NT_SUCCESS(res));
    return obj_info.GrantedAccess;
}
/* byte_length is total size of UNICODE_STRING struct and an embedded buffer */
NTSTATUS
nt_get_object_name(HANDLE handle, OBJECT_NAME_INFORMATION* object_name /* OUT */,
                   uint byte_length, uint *returned_byte_length /* OUT */)
{
    /* queries ObjectNameInformation; failure is flagged by the assert and
     * also reported to the caller via the raw NTSTATUS
     */
    NTSTATUS res = NtQueryObject(handle, ObjectNameInformation, object_name,
                                 byte_length, (ULONG *)returned_byte_length);
    ASSERT(NT_SUCCESS(res));
    return res;
}
/* Initializes the UNICODE_STRING dst from the wide string src by calling
 * ntdll's RtlInitUnicodeString, and returns that call's result.
 */
NTSTATUS
wchar_to_unicode(PUNICODE_STRING dst, PCWSTR src)
{
    GET_NTDLL(RtlInitUnicodeString, (IN OUT PUNICODE_STRING DestinationString,
                                     IN PCWSTR SourceString));
    return RtlInitUnicodeString(dst, src);
}
/* Converts the narrow string src to a wide string in buf and initializes the
 * UNICODE_STRING dst to refer to it.
 *
 * We don't want to allocate memory, so caller must provide a buffer that's
 * big enough for char -> wchar conversion.  buflen is the capacity of buf as
 * passed to _snwprintf.  If src does not fit, the result is truncated but is
 * always null-terminated.
 *
 * Returns the result of wchar_to_unicode().
 */
static NTSTATUS
char_to_unicode(PUNICODE_STRING dst, PCSTR src, PWSTR buf, size_t buflen)
{
    int written = _snwprintf(buf, buflen, L"%S", src);
    /* _snwprintf does not null-terminate when the output fills or exceeds
     * the buffer; terminate explicitly so wchar_to_unicode() never scans
     * past the end of buf.
     */
    if (buflen > 0 && (written < 0 || (size_t)written >= buflen))
        buf[buflen - 1] = L'\0';
    return wchar_to_unicode(dst, buf);
}
/* Initializes the ANSI_STRING dst from str by calling ntdll's
 * RtlInitAnsiString.
 */
static void
char_to_ansi(PANSI_STRING dst, const char *str)
{
GET_NTDLL(RtlInitAnsiString, (IN OUT PANSI_STRING DestinationString,
IN PCSTR SourceString));
RtlInitAnsiString(dst, str);
}
/* Collects file attributes.
* Returns 1 if successful; 0 otherwise.
* (Using bool is problematic for non-core users.)
*/
bool
query_full_attributes_file(IN PCWSTR filename,
                           OUT PFILE_NETWORK_OPEN_INFORMATION info)
{
    OBJECT_ATTRIBUTES attributes;
    UNICODE_STRING objname;
    memset(&attributes, 0, sizeof(attributes));
    wchar_to_unicode(&objname, filename);
    /* case-insensitive lookup with no root directory or security descriptor */
    InitializeObjectAttributes(&attributes, &objname,
                               OBJ_CASE_INSENSITIVE,
                               NULL, NULL);
    return NT_SUCCESS(nt_raw_QueryFullAttributesFile(&attributes, info));
}
/* Thin wrapper around ntdll's NtQueryValueKey: all arguments are forwarded
 * unchanged.
 * Note returns raw NTSTATUS.
 */
NTSTATUS
nt_query_value_key(IN HANDLE key,
IN PUNICODE_STRING value_name,
IN KEY_VALUE_INFORMATION_CLASS class,
OUT PVOID info,
IN ULONG info_length,
OUT PULONG res_length)
{
GET_NTDLL(NtQueryValueKey, (IN HANDLE KeyHandle,
IN PUNICODE_STRING ValueName,
IN KEY_VALUE_INFORMATION_CLASS KeyValueInformationClass,
OUT PVOID KeyValueInformation,
IN ULONG Length,
OUT PULONG ResultLength));
return NtQueryValueKey(key, value_name, class, info, info_length, res_length);
}
/* rights should be KEY_READ or KEY_WRITE or both */
/* parent handle HAS to be opened with an absolute name */
HANDLE
reg_create_key(HANDLE parent, PCWSTR keyname, ACCESS_MASK rights)
{
    OBJECT_ATTRIBUTES attr;
    UNICODE_STRING objname;
    ULONG disposition;
    HANDLE key;
    NTSTATUS status = wchar_to_unicode(&objname, keyname);
    if (!NT_SUCCESS(status))
        return NULL;
    /* keyname is resolved relative to parent, case-insensitively */
    InitializeObjectAttributes(&attr, &objname,
                               OBJ_CASE_INSENSITIVE,
                               parent, NULL);
    status = nt_raw_CreateKey(&key, rights, &attr, 0, NULL, 0, &disposition);
    if (NT_SUCCESS(status))
        return key;
    NTPRINT("Error 0x%x in create key for \"%S\"\n", status, objname.Buffer);
    return NULL;
}
/* rights should be KEY_READ or KEY_WRITE or both */
HANDLE
reg_open_key(PCWSTR keyname, ACCESS_MASK rights)
{
    NTSTATUS status;
    HANDLE key;
    OBJECT_ATTRIBUTES attr;
    UNICODE_STRING objname;
    GET_RAW_SYSCALL(OpenKey,
                    OUT PHANDLE KeyHandle,
                    IN ACCESS_MASK DesiredAccess,
                    IN POBJECT_ATTRIBUTES ObjectAttributes);
    status = wchar_to_unicode(&objname, keyname);
    if (!NT_SUCCESS(status)) {
        NTPRINT("Error in wchar to unicode\n");
        return NULL;
    }
    /* no root directory is supplied: keyname is used on its own,
     * case-insensitively
     */
    InitializeObjectAttributes(&attr, &objname,
                               OBJ_CASE_INSENSITIVE,
                               NULL, NULL);
    status = NT_SYSCALL(OpenKey, &key, rights, &attr);
    if (NT_SUCCESS(status))
        return key;
    NTPRINT("Error 0x%x in open key for \"%S\"\n", status, objname.Buffer);
    return NULL;
}
/* Closes the registry key handle hkey via close_handle().
 * Returns whether the close succeeded.
 */
bool
reg_close_key(HANDLE hkey)
{
return close_handle(hkey);
}
/* Deletes the registry key referred to by hkey via ntdll's NtDeleteKey.
 * Returns whether the call succeeded.
 */
bool
reg_delete_key(HANDLE hkey)
{
    NTSTATUS status;
    GET_NTDLL(NtDeleteKey, (IN HANDLE KeyHandle));
    status = NtDeleteKey(hkey);
    NTPRINT("Got %d for deleting key\n", status);
    return NT_SUCCESS(status);
}
/* Queries one value of a registry key via nt_query_value_key().
 *
 * keyname:    name of the key, opened here with KEY_READ | rights and closed
 *             again before returning.
 * subkeyname: name of the value to query under that key.
 * info_class: which KEY_VALUE_*_INFORMATION layout to return in info.
 * info:       caller-provided output buffer of info_size bytes.
 *
 * Note that the caller must allocate memory at the end of
 * KEY_VALUE_FULL_INFORMATION to store the actual data.
 * WARNING: the Name field often has no null terminating it. It
 * either runs right up next to Data or has an un-initialized value
 * in it -- so make sure you zero out your buffer before calling
 * this routine, and use the NameLength field (bytes not chars) and
 * then check for null and skip over it if nec. to find the data start.
 *
 * Returns REG_QUERY_SUCCESS, REG_QUERY_BUFFER_TOO_SMALL when info_size is
 * insufficient, or REG_QUERY_FAILURE for any other error.
 */
reg_query_value_result_t
reg_query_value(IN PCWSTR keyname,
                IN PCWSTR subkeyname,
                IN KEY_VALUE_INFORMATION_CLASS info_class,
                OUT PVOID info,
                IN ULONG info_size,
                IN ACCESS_MASK rights)
{
    NTSTATUS res;
    ULONG outlen = 0;
    UNICODE_STRING valuename;
    HANDLE hkey = reg_open_key(keyname, KEY_READ | rights);
    if (hkey == NULL)
        return REG_QUERY_FAILURE;
    res = wchar_to_unicode(&valuename, subkeyname);
    if (!NT_SUCCESS(res)) {
        /* do not leak the key handle opened above */
        reg_close_key(hkey);
        return REG_QUERY_FAILURE;
    }
    res = nt_query_value_key(hkey, &valuename, info_class, info, info_size, &outlen);
    reg_close_key(hkey);
#if VERBOSE
    if (!NT_SUCCESS(res))
        NTPRINT("Error 0x%x in query key \"%S\"\n", res, subkeyname);
#endif
    /* When buffer is insufficient I see it return BUFFER_OVERFLOW, but nebbet
     * mentions BUFFER_TOO_SMALL as well. */
    if (res == STATUS_BUFFER_TOO_SMALL || res == STATUS_BUFFER_OVERFLOW) {
        return REG_QUERY_BUFFER_TOO_SMALL;
    }
    return NT_SUCCESS(res) ? REG_QUERY_SUCCESS : REG_QUERY_FAILURE;
}
/* File-scope declaration for the SetValueKey system call, which is invoked
 * by reg_set_key_value() and reg_set_dword_key_value().
 */
GET_RAW_SYSCALL(SetValueKey,
IN HANDLE KeyHandle,
IN PUNICODE_STRING ValueName,
IN ULONG TitleIndex OPTIONAL,
IN ULONG Type,
IN PVOID Data,
IN ULONG DataSize);
/* Sets the REG_SZ value named subkey under hkey to the wide string val,
 * including its terminating null.  Returns whether the operation succeeded.
 */
bool
reg_set_key_value(HANDLE hkey, PCWSTR subkey, PCWSTR val)
{
    UNICODE_STRING name_us;
    UNICODE_STRING value_us;
    NTSTATUS status = wchar_to_unicode(&name_us, subkey);
    if (NT_SUCCESS(status))
        status = wchar_to_unicode(&value_us, val);
    if (!NT_SUCCESS(status))
        return false;
    /* Length field is really size in bytes, have to add 1 for final 0 */
    status = NT_SYSCALL(SetValueKey,
                        hkey, &name_us, 0, REG_SZ, (LPBYTE) value_us.Buffer,
                        value_us.Length+sizeof(wchar_t));
    return NT_SUCCESS(status);
}
/* Sets the REG_DWORD value named subkey under hkey to value.
 * Returns whether the operation succeeded.
 */
bool
reg_set_dword_key_value(HANDLE hkey, PCWSTR subkey, DWORD value)
{
    UNICODE_STRING name_us;
    NTSTATUS status = wchar_to_unicode(&name_us, subkey);
    if (!NT_SUCCESS(status))
        return false;
    status = NT_SYSCALL(SetValueKey,
                        hkey, &name_us, 0, REG_DWORD, &value, sizeof(DWORD));
    return NT_SUCCESS(status);
}
/* Flushes registry changes for the given key to the disk.
* Returns 1 on success, 0 otherwise.
* Notes: See case 4138. For a valid opened key, failure can happen
* only if registry IO fails, i.e., this function shouldn't fail
* for most cases.
*/
bool
reg_flush_key(HANDLE hkey)
{
    GET_NTDLL(NtFlushKey, (IN HANDLE KeyHandle));
    return NT_SUCCESS(NtFlushKey(hkey));
}
/* Enumerates the subkeys of a registry key via the NtEnumerateKey
* system call.
*
* Note that the caller must allocate memory at the end of
* KEY_VALUE_FULL_INFORMATION to store the actual data.
* WARNING: the Name field often has no null terminating it. It
* either runs right up next to Data or has an un-initialized value
* in it -- so make sure you zero out your buffer before calling
* this routine, and use the NameLength field (bytes not chars) and
* then check for null and skip over it if nec. to find the data start.
*
* Returns 1 on success, 0 otherwise.
*/
bool
reg_enum_key(IN PCWSTR keyname,
             IN ULONG index,
             IN KEY_INFORMATION_CLASS info_class,
             OUT PVOID key_info,
             IN ULONG key_info_size)
{
    NTSTATUS status;
    /* the number of bytes received is not reported to the caller */
    ULONG bytes_out = 0;
    HANDLE key = reg_open_key(keyname, KEY_READ);
    GET_NTDLL(NtEnumerateKey, (IN HANDLE hkey,
                               IN ULONG index,
                               IN KEY_INFORMATION_CLASS info_class,
                               OUT PVOID key_info,
                               IN ULONG key_info_size,
                               OUT PULONG bytes_received));
    if (key == NULL)
        return false;
    status = NtEnumerateKey(key, index, info_class, key_info,
                            key_info_size, &bytes_out);
    /* the key is only held open for the duration of this one query */
    reg_close_key(key);
    return NT_SUCCESS(status);
}
/* Enumerates the values of a registry key via the NtEnumerateValueKey
* system call.
*
* Note that the caller must allocate memory at the end of
* KEY_VALUE_FULL_INFORMATION to store the actual data.
* WARNING: the Name field often has no null terminating it. It
* either runs right up next to Data or has an un-initialized value
* in it -- so make sure you zero out your buffer before calling
* this routine, and use the NameLength field (bytes not chars) and
* then check for null and skip over it if nec. to find the data start.
* Returns 1 on success, 0 otherwise.
*/
bool
reg_enum_value(IN PCWSTR keyname,
               IN ULONG index,
               IN KEY_VALUE_INFORMATION_CLASS info_class,
               OUT PVOID key_info,
               IN ULONG key_info_size)
{
    NTSTATUS status;
    /* the number of bytes received is not reported to the caller */
    ULONG bytes_out = 0;
    HANDLE key = reg_open_key(keyname, KEY_READ);
    GET_NTDLL(NtEnumerateValueKey, (IN HANDLE hKey,
                                    IN ULONG index,
                                    IN KEY_VALUE_INFORMATION_CLASS info_class,
                                    OUT PVOID key_info,
                                    IN ULONG key_info_size,
                                    OUT PULONG bytes_received));
    if (key == NULL)
        return false;
    status = NtEnumerateValueKey(key, index, info_class, key_info,
                                 key_info_size, &bytes_out);
    /* the key is only held open for the duration of this one query */
    reg_close_key(key);
    return NT_SUCCESS(status);
}
/* queries the process env vars: NOT the separate copies used in the C
* library and in other libraries
*/
bool
env_get_value(PCWSTR var, wchar_t *val, size_t valsz)
{
    PEB *peb = get_own_peb();
    /* the environment block as located through our own process parameters */
    PWSTR env_block = (PWSTR)
        get_process_param_buf(peb->ProcessParameters,
                              peb->ProcessParameters->Environment);
    UNICODE_STRING name_us;
    UNICODE_STRING value_us;
    NTSTATUS status;
    GET_NTDLL(RtlQueryEnvironmentVariable_U, (PWSTR Environment,
                                              PUNICODE_STRING Name,
                                              PUNICODE_STRING Value));
    status = wchar_to_unicode(&name_us, var);
    if (!NT_SUCCESS(status))
        return false;
    /* the result is written directly into the caller's buffer */
    value_us.Buffer = val;
    value_us.Length = 0;
    value_us.MaximumLength = (USHORT) valsz;
    status = RtlQueryEnvironmentVariable_U(env_block, &name_us, &value_us);
    return NT_SUCCESS(status);
}
/* thread token can be primary token, impersonated, or anonymous */
NTSTATUS
get_current_user_token(PTOKEN_USER ptoken, USHORT token_buffer_length)
{
    HANDLE htoken;
    ULONG len = 0;
    NTSTATUS res = nt_raw_OpenThreadToken(NT_CURRENT_THREAD, TOKEN_QUERY,
                                          TRUE, &htoken);
    /* anonymous impersonation token cannot be opened: fall back to the
     * process token
     */
    if (!NT_SUCCESS(res))
        res = nt_raw_OpenProcessToken(NT_CURRENT_PROCESS, TOKEN_QUERY, &htoken);
    if (!NT_SUCCESS(res))
        return res;
    res = NtQueryInformationToken(htoken, TokenUser, ptoken, token_buffer_length, &len);
    close_handle(htoken);
    ASSERT(len <= token_buffer_length);
    if (!NT_SUCCESS(res)) {
        ASSERT_CURIOSITY(false && "can't query token, impersonated?");
    }
    return res;
}
/* Queries the TokenUser information of the current process's token into
 * ptoken, a caller-provided buffer of token_buffer_length bytes.
 * Note returns raw NTSTATUS.
 */
NTSTATUS
get_primary_user_token(PTOKEN_USER ptoken, USHORT token_buffer_length)
{
    HANDLE htoken;
    ULONG len = 0;
    NTSTATUS res = nt_raw_OpenProcessToken(NT_CURRENT_PROCESS, TOKEN_QUERY, &htoken);
    if (!NT_SUCCESS(res))
        return res;
    res = NtQueryInformationToken(htoken, TokenUser, ptoken, token_buffer_length, &len);
    /* the token handle is only needed for the query itself */
    close_handle(htoken);
    ASSERT(len <= token_buffer_length);
    if (!NT_SUCCESS(res)) {
        ASSERT_CURIOSITY(false && "can't query token?");
    }
    return res;
}
/* returns the Owner that will be recorded for any objects created by
* this process (when not impersonating)
*/
NTSTATUS
get_primary_owner_token(PTOKEN_OWNER powner, USHORT owner_buffer_length)
{
    HANDLE htoken;
    ULONG len = 0;
    NTSTATUS res = nt_raw_OpenProcessToken(NT_CURRENT_PROCESS, TOKEN_QUERY, &htoken);
    if (!NT_SUCCESS(res))
        return res;
    res = NtQueryInformationToken(htoken, TokenOwner, powner, owner_buffer_length, &len);
    /* the token handle is only needed for the query itself */
    close_handle(htoken);
    ASSERT(len <= owner_buffer_length);
    if (!NT_SUCCESS(res)) {
        ASSERT_CURIOSITY(false && "can't query token?");
    }
    return res;
}
/* Note that the caller must allocate buffer_length bytes in sid_string */
NTSTATUS
get_current_user_SID(PWSTR sid_string, USHORT buffer_length)
{
    GET_NTDLL(RtlConvertSidToUnicodeString, (OUT PUNICODE_STRING UnicodeString,
                                             IN PSID Sid,
                                             BOOLEAN AllocateDestinationString));
    UCHAR buf[SECURITY_MAX_TOKEN_SIZE];
    PTOKEN_USER ptoken = (PTOKEN_USER)buf;
    UNICODE_STRING ustr;
    NTSTATUS res = get_current_user_token(ptoken, sizeof(buf));
    if (!NT_SUCCESS(res))
        return res;
    /* the string form of the SID is written into the caller's buffer */
    ustr.Buffer = sid_string;
    ustr.Length = 0;
    ustr.MaximumLength = buffer_length;
    /* We assume that by passing FALSE, no memory will be allocated
     * and the routine is reentrant.
     */
    return RtlConvertSidToUnicodeString(&ustr, ptoken->User.Sid, FALSE);
}
/* Returns the user SID from the current process's token.  The token is
 * queried into a static buffer on the first successful call and the cached
 * pointer is returned afterwards.  Returns NULL if the token query fails.
 */
const PSID
get_process_primary_SID()
{
    static PSID primary_SID = NULL;
    static UCHAR buf[SECURITY_MAX_TOKEN_SIZE];
    PTOKEN_USER ptoken = (PTOKEN_USER)buf;
    NTSTATUS res;
    if (primary_SID != NULL)
        return primary_SID;
    res = get_primary_user_token(ptoken, sizeof(buf));
    ASSERT(NT_SUCCESS(res));
    if (!NT_SUCCESS(res))
        return NULL;
    primary_SID = ptoken->User.Sid;
    return primary_SID;
}
/* based on RtlpQuerySecurityDescriptorPointers from reactos/0.2.9/lib/rtl/sd.c */
/* Extracts the Owner, Group, Sacl and Dacl pointers from SecurityDescriptor.
 * Each OUT parameter is optional: pass NULL to skip it.
 *
 * For a self-relative descriptor (SE_SELF_RELATIVE set in Control) the stored
 * fields are offsets from the start of the descriptor and are converted to
 * pointers here; a zero offset yields NULL.  For an absolute descriptor the
 * stored pointers are returned as is.  In both formats Sacl/Dacl are reported
 * as NULL unless the matching SE_SACL_PRESENT/SE_DACL_PRESENT bit is set.
 */
static
void
get_sd_pointers(IN PISECURITY_DESCRIPTOR SecurityDescriptor,
                OUT PSID *Owner OPTIONAL,
                OUT PSID *Group OPTIONAL,
                OUT PACL *Sacl OPTIONAL,
                OUT PACL *Dacl OPTIONAL)
{
    /* we usually deal with self-relative SIDs as returned by NtQuerySecurityObject */
    if (TEST(SE_SELF_RELATIVE, SecurityDescriptor->Control)) {
        PISECURITY_DESCRIPTOR_RELATIVE RelSD =
            (PISECURITY_DESCRIPTOR_RELATIVE)SecurityDescriptor;
        if (Owner != NULL) {
            *Owner = ((RelSD->Owner != 0) ?
                      (PSID)((ULONG_PTR)RelSD + RelSD->Owner) : NULL);
        }
        if (Group != NULL) {
            *Group = ((RelSD->Group != 0) ?
                      (PSID)((ULONG_PTR)RelSD + RelSD->Group) : NULL);
        }
        /* the ACL results are PACL, not PSID, so cast accordingly */
        if (Sacl != NULL) {
            *Sacl = (((RelSD->Control & SE_SACL_PRESENT) && (RelSD->Sacl != 0)) ?
                     (PACL)((ULONG_PTR)RelSD + RelSD->Sacl) : NULL);
        }
        if (Dacl != NULL) {
            *Dacl = (((RelSD->Control & SE_DACL_PRESENT) && (RelSD->Dacl != 0)) ?
                     (PACL)((ULONG_PTR)RelSD + RelSD->Dacl) : NULL);
        }
    } else {
        if (Owner != NULL) {
            *Owner = SecurityDescriptor->Owner;
        }
        if (Group != NULL) {
            *Group = SecurityDescriptor->Group;
        }
        if (Sacl != NULL) {
            *Sacl = ((SecurityDescriptor->Control & SE_SACL_PRESENT) ?
                     SecurityDescriptor->Sacl : NULL);
        }
        if (Dacl != NULL) {
            *Dacl = ((SecurityDescriptor->Control & SE_DACL_PRESENT) ?
                     SecurityDescriptor->Dacl : NULL);
        }
    }
}
/* Retrieves the Owner SID pointer of SecurityDescriptor into *Owner.
 * Returns false, leaving *Owner untouched, if the descriptor's Revision is
 * not SECURITY_DESCRIPTOR_REVISION1; returns true otherwise.
 */
bool
get_owner_sd(PISECURITY_DESCRIPTOR SecurityDescriptor,
             OUT PSID *Owner)
{
    /* RtlGetOwnerSecurityDescriptor is clean enough, so could be used
     * without reentrancy risks instead of writing ours here
     */
    if (SecurityDescriptor->Revision == SECURITY_DESCRIPTOR_REVISION1) {
        /* only the owner is requested: group and ACLs are skipped */
        get_sd_pointers(SecurityDescriptor, Owner, NULL, NULL, NULL);
        return true;
    }
    return false;
}
/* Resets SecurityDescriptor to an empty descriptor: Revision is set to
 * SECURITY_DESCRIPTOR_REVISION1, Control is cleared, and the Owner, Group,
 * Sacl and Dacl pointers are all set to NULL.
 */
void
initialize_security_descriptor(PISECURITY_DESCRIPTOR SecurityDescriptor)
{
SecurityDescriptor->Revision = SECURITY_DESCRIPTOR_REVISION1;
SecurityDescriptor->Sbz1 = 0;
/* note using absolute format, not SE_SELF_RELATIVE */
SecurityDescriptor->Control = 0;
SecurityDescriptor->Owner = NULL;
SecurityDescriptor->Group = NULL;
SecurityDescriptor->Sacl = NULL;
SecurityDescriptor->Dacl = NULL;
}
/* use only on security descriptors created with initialize_security_descriptor() */
/* Stores Owner in the Owner field of an absolute-format SecurityDescriptor
 * and clears SE_OWNER_DEFAULTED in its Control field.
 * Returns false, without modifying the descriptor, if its Revision is not
 * SECURITY_DESCRIPTOR_REVISION1 or if it is in self-relative format.
 */
bool
set_owner_sd(PISECURITY_DESCRIPTOR SecurityDescriptor,
PSID Owner)
{
/* RtlGetOwnerSecurityDescriptor is clean enough, so could be used
* without reentrancy risks instead of writing ours here
*/
/* NOTE(review): the comment above matches the one in get_owner_sd(); for
 * this setter it presumably means the Rtl "set owner" counterpart -- confirm.
 */
if (SecurityDescriptor->Revision != SECURITY_DESCRIPTOR_REVISION1) {
return false;
}
if (TEST(SE_SELF_RELATIVE, SecurityDescriptor->Control)) {
ASSERT(false && "we only create absolute security descriptors");
return false;
}
/* NOTE(review): this checks the alignment of the Owner value currently
 * stored in the descriptor, not of the new Owner argument -- confirm that
 * is what was intended.
 */
ASSERT(ALIGNED(SecurityDescriptor->Owner, sizeof(void*)));
SecurityDescriptor->Owner = Owner;
/* In case we are editing an existing SD, make sure to remove a possible tag
* indicating that the Owner field was provided by default or inheritance
* mechanisms.  Otherwise practically a nop for us when building
* an SD from scratch
*/
SecurityDescriptor->Control &= ~SE_OWNER_DEFAULTED;
return true;
}
/* Returns LengthRequiredSID() for the SubAuthorityCount stored in Sid_. */
static int
length_sid(IN PSID Sid_)
{
    /* we only know about usable length of SID */
    return LengthRequiredSID(((PISID)Sid_)->SubAuthorityCount);
}
/* Returns whether the two SIDs are equal: same Revision, same
 * SubAuthorityCount, and identical bytes over the length given by
 * length_sid().
 */
bool
equal_sid(IN PSID Sid1_, IN PSID Sid2_)
{
    PISID lhs = Sid1_;
    PISID rhs = Sid2_;
    /* note ntdll!RtlEqualSid returns BOOLEAN and so its result is
     * just in AL! I don't want to deal with here after it got me
     * once when assuming regular bool=int.
     *
     * ntdll!RtlEqualSid+0x2e:
     * 7c91a493 32c0 xor al,al
     * ...
     * 7c91a498 c20800 ret 0x8
     */
    /* preferred to reimplement based on reactos/0.2.x/lib/rtl/sid.c*/
    if (lhs->Revision == rhs->Revision &&
        lhs->SubAuthorityCount == rhs->SubAuthorityCount) {
        /* headers agree, so both SIDs span the same number of bytes */
        SIZE_T num_bytes = length_sid(lhs);
        return memcmp(lhs, rhs, num_bytes) == 0;
    }
    return false;
}
#ifndef NOT_DYNAMORIO_CORE
/* To avoid any possible races, we ensure that the static buffers are
* initialized before we become multi-threaded via
* os_init->init_debugbox_title_buf() which calls these routines */
/* Returns the application name (cached), used for options, event logging
 * and following children.
 *
 * The first call formats get_own_qualified_name() with "%ls" into a static
 * buffer; later calls return that buffer as long as its first character is
 * non-zero.
 */
char*
get_application_name()
{
    static char cached_name[MAXIMUM_PATH];

    if (cached_name[0] != '\0') {
        return cached_name;
    }
    snprintf(cached_name, BUFFER_SIZE_ELEMENTS(cached_name), "%ls",
             get_own_qualified_name());
    NULL_TERMINATE_BUFFER(cached_name);
    return cached_name;
}
/* Returns the short qualified application name (cached).
 *
 * The first call formats get_own_short_qualified_name() with "%ls" into a
 * static buffer; later calls return that buffer as long as its first
 * character is non-zero.
 */
const char *
get_application_short_name()
{
    static char cached_short[MAXIMUM_PATH];

    if (cached_short[0] != '\0') {
        return cached_short;
    }
    snprintf(cached_short, BUFFER_SIZE_ELEMENTS(cached_short), "%ls",
             get_own_short_qualified_name());
    NULL_TERMINATE_BUFFER(cached_short);
    return cached_short;
}
/* Returns the short unqualified application name (cached).
 *
 * The first call formats get_own_short_unqualified_name() with "%ls" into a
 * static buffer; later calls return that buffer as long as its first
 * character is non-zero.
 */
const char *
get_application_short_unqualified_name()
{
    static char cached_unqual[MAXIMUM_PATH];

    if (cached_unqual[0] != '\0') {
        return cached_unqual;
    }
    snprintf(cached_unqual, BUFFER_SIZE_ELEMENTS(cached_unqual), "%ls",
             get_own_short_unqualified_name());
    NULL_TERMINATE_BUFFER(cached_unqual);
    return cached_unqual;
}
/* Returns the application pid as a string (cached), used for event logging.
 *
 * The first call formats get_process_id() with PIDFMT into a static buffer;
 * later calls return that buffer as long as its first character is non-zero.
 */
char*
get_application_pid()
{
    static char cached_pid[16];
    process_id_t own_pid;

    if (cached_pid[0] != '\0') {
        return cached_pid;
    }
    own_pid = get_process_id();
    snprintf(cached_pid, BUFFER_SIZE_ELEMENTS(cached_pid), PIDFMT, own_pid);
    NULL_TERMINATE_BUFFER(cached_pid);
    return cached_pid;
}
#endif /* NOT_DYNAMORIO_CORE */
/* Translates a UNICODE_STRING.Buffer value taken from params into a pointer
 * that can be dereferenced.
 *
 * In the core-proper build, buf is treated as an offset from params and
 * rebased when all of the following hold: we were earliest-injected, the OS
 * is pre-Vista, and buf is below 64KB.  In every other case, and in all
 * other builds, buf is returned unchanged.
 */
wchar_t *
get_process_param_buf(RTL_USER_PROCESS_PARAMETERS *params, wchar_t *buf)
{
#if !defined(NOT_DYNAMORIO_CORE_PROPER) && !defined(NOT_DYNAMORIO_CORE)
    /* On pre-Vista, many of the UNICODE_STRING.Buffer fields hold an offset
     * relative to the start of ProcessParameters, as set by the parent
     * process, until the child's init updates them.
     * Xref the adjustments done inside the routines here that read
     * a child's params.
     */
    if (!dr_earliest_injected) {
        return buf;
    }
    if (get_os_version() >= WINDOWS_VERSION_VISTA) {
        return buf;
    }
    /* Sanity check: some fields may be real pointers, such as Environment
     * which we replaced from the parent.  The offsets should all be small,
     * laid out after the param struct.
     */
    if ((ptr_uint_t)buf >= 64*1024) {
        return buf;
    }
    return (wchar_t *) ((ptr_uint_t)buf + (ptr_uint_t)params);
#else
    /* This routine shouldn't be needed here since we shouldn't be reading
     * our own params, but rather than ifdef-ing out all callers we just
     * make it work.
     */
    return buf;
#endif
}
wchar_t *
get_application_cmdline(void)
{
PEB *peb = get_own_peb();
return get_process_param_buf(peb->ProcessParameters,
peb->ProcessParameters->CommandLine.Buffer);
}
LONGLONG
query_time_100ns()
{
/* FIXME: we could use KUSER_SHARED_DATA here, but it's too volatile
* since we can't programmatically grab its address (all we know is
* 0x7ffe0000) and it changed on win2003 (tickcount deprecated, e.g.).
* Since these time routines aren't currently on critical path we just
* use the more-stable syscalls.
*/
LARGE_INTEGER systime;
GET_NTDLL(NtQuerySystemTime, (IN PLARGE_INTEGER SystemTime));