/* blob: 7426994c9f6c62fac588b4d2a9f334190c4dba36 [file] [log] [blame] */
/* **********************************************************
* Copyright (c) 2011-2020 Google, Inc. All rights reserved.
* Copyright (c) 2009-2010 VMware, Inc. All rights reserved.
* **********************************************************/
/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/* DRSyms DynamoRIO Extension */
/* Symbol lookup for Windows
*
* This library is intended to support both online (in-process) symbol
* loading and sideline (out-of-process via communication with a symbol
* server) symbol access.
*
* Uses dbghelp.dll, which comes with Windows 2000+ as version 5.0.
* However, 5.0 does not have SymFromAddr. Plus, XP's 5.2 has
* SymFromName but it doesn't work (returns error every time).
* So, we rely on redistributing 6.x+.
* 6.3+ is required for SymSearch, but the VS2005sp1 headers have 6.1.
*
* We do not use SymInitialize's feature of loading symbols for all
* modules in a process as we do not need our own nor DR's symbols
* (xref PR 463897).
*
* TODO i#450: support symbol stores of downloaded Windows system pdbs
*
* TODO i#449: be more robust about handling failures packing in
* loaded modules. E.g., today we will probably fail if passed two
* .exe's (non-relocatable). See further comments in load_module()
* below.
*/
#ifdef WINDOWS
# define _CRT_SECURE_NO_DEPRECATE 1
#endif
/* We use the DR API's mutex and heap whether as a client utility library
* or (via DR standalone API) in a symbol server process
*/
#include "dr_api.h"
#include "drsyms_private.h"
#include <windows.h>
#include <dbghelp.h>
#include <stdio.h> /* _vsnprintf */
#include <stddef.h> /* offsetof */
/* We use the Container Extension's hashtable */
#include "hashtable.h"
#include "drsyms.h"
#include "wininc/dia2.h" /* for BasicType and SymTagEnum */
#include "dbghelp_imports.h"
/* ASSERT(x, msg): forwards to DR_ASSERT_MSG in DEBUG builds.  In all other
 * builds it expands to nothing, so x is not evaluated there.
 */
#ifdef DEBUG
# define ASSERT(x, msg) DR_ASSERT_MSG(x, msg)
#else
# define ASSERT(x, msg) /* nothing */
#endif
#if _MSC_VER <= 1400 /* VS2005- */
/* Not present in VS2005 DbgHelp.h. Our own dbghelp_imports.lib lets us link.
 * This is present in dbghelp.dll 6.0+ which we already say we require.
 */
/* Wide-character module load; called from load_module() below. */
DWORD64 IMAGEAPI
SymLoadModuleExW(__in HANDLE hProcess, __in_opt HANDLE hFile, __in_opt PCWSTR ImageName,
__in_opt PCWSTR ModuleName, __in DWORD64 BaseOfDll, __in DWORD DllSize,
__in_opt PMODLOAD_DATA Data, __in_opt DWORD Flags);
/* Wide-character address-to-line query; called from
 * drsym_lookup_address_local() below.
 */
BOOL IMAGEAPI
SymGetLineFromAddrW64(__in HANDLE hProcess, __in DWORD64 dwAddr,
__out PDWORD pdwDisplacement, __out PIMAGEHLP_LINEW64 Line);
#endif
/* SymSearch is not present in VS2005sp1 headers, so we declare a matching
 * function-pointer type ourselves.  drsym_search_symbols_local() fills a
 * pointer of this type via GetProcAddress on dbghelp.dll.
 */
typedef BOOL(__stdcall *func_SymSearch_t)(
__in HANDLE hProcess, __in ULONG64 BaseOfDll, __in_opt DWORD Index,
__in_opt DWORD SymTag, __in_opt PCSTR Mask, __in_opt DWORD64 Address,
__in PSYM_ENUMERATESYMBOLS_CALLBACK EnumSymbolsCallback, __in_opt PVOID UserContext,
__in DWORD Options);
/* SymSearch option passed when DRSYM_FULL_SEARCH is requested.
 * Only valid for dbghelp 6.6+; defined here in case the SDK headers lack it.
 */
#ifndef SYMSEARCH_ALLITEMS
# define SYMSEARCH_ALLITEMS 0x08
#endif
/* SymGetSymbolFile is not present in VS2005sp1 headers, so we declare a
 * function-pointer type for the wide-character variant.
 * NOTE(review): no use of this type is visible in this part of the file;
 * presumably it is looked up dynamically like SymSearch -- confirm.
 */
typedef BOOL(__stdcall *func_SymGetSymbolFileW_t)(
__in_opt HANDLE hProcess, __in_opt PCWSTR SymPath, __in PCWSTR ImageFile,
__in DWORD Type, __out_ecount(cSymbolFile) PWSTR SymbolFile, __in size_t cSymbolFile,
__out_ecount(cDbgFile) PWSTR DbgFile, __in size_t cDbgFile);
/* Payload stored in modtable for each module path: records which symbol
 * backend serves the module and holds that backend's handle.
 * Entries are created by lookup_or_load() and destroyed by
 * modtable_entry_free().
 */
typedef struct _mod_entry_t {
/* whether to use pecoff table + unix-style debug info, or use dbghelp */
bool use_pecoff_symtable;
union {
/* Handle returned by drsym_unix_load(); valid when use_pecoff_symtable */
void *pecoff_data;
DWORD64 load_base; /* for dbghelp */
} u;
/* Image size written by load_module() for dbghelp modules; used by
 * drsym_lookup_symbol_local() to bounds-check SymFromName results.
 * Stays at its zero-initialized value for pecoff modules.
 */
uint64 size;
} mod_entry_t;
/* All dbghelp routines are un-synchronized so we provide our own synch.
 * We use a recursive lock to allow queries to be called from enumerate
 * or search callbacks.
 * Created in drsym_init() and destroyed in drsym_exit().
 */
static void *symbol_lock;
/* We have to restrict operations when operating in a nested query from a callback.
 * Set to true by the enumerate/search routines around their callback-invoking
 * work and reset to false when they finish.
 */
static bool recursive_context;
/* Hashtable for mapping module paths to mod_entry_t entries.  Keys are
 * case-insensitive strings duplicated by the table; access is guarded by
 * symbol_lock rather than by the table's own synchronization.
 */
#define MODTABLE_HASH_BITS 8
static hashtable_t modtable;
/* For debugging: enables extra reporting in load_module() and get_type_info() */
static bool verbose = false;
/* Sideline server support: shared-memory id passed to drsym_init(); a non-NULL
 * value selects sideline mode (IS_SIDELINE), which is not yet implemented.
 */
static const wchar_t *shmid;
/* Number of drsym_init() calls not yet balanced by drsym_exit(); updated atomically */
static int drsyms_init_count;
#define IS_SIDELINE (shmid != 0)
/* Next base address that load_module() asks dbghelp to load a module at;
 * advanced by each loaded image's size rounded up to 64KB.
 * We assume that the DWORD64 type used by dbghelp for module base addresses
 * is fine to be truncated to a 32-bit void* for 32-bit code
 */
static DWORD64 next_load = 0x11000000;
/* Forward declarations for routines defined later in this file */
static void
unload_module(HANDLE proc, DWORD64 base);
static size_t
demangle_symbol(char *dst DR_PARAM_OUT, size_t dst_sz, const char *mangled, uint flags);
/* Payload destructor registered with modtable in drsym_init(): releases the
 * backend resources owned by a module entry and then the entry itself.
 */
static void
modtable_entry_free(void *p)
{
    mod_entry_t *entry = (mod_entry_t *)p;
    if (!entry->use_pecoff_symtable) {
        /* dbghelp-backed module: drop it from dbghelp's module list */
        unload_module(GetCurrentProcess(), entry->u.load_base);
    } else {
        /* pecoff-backed module: hand the handle back to the unix-style loader */
        drsym_unix_unload(entry->u.pecoff_data);
    }
    dr_global_free(entry, sizeof(*entry));
}
/* Initializes the symbol library.  Calls may be nested: only the call that
 * takes the init count from 0 to 1 performs any work; later calls simply
 * bump the count and report success.
 *
 * shmid_in: when non-NULL selects sideline (out-of-process) mode, which is
 *   not yet implemented; when NULL, dbghelp is set up in-process.
 * Returns DRSYM_SUCCESS, or DRSYM_ERROR if SymInitialize fails.
 *
 * NOTE(review): on SymInitialize failure we return with the init count still
 * incremented, symbol_lock and modtable created, and drsym_unix_init() not
 * called -- confirm that callers are expected to call drsym_exit() afterward.
 */
DR_EXPORT
drsym_error_t
drsym_init(const wchar_t *shmid_in)
{
    /* handle multiple sets of init/exit calls */
    if (dr_atomic_add32_return_sum(&drsyms_init_count, 1) > 1)
        return DRSYM_SUCCESS;
    shmid = shmid_in;
    symbol_lock = dr_recurlock_create();
    if (!IS_SIDELINE) {
        hashtable_init_ex(&modtable, MODTABLE_HASH_BITS, HASH_STRING_NOCASE,
                          true /*strdup*/, false /*!synch: using symbol_lock*/,
                          modtable_entry_free, NULL, NULL);
        /* FIXME i#601: We'd like to honor the mangling flags passed to each
         * search routine, but the demangling process used by SYMOPT_UNDNAME
         * loses information, so we can provide neither the fully mangled name
         * nor the parameter types for the symbol.  We can't change
         * SYMOPT_UNDNAME while we're running, either, or we get stuck with
         * whatever version of the symbols were loaded into memory when we load
         * the module.
         */
        SymSetOptions(SymGetOptions() | SYMOPT_LOAD_LINES | SYMOPT_UNDNAME);
        if (!SymInitialize(GetCurrentProcess(), NULL, FALSE)) {
            NOTIFY("SymInitialize error %d\n", GetLastError());
            return DRSYM_ERROR;
        }
    } else {
        /* FIXME NYI: establish connection with sideline server via
         * shared memory specified by shmid
         */
    }
    drsym_unix_init();
    return DRSYM_SUCCESS;
}
/* Tears down the symbol library.  Only the call that balances the first
 * drsym_init() performs cleanup; earlier calls just decrement the count.
 *
 * Returns DRSYM_SUCCESS, or DRSYM_ERROR if called more times than
 * drsym_init() or if SymCleanup fails.
 *
 * NOTE(review): drsym_init() calls drsym_unix_init() but no unix-side
 * teardown is invoked here -- confirm whether a matching exit call is needed.
 */
DR_EXPORT
drsym_error_t
drsym_exit(void)
{
    drsym_error_t status = DRSYM_SUCCESS;
    /* handle multiple sets of init/exit calls */
    int remaining = dr_atomic_add32_return_sum(&drsyms_init_count, -1);
    if (remaining < 0)
        return DRSYM_ERROR; /* unbalanced exit */
    if (remaining > 0)
        return status; /* other users are still active */
    if (!IS_SIDELINE) {
        /* Deleting the table frees every entry via modtable_entry_free() */
        hashtable_delete(&modtable);
        if (!SymCleanup(GetCurrentProcess())) {
            NOTIFY("SymCleanup error %d\n", GetLastError());
            status = DRSYM_ERROR;
        }
    }
    dr_recurlock_destroy(symbol_lock);
    return status;
}
/* Asks dbghelp what debug information it has for the module loaded at base.
 *
 * proc:   process handle passed through to dbghelp.
 * base:   module base as known to dbghelp.
 * kind_p: optional; on success receives the DRSYM_* debug-kind flags
 *         (DRSYM_SYMBOLS | DRSYM_PDB for pdb symbols, DRSYM_LINE_NUMS when
 *         line numbers are present).  Left untouched on failure.
 * Returns true on success, false if SymGetModuleInfoW64 fails.
 */
static bool
query_available(HANDLE proc, DWORD64 base, drsym_debug_kind_t *kind_p)
{
    drsym_debug_kind_t found = 0;
    IMAGEHLP_MODULEW64 modinfo;
    /* i#1376c#12: we want to use the pre-SDK 8.0 size.  Otherwise we'll
     * fail when used with an older dbghelp.dll.
     */
#define IMAGEHLP_MODULEW64_SIZE_COMPAT 0xcb8
    memset(&modinfo, 0, sizeof(modinfo));
    modinfo.SizeOfStruct = IMAGEHLP_MODULEW64_SIZE_COMPAT;
    /* i#1197: SymGetModuleInfo64 fails on internal wide-to-ascii conversion,
     * so we use wchar version SymGetModuleInfoW64 instead.
     */
    if (!SymGetModuleInfoW64(proc, base, &modinfo)) {
        NOTIFY("SymGetModuleInfoW64 failed: %d\n", GetLastError());
        return false;
    }
    switch (modinfo.SymType) {
    case SymPdb:
        NOTIFY("Loaded pdb symbols from %S\n", modinfo.LoadedPdbName);
        found |= DRSYM_SYMBOLS | DRSYM_PDB;
        break;
    case SymNone: NOTIFY("No symbols found\n"); break;
    case SymExport: NOTIFY("Only export symbols found\n"); break;
    case SymDeferred: NOTIFY("Symbol load deferred\n"); break;
    case SymCoff:
    case SymCv:
    case SymSym:
    case SymVirtual:
    case SymDia: NOTIFY("Symbols in image file loaded\n"); break;
    default: NOTIFY("Symbols in unknown format.\n"); break;
    }
    if (modinfo.LineNumbers) {
        NOTIFY("  module has line number information.\n");
        found |= DRSYM_LINE_NUMS;
    }
    /* could print out modinfo.ImageName and modinfo.LoadedImageName
     * and warn if modinfo.PdbUnmatched or modinfo.DbgUnmatched
     */
    if (kind_p != NULL)
        *kind_p = found;
    return true;
}
/* Reads SizeOfImage from the PE headers of a file mapped at modbase.
 * Returns 0 if the DOS or NT signature does not match.
 * The caller must have mapped enough of the file to cover the headers;
 * no bounds are checked here.
 */
static size_t
module_image_size(void *modbase)
{
    IMAGE_DOS_HEADER *dos_hdr = (IMAGE_DOS_HEADER *)modbase;
    IMAGE_NT_HEADERS *nt_hdr;
    if (dos_hdr->e_magic == IMAGE_DOS_SIGNATURE) {
        /* e_lfanew is the file offset of the NT headers */
        nt_hdr = (IMAGE_NT_HEADERS *)(((ptr_uint_t)dos_hdr) + dos_hdr->e_lfanew);
        if (nt_hdr != NULL && nt_hdr->Signature == IMAGE_NT_SIGNATURE)
            return nt_hdr->OptionalHeader.SizeOfImage;
    }
    return 0;
}
/* Loads the symbols of the module at path into dbghelp.
 *
 * The file is briefly mapped to read its image size from the PE headers,
 * then handed to SymLoadModuleExW at our next packed base address
 * (next_load).  If dbghelp refuses that base without reporting an error we
 * retry once with a base of 0 so that dbghelp picks the base itself.
 *
 * proc:     unused; the current process handle is always used.
 * path:     UTF-8 path of the module.
 * size_out: optional; receives the image size read from the PE headers
 *           (0 if the headers are not recognized).
 * Returns the base address dbghelp loaded the module at, or 0 on failure.
 */
static DWORD64
load_module(HANDLE proc, const char *path, DR_PARAM_OUT uint64 *size_out)
{
    DWORD64 base, loaded_base;
    DWORD64 size;
    wchar_t wpath[MAX_PATH];
    int err;
    file_t f;
    uint64 map_size;
    size_t actual_size = 0;
    bool ok;
    void *map = NULL;
    /* UTF-8 to wide string. */
    dr_snwprintf(wpath, BUFFER_SIZE_ELEMENTS(wpath), L"%S", path);
    NULL_TERMINATE_BUFFER(wpath);
    /* We specify bases and try to pack the address space, except for
     * the .exe which is not relocatable (although with later dbghelp
     * it seems to accept a preferred base for non-ASLR executables too?).
     * However, for a preferred base of 0 (and /dynamicbase) executable,
     * dbghelp doesn't work well (i#1169): thus we always try a specified
     * base and fall back to 0 if that fails (b/c we don't want to bother
     * parsing the headers looking for the base and /dynamicbase).
     */
    /* Any base will do, but we need the image size (not the file size: i#1171) */
    /* XXX: should we try with 0 base first to avoid this overhead?
     * Though in some tests I don't see any perf hit from this map, prob b/c
     * we only read one word from the first page and then unmap.
     */
    f = dr_open_file(path, DR_FILE_READ);
    if (f == INVALID_FILE)
        return 0;
    ok = dr_file_size(f, &map_size);
    if (ok) {
        actual_size = (size_t)map_size;
        if (actual_size != map_size) { /* overflow check */
            NOTIFY("Overflow on module %s size", path);
            /* Do not leak the file handle on this early exit */
            dr_close_file(f);
            return 0;
        }
        map = dr_map_file(f, &actual_size, 0, NULL, DR_MEMPROT_READ, 0);
    }
    if (!ok || map == NULL || actual_size < map_size) {
        if (map != NULL)
            dr_unmap_file(map, actual_size);
        /* Do not leak the file handle on this early exit */
        dr_close_file(f);
        NOTIFY("Failed to map module %s to find image size", path);
        return 0;
    }
    size = module_image_size(map);
    if (size_out != NULL)
        *size_out = size;
    dr_unmap_file(map, actual_size);
    dr_close_file(f);
    base = next_load;
    /* XXX i#449: if we decide to perform GC and unload older modules we
     * should avoid doing it for recursive_context == true to avoid
     * removing resources needed for finishing an iteration
     */
    do {
        loaded_base = SymLoadModuleExW(GetCurrentProcess(), NULL, wpath, NULL, base,
                                       (DWORD)size /*should check trunc*/, NULL, 0);
        err = GetLastError();
        /* dbghelp will return 0 on already-loaded (documented behavior) or
         * if passed-in base is 0 and module has preferred base of 0 -- though
         * the latter case is a fatal situation as the other dbghelp routines won't
         * accept a 0 base!  Xref i#1169..
         */
        if (loaded_base == 0) {
            if (err == ERROR_SUCCESS && base != 0 /*else can't recover: see above*/) {
                /* We can't unload (gives ERROR_INVALID_PARAMETER) so we load again */
#ifdef DEBUG
                char *ext = strrchr(path, '.');
                if (ext != NULL && !stri_eq(ext, ".exe"))
                    NOTIFY("Failed to load %s at our chosen base\n", path);
#endif
                /* Can pass 0 to SymLoadModuleExW */
                base = 0;
                size = 0;
                continue;
            } else {
                /* FIXME PR 463897: for !single_target, we should handle load
                 * failure by trying a different address, informed by some
                 * memory queries.  For now we assume only one .exe and that
                 * it's below our start load address and that we won't fail.
                 */
                NOTIFY("SymLoadModuleExW error %d\n", err);
                return 0;
            }
        }
        break;
    } while (true);
    /* size is 0 after the base-0 fallback, so next_load only advances when
     * the module was placed at our chosen base.
     */
    next_load += ALIGN_FORWARD(size, 64 * 1024);
    if (verbose) {
        /* Report and query the base dbghelp actually used: after the
         * fallback above, base is 0 and is not a valid module base.
         */
        NOTIFY("loaded %s at 0x%I64x\n", path, loaded_base);
        query_available(GetCurrentProcess(), loaded_base, NULL);
    }
    return loaded_base;
}
/* Removes the module loaded at base from dbghelp's module list; a failure
 * is only reported via NOTIFY.  proc is unused: the current process handle
 * is always passed to dbghelp.
 */
static void
unload_module(HANDLE proc, DWORD64 base)
{
    BOOL unloaded = SymUnloadModule64(GetCurrentProcess(), base);
    if (!unloaded) {
        NOTIFY("SymUnloadModule64 error %d\n", GetLastError());
    }
}
/* Returns the modtable entry for modpath, loading the module's symbols and
 * adding a new entry on first use.
 *
 * The pecoff (unix-style) loader is tried first; dbghelp is used only if
 * that fails and use_dbghelp is true.  Returns NULL if neither backend
 * could load the module -- in particular, if !use_dbghelp, returns NULL if
 * not PECOFF.
 */
static mod_entry_t *
lookup_or_load(const char *modpath, bool use_dbghelp)
{
    mod_entry_t *entry = (mod_entry_t *)hashtable_lookup(&modtable, (void *)modpath);
    if (entry != NULL)
        return entry; /* already loaded */
    entry = dr_global_alloc(sizeof(*entry));
    memset(entry, 0, sizeof(*entry));
    /* First, see whether the module has pecoff symbols */
    entry->u.pecoff_data = drsym_unix_load(modpath);
    if (entry->u.pecoff_data != NULL) {
        entry->use_pecoff_symtable = true;
    } else {
        /* If no pecoff, use dbghelp */
        if (use_dbghelp) {
            entry->use_pecoff_symtable = false;
            entry->u.load_base =
                load_module(GetCurrentProcess(), modpath, &entry->size);
        }
        /* load_base is still zero here if dbghelp was not tried or failed */
        if (entry->u.load_base == 0) {
            dr_global_free(entry, sizeof(*entry));
            return NULL;
        }
    }
    hashtable_add(&modtable, (void *)modpath, (void *)entry);
    return entry;
}
/***************************************************************************
* SYMBOL HANDLING
*/
/* SYMBOL_INFO.Name has 1 char in the struct, so a full name buffer of
 * full_sz bytes needs only full_sz-1 bytes beyond sizeof(SYMBOL_INFO).
 */
#define NAME_EXTRA_SZ(full_sz) ((full_sz)-1)
enum {
/* Allocation size used by alloc_symbol_info()/free_symbol_info(): the struct
 * plus room for a maximum-length name.
 * MAX_SYM_NAME comes from dbghelp.h and is equal to 2000
 */
SYMBOL_INFO_SIZE = (sizeof(SYMBOL_INFO) + NAME_EXTRA_SZ(MAX_SYM_NAME * sizeof(TCHAR)))
};
/* Token that detemplatize() looks for in front of '<' to recognize operators */
#define OPERATOR "operator"
#define LEN_OPERATOR 8 /* strlen("operator") */
/* Removes the template parameters from the passed-in demangled
 * symbol.  Assumes there is no function return type (or other tokens)
 * prior to the name, and that there are no function arguments: thus,
 * templates can only appear in class names (classes could be nested)
 * and the function name itself.  This makes our job much easier: we just
 * remove everything between outer <> pairs in all class names.  In
 * the function name, we look for leading < to keep (for operators).
 * Note that there is not always a space between the operator name and
 * the template start (e.g., "std::operator<<<std::char_traits<char> >").
 *
 * Supports in-buffer detemplatizing: i.e., dst can equal src, but there
 * should be no partial (non-full) overlap.
 *
 * dst, dst_sz:  output buffer and its size in bytes.
 * src:          null-terminated demangled name.
 * dst_written:  optional; on success receives the number of bytes written
 *               to dst, including the terminating null.
 * Returns DRSYM_SUCCESS; DRSYM_ERROR on malformed input; or
 * DRSYM_ERROR_NOMEM if dst is too small (dst is then null-terminated at
 * its last byte, except when dst_sz is 0 and nothing is written).
 * On a non-success return dst may already have been partly written, which
 * matters to callers passing dst == src.
 */
static drsym_error_t
detemplatize(char *dst DR_PARAM_OUT, size_t dst_sz, const char *src,
             size_t *dst_written DR_PARAM_OUT)
{
    const char *c = src;
    char *d = dst;
    uint in_template = 0;
    bool just_copy = false;
    /* With a zero-sized buffer there is no room even for the null, and the
     * dst[dst_sz - 1] writes below would underflow.
     */
    if (dst_sz == 0)
        return DRSYM_ERROR_NOMEM;
    while (*c != '\0') {
        if (just_copy)
            just_copy = false;
        else if (*c == '<') {
            /* Look for "operator<<" */
            if (in_template == 0 && *(c + 1) == '<' &&
                /* To support in-buffer, any time we look backward we have
                 * to look at dst and not src.
                 * We need at least LEN_OPERATOR chars already written: use >=
                 * so that a name starting with "operator" (no namespace
                 * prefix) is recognized too.
                 */
                d - dst >= LEN_OPERATOR && *(d - 1) == 'r' && /* quick check */
                strncmp(d - LEN_OPERATOR, OPERATOR, LEN_OPERATOR) == 0) {
                /* We've hit an "operator<<" corner case.  We need two walks
                 * from here to the end.  We go with a quick walk now for code
                 * simplicity.  We assume there's either no truncation (src is
                 * MAX_SYM_NAME long from a direct dbghelp query) or if the user
                 * passed us a truncated symbol (to drsym_demangle_symbol())
                 * it's ok to fail.
                 *
                 * XXX: using namespace __identifier("operator<"), it's possible
                 * to construct a symbol name that fools us.  We could take
                 * extra effort to try and detect malformed names up front,
                 * instead of incrementally until we've already written to dst --
                 * which matters for src==dst.  For now we rely on the caller
                 * recovering.
                 */
                const char *forw;
                uint count = 0;
                /* Net count of unmatched '<' from the 2nd '<' to the end */
                for (forw = c + 1; *forw != '\0'; forw++) {
                    if (*forw == '<')
                        count++;
                    else if (*forw == '>')
                        count--;
                }
                if (count == 0) {
                    /* "operator<" */
                } else if (count == 1) {
                    /* "operator<<" */
                    just_copy = true; /* just copy 2nd < */
                } else /* malformed input */
                    return DRSYM_ERROR;
            } else if (*(c + 1) != '=' && /* rule out "operator<=" */
                       /* Rule out <> used in identifiers, such as in
                        * "<CrtImplementationDetails>::NativeDll::ProcessAttach"
                        * (constructed for MSCRT via __identifier).
                        */
                       c != src && *(d - 1) != ':')
                in_template++;
        } else if (in_template > 0 && *c == '>') {
            /* A '>' seen outside of any template is copied as an ordinary
             * character by the code below.
             */
            in_template--;
        }
        /* We include the outer '<>' to match Linux. */
        if (in_template == 0 || (in_template == 1 && *c == '<')) {
            *d = *c;
            d++;
            if (d - dst >= (ssize_t)dst_sz - 1) {
                dst[dst_sz - 1] = '\0';
                return DRSYM_ERROR_NOMEM;
            }
        }
        c++;
    }
    if (in_template > 0) {
        /* Input is truncated.  Just close the <>. */
        *d = '>';
        d++;
        if (d - dst >= (ssize_t)dst_sz - 1) {
            dst[dst_sz - 1] = '\0';
            return DRSYM_ERROR_NOMEM;
        }
    }
    *d = '\0';
    if (dst_written != NULL)
        *dst_written = d + 1 - dst;
    return DRSYM_SUCCESS;
}
/* Allocates a SYMBOL_INFO with room for a MAX_SYM_NAME-sized name and sets
 * its SizeOfStruct and MaxNameLen fields; all other fields are left
 * uninitialized.  Release it with free_symbol_info().
 */
static PSYMBOL_INFO
alloc_symbol_info(void)
{
    /* N.B.: we do not call dr_get_current_drcontext() and use dr_thread_alloc()
     * b/c that's not supported in standalone mode and we want standalone
     * tools to be able to use drsyms
     */
    PSYMBOL_INFO sym = (PSYMBOL_INFO)dr_global_alloc(SYMBOL_INFO_SIZE);
    sym->MaxNameLen = MAX_SYM_NAME;
    sym->SizeOfStruct = sizeof(SYMBOL_INFO);
    return sym;
}
/* Frees a SYMBOL_INFO obtained from alloc_symbol_info(). */
static void
free_symbol_info(PSYMBOL_INFO info)
{
    /* Must match the size passed to dr_global_alloc() in alloc_symbol_info() */
    const size_t alloc_sz = SYMBOL_INFO_SIZE;
    dr_global_free(info, alloc_sz);
}
/* Copies a dbghelp SYMBOL_INFO into the caller-visible drsym_info_t.
 * File and line info is assumed to not be available and already zeroed out.
 *
 * out:            destination; out->name may be NULL to skip the name copy.
 * info:           dbghelp result to translate.
 * base:           dbghelp base of the containing module; offsets written to
 *                 out are relative to it.
 * set_debug_kind: whether to query dbghelp for out->debug_kind (set to 0 if
 *                 that query fails).
 * flags:          DRSYM_* demangling flags; unless a full or pdb-template
 *                 demangling is requested, template arguments are stripped
 *                 from the name.
 * Always returns DRSYM_SUCCESS.
 */
static drsym_error_t
fill_in_drsym_info(drsym_info_t *out DR_PARAM_INOUT, PSYMBOL_INFO info, DWORD64 base,
                   bool set_debug_kind, uint flags)
{
    if (set_debug_kind) {
        if (!query_available(GetCurrentProcess(), base, &out->debug_kind))
            out->debug_kind = 0;
    }
    out->start_offs = (size_t)(info->Address - base);
    out->end_offs = (size_t)((info->Address + info->Size) - base);
    out->name_available_size = info->NameLen * sizeof(char);
    out->type_id = info->TypeIndex;
    if (out->name != NULL) {
        /* We don't check for DRSYM_DEMANGLE b/c that's always implied for PDB */
        bool copy_raw =
            TESTANY(DRSYM_DEMANGLE_FULL | DRSYM_DEMANGLE_PDB_TEMPLATES, flags);
        if (!copy_raw) {
            /* On failure to detemplatize we fall back to straight copy */
            copy_raw = detemplatize(out->name, out->name_size, info->Name, NULL) !=
                DRSYM_SUCCESS;
        }
        if (copy_raw)
            strncpy(out->name, info->Name, out->name_size);
        out->name[out->name_size - 1] = '\0';
    }
    /* Fields beyond name require compatibility checks */
    if (out->struct_size > offsetof(drsym_info_t, flags)) {
        /* Remove unsupported flags */
        out->flags = flags & ~(UNSUPPORTED_PDB_FLAGS);
        out->flags |= DRSYM_DEMANGLE; /* always done (xref i#601) */
    }
    return DRSYM_SUCCESS;
}
/* Looks up the symbol and source line containing the address at offset
 * modoffs within the module at modpath, filling in out.
 *
 * Returns DRSYM_ERROR_INVALID_PARAMETER or DRSYM_ERROR_INVALID_SIZE for bad
 * arguments, DRSYM_ERROR_LOAD_FAILED if the module's symbols cannot be
 * loaded, DRSYM_ERROR_SYMBOL_NOT_FOUND if no symbol covers the address,
 * DRSYM_ERROR_LINE_NOT_AVAILABLE if the symbol was found but no line info
 * exists (the symbol fields of out are valid in that case), and
 * DRSYM_SUCCESS otherwise.  For pecoff modules the unix-style lookup's
 * result is returned as-is.
 */
static drsym_error_t
drsym_lookup_address_local(const char *modpath, size_t modoffs,
                           drsym_info_t *out DR_PARAM_INOUT, uint flags)
{
    mod_entry_t *entry;
    drsym_error_t status;
    if (out == NULL || modpath == NULL)
        return DRSYM_ERROR_INVALID_PARAMETER;
    /* If we add fields in the future we would dispatch on out->struct_size */
    if (out->struct_size != sizeof(*out) &&
        /* Check for pre-flags field */
        out->struct_size != offsetof(drsym_info_t, flags))
        return DRSYM_ERROR_INVALID_SIZE;
    dr_recurlock_lock(symbol_lock);
    entry = lookup_or_load(modpath, true /*use dbghelp*/);
    if (entry == NULL) {
        status = DRSYM_ERROR_LOAD_FAILED;
    } else if (entry->use_pecoff_symtable) {
        status = drsym_unix_lookup_address(entry->u.pecoff_data, modoffs, out, flags);
    } else {
        DWORD64 mod_base = entry->u.load_base;
        DWORD64 sym_disp;
        PSYMBOL_INFO sym = alloc_symbol_info();
        /* Symbols are stored as UTF-8 in the PE and PDB files so we don't need
         * to use SymFromAddrW or do any conversion (i#1085).
         */
        if (!SymFromAddr(GetCurrentProcess(), mod_base + modoffs, &sym_disp, sym)) {
            NOTIFY("SymFromAddr error %d\n", GetLastError());
            status = DRSYM_ERROR_SYMBOL_NOT_FOUND;
        } else {
            status = fill_in_drsym_info(out, sym, mod_base, true, flags);
            if (status == DRSYM_SUCCESS) {
                NOTIFY("Symbol 0x%I64x => %s+0x%x (0x%I64x-0x%I64x)\n",
                       mod_base + modoffs, out->name, sym_disp, sym->Address,
                       sym->Address + sym->Size);
            }
        }
        free_symbol_info(sym);
        /* Only look for line info once the symbol itself was resolved */
        if (status == DRSYM_SUCCESS) {
            IMAGEHLP_LINEW64 line;
            DWORD line_disp;
            line.SizeOfStruct = sizeof(line);
            if (SymGetLineFromAddrW64(GetCurrentProcess(), mod_base + modoffs,
                                      &line_disp, &line)) {
                NOTIFY("%S:%u+0x%x\n", line.FileName, line.LineNumber, line_disp);
                out->file_available_size = wcslen(line.FileName) * sizeof(wchar_t);
                if (out->file != NULL) {
                    /* Convert the wide filename to UTF-8 (i#1085).
                     * MSDN docs imply that FileName is reused on subsequent
                     * calls: hence we must copy into a caller-provided buffer
                     * even if we didn't want to convert to UTF-8.
                     */
                    dr_snprintf(out->file, out->file_size, "%S", line.FileName);
                    out->file[out->file_size - 1] = '\0';
                }
                out->line = line.LineNumber;
                out->line_offs = line_disp;
            } else {
                NOTIFY("SymGetLineFromAddr64 error %d\n", GetLastError());
                out->file_available_size = 0;
                if (out->file != NULL)
                    out->file[0] = '\0';
                out->line = 0;
                out->line_offs = 0;
                status = DRSYM_ERROR_LINE_NOT_AVAILABLE;
            }
        }
    }
    dr_recurlock_unlock(symbol_lock);
    return status;
}
/* Resolves a symbol name to its offset from the base of the module at
 * modpath, storing the offset in *modoffs.
 *
 * Returns DRSYM_ERROR_INVALID_PARAMETER if any pointer argument is NULL,
 * DRSYM_ERROR_LOAD_FAILED if the module's symbols cannot be loaded,
 * DRSYM_ERROR_SYMBOL_NOT_FOUND if dbghelp cannot find the name or reports an
 * address outside the module, and DRSYM_SUCCESS otherwise.  For pecoff
 * modules the unix-style lookup's result is returned as-is.
 */
static drsym_error_t
drsym_lookup_symbol_local(const char *modpath, const char *symbol,
                          size_t *modoffs DR_PARAM_OUT, uint flags)
{
    mod_entry_t *entry;
    drsym_error_t status = DRSYM_ERROR_SYMBOL_NOT_FOUND;
    PSYMBOL_INFO sym;
    if (modpath == NULL || symbol == NULL || modoffs == NULL)
        return DRSYM_ERROR_INVALID_PARAMETER;
    dr_recurlock_lock(symbol_lock);
    entry = lookup_or_load(modpath, true /*use dbghelp*/);
    if (entry == NULL) {
        dr_recurlock_unlock(symbol_lock);
        return DRSYM_ERROR_LOAD_FAILED;
    }
    if (entry->use_pecoff_symtable) {
        status = drsym_unix_lookup_symbol(entry->u.pecoff_data, symbol, modoffs, flags);
        dr_recurlock_unlock(symbol_lock);
        return status;
    }
    /* the only thing identifying the target module is the symbol name,
     * which should be of "modname!symname" format
     */
    sym = alloc_symbol_info();
    if (!SymFromName(GetCurrentProcess(), (char *)symbol, sym)) {
        NOTIFY("SymFromName error %d %s\n", GetLastError(), symbol);
    } else if (sym->Address >= entry->u.load_base &&
               sym->Address < entry->u.load_base + entry->size) {
        NOTIFY("%s => 0x%I64x\n", __FUNCTION__, sym->Address);
        *modoffs = (size_t)(sym->Address - entry->u.load_base);
        status = DRSYM_SUCCESS;
    } else {
        /* i#4153: Sometimes SymFromName returns a bogus address outside the library! */
        NOTIFY("SymFromName => 0x%I64x outside module bounds 0x%I64x-0x%I64x\n",
               sym->Address, entry->u.load_base, entry->u.load_base + entry->size);
    }
    free_symbol_info(sym);
    dr_recurlock_unlock(symbol_lock);
    return status;
}
/* Context handed to enum_cb() through dbghelp's enumerate/search routines */
typedef struct _enum_info_t {
/* Simple user callback (name + offset); used only when cb_ex is NULL */
drsym_enumerate_cb cb;
/* Extended user callback taking a drsym_info_t; takes precedence over cb */
drsym_enumerate_ex_cb cb_ex;
/* Scratch drsym_info_t passed to cb_ex; NULL when cb_ex is NULL */
drsym_info_t *out;
/* DRSYM_* flags from the caller, controlling name demangling */
uint flags;
/* Opaque user data forwarded to the user callback */
void *data;
/* dbghelp base of the module, subtracted from addresses to form offsets */
DWORD64 base;
/* Set to true by enum_cb() on every invocation */
bool found_match;
} enum_info_t;
/* dbghelp per-symbol callback shared by the enumerate and search routines.
 * Context is an enum_info_t.  Records that a symbol was seen, then forwards
 * the symbol to the user's extended callback if one was supplied, else to
 * the simple one.  The user callback's return value is returned to dbghelp.
 */
static BOOL CALLBACK
enum_cb(PSYMBOL_INFO pSymInfo, ULONG SymbolSize, PVOID Context)
{
    enum_info_t *ctx = (enum_info_t *)Context;
    ctx->found_match = true;
    if (ctx->cb_ex == NULL) {
        /* XXX: we do this in place, but it's dbghelp's buffer -- we assume
         * it's dead space at this point.
         */
        /* We don't check for DRSYM_DEMANGLE b/c that's always implied for PDB */
        if (!TESTANY(DRSYM_DEMANGLE_FULL | DRSYM_DEMANGLE_PDB_TEMPLATES, ctx->flags)) {
            /* XXX: we can't do much about a failure so we carry on */
            (void)detemplatize(pSymInfo->Name, pSymInfo->MaxNameLen, pSymInfo->Name,
                               NULL);
        }
        return (BOOL)(*ctx->cb)(pSymInfo->Name,
                                (size_t)(pSymInfo->Address - ctx->base), ctx->data);
    }
    if (pSymInfo->NameLen * sizeof(char) > ctx->out->name_size) {
        /* We're using MAX_SYM_NAME so this shouldn't happen.  If it turns
         * out it can happen we should realloc here.
         */
        NOTIFY("symbol enum name exceeded MAX_SYM_NAME size\n");
    }
    /* We ignore errors in detemplatizing */
    fill_in_drsym_info(ctx->out, pSymInfo, ctx->base, false, ctx->flags);
    /* It seems to be impossible to get line # info for dup syms at same addr
     * b/c none of the search/enum routines return it: it has to be looked
     * up from the addr, which is not good enough.
     */
    return (BOOL)(*ctx->cb_ex)(ctx->out, DRSYM_ERROR_LINE_NOT_AVAILABLE, ctx->data);
}
/* Enumerates the symbols of the module at modpath, invoking callback_ex (if
 * non-NULL) or else callback for each one.
 *
 * match:     pattern passed to SymEnumSymbols to filter names; not used for
 *            pecoff modules.
 * info_size: must equal sizeof(drsym_info_t) when callback_ex is used with a
 *            dbghelp module.
 * data:      opaque value forwarded to the callback.
 * flags:     DRSYM_* demangling flags.
 * Returns DRSYM_ERROR_INVALID_PARAMETER or DRSYM_ERROR_INVALID_SIZE for bad
 * arguments, DRSYM_ERROR_LOAD_FAILED if the symbols cannot be loaded,
 * DRSYM_ERROR_SYMBOL_NOT_FOUND if no symbol was passed to the callback, and
 * DRSYM_SUCCESS otherwise.  For pecoff modules the unix-style enumeration's
 * result is returned as-is.
 */
static drsym_error_t
drsym_enumerate_symbols_local(const char *modpath, const char *match,
                              drsym_enumerate_cb callback,
                              drsym_enumerate_ex_cb callback_ex, size_t info_size,
                              void *data, uint flags)
{
    mod_entry_t *mod;
    enum_info_t info;
    if (modpath == NULL || (callback == NULL && callback_ex == NULL))
        return DRSYM_ERROR_INVALID_PARAMETER;
    dr_recurlock_lock(symbol_lock);
    mod = lookup_or_load(modpath, true /*use dbghelp*/);
    if (mod == NULL) {
        dr_recurlock_unlock(symbol_lock);
        return DRSYM_ERROR_LOAD_FAILED;
    }
    recursive_context = true;
    if (mod->use_pecoff_symtable) {
        drsym_error_t symerr = drsym_unix_enumerate_symbols(
            mod->u.pecoff_data, callback, callback_ex, info_size, data, flags);
        recursive_context = false;
        dr_recurlock_unlock(symbol_lock);
        return symerr;
    }
    info.cb = callback;
    info.cb_ex = callback_ex;
    if (info.cb_ex != NULL) {
        if (info_size != sizeof(drsym_info_t)) {
            /* Undo the state set above: returning with symbol_lock held
             * would block every other thread's symbol query.
             */
            recursive_context = false;
            dr_recurlock_unlock(symbol_lock);
            return DRSYM_ERROR_INVALID_SIZE;
        }
        /* Scratch output struct reused for every symbol passed to cb_ex */
        info.out = (drsym_info_t *)dr_global_alloc(info_size);
        info.out->struct_size = info_size;
        info.out->name = (char *)dr_global_alloc(MAX_SYM_NAME);
        info.out->name_size = MAX_SYM_NAME;
        if (!query_available(GetCurrentProcess(), mod->u.load_base,
                             &info.out->debug_kind))
            info.out->debug_kind = 0;
        info.out->file = NULL;
        info.out->file_size = 0;
        info.out->file_available_size = 0;
        info.out->line = 0;
        info.out->line_offs = 0;
    } else
        info.out = NULL;
    info.flags = flags;
    info.data = data;
    info.base = mod->u.load_base;
    info.found_match = false;
    if (!SymEnumSymbols(GetCurrentProcess(), mod->u.load_base, match, enum_cb,
                        (PVOID)&info)) {
        NOTIFY("SymEnumSymbols error %d\n", GetLastError());
    }
    if (info.out != NULL) {
        dr_global_free(info.out->name, MAX_SYM_NAME);
        dr_global_free(info.out, info_size);
    }
    recursive_context = false;
    dr_recurlock_unlock(symbol_lock);
    if (!info.found_match)
        return DRSYM_ERROR_SYMBOL_NOT_FOUND;
    return DRSYM_SUCCESS;
}
/* SymSearch (w/ default flags) is much faster than SymEnumSymbols or even
 * SymFromName so we export it separately for Windows (Dr. Memory i#313).
 *
 * Searches the module at modpath for symbols matching match and invokes
 * callback_ex (if non-NULL) or else callback for each.  If SymSearch cannot
 * be found in dbghelp.dll this falls back to drsym_enumerate_symbols_local().
 *
 * flags:     DRSYM_* flags; DRSYM_FULL_SEARCH selects SYMSEARCH_ALLITEMS.
 * info_size: must equal sizeof(drsym_info_t) when callback_ex is used.
 * data:      opaque value forwarded to the callback.
 * Returns DRSYM_ERROR_INVALID_PARAMETER or DRSYM_ERROR_INVALID_SIZE for bad
 * arguments, DRSYM_ERROR_LOAD_FAILED if the symbols cannot be loaded,
 * DRSYM_ERROR_NOT_IMPLEMENTED for pecoff modules,
 * DRSYM_ERROR_SYMBOL_NOT_FOUND if SymSearch fails, and DRSYM_SUCCESS
 * otherwise.
 */
static drsym_error_t
drsym_search_symbols_local(const char *modpath, const char *match, uint flags,
                           drsym_enumerate_cb callback, drsym_enumerate_ex_cb callback_ex,
                           size_t info_size, void *data)
{
    mod_entry_t *mod;
    drsym_error_t res = DRSYM_SUCCESS;
    /* dbghelp.dll 6.3+ is required for SymSearch, but the VS2005sp1
     * headers and lib have only 6.1, so we dynamically look it up
     */
    static func_SymSearch_t func;
    if (modpath == NULL || (callback == NULL && callback_ex == NULL))
        return DRSYM_ERROR_INVALID_PARAMETER;
    dr_recurlock_lock(symbol_lock);
    mod = lookup_or_load(modpath, true /*use dbghelp*/);
    if (mod == NULL)
        res = DRSYM_ERROR_LOAD_FAILED;
    else if (mod->use_pecoff_symtable) {
        /* pecoff doesn't support search, and the enumerate impl in
         * drsyms_unix.c doesn't take a pattern
         */
        res = DRSYM_ERROR_NOT_IMPLEMENTED;
    } else {
        enum_info_t info;
        if (func == NULL) {
            /* if we fail to find it we'll pay the lookup cost every time,
             * but if we succeed we'll cache it
             */
            HMODULE hmod = GetModuleHandle("dbghelp.dll");
            if (hmod != NULL)
                func = (func_SymSearch_t)GetProcAddress(hmod, "SymSearch");
            if (func == NULL) {
                dr_recurlock_unlock(symbol_lock);
                /* fall back to slower enum */
                return drsym_enumerate_symbols_local(modpath, match, callback,
                                                     callback_ex, info_size, data, flags);
            }
        }
        /* Validate before touching recursive_context, and release the lock:
         * returning with symbol_lock held would block every other thread's
         * symbol query.
         */
        if (callback_ex != NULL && info_size != sizeof(drsym_info_t)) {
            dr_recurlock_unlock(symbol_lock);
            return DRSYM_ERROR_INVALID_SIZE;
        }
        recursive_context = true;
        info.cb = callback;
        info.cb_ex = callback_ex;
        if (info.cb_ex != NULL) {
            /* Scratch output struct reused for every symbol passed to cb_ex */
            info.out = (drsym_info_t *)dr_global_alloc(info_size);
            info.out->struct_size = info_size;
            info.out->name = (char *)dr_global_alloc(MAX_SYM_NAME);
            info.out->name_size = MAX_SYM_NAME;
            /* enum_cb() does not fill in debug_kind, so give it a defined
             * value rather than handing uninitialized memory to the callback.
             * NOTE(review): drsym_enumerate_symbols_local() queries the real
             * kind via query_available(); consider doing the same here.
             */
            info.out->debug_kind = 0;
            info.out->file = NULL;
            info.out->file_size = 0;
            info.out->file_available_size = 0;
            info.out->line = 0;
            info.out->line_offs = 0;
        } else
            info.out = NULL;
        info.flags = flags;
        info.data = data;
        info.base = mod->u.load_base;
        info.found_match = false; /* written by enum_cb(); not consulted here */
        if (!(*func)(GetCurrentProcess(), mod->u.load_base, 0, 0, match, 0, enum_cb,
                     (PVOID)&info,
                     TEST(DRSYM_FULL_SEARCH, flags) ? SYMSEARCH_ALLITEMS : 0)) {
            NOTIFY("SymSearch error %d\n", GetLastError());
            res = DRSYM_ERROR_SYMBOL_NOT_FOUND;
        }
        if (info.out != NULL) {
            dr_global_free(info.out->name, MAX_SYM_NAME);
            dr_global_free(info.out, info_size);
        }
        recursive_context = false;
    }
    dr_recurlock_unlock(symbol_lock);
    return res;
}
/* Demangles an MSVC-decorated name into dst using UnDecorateSymbolName.
 *
 * flags: DRSYM_DEMANGLE_FULL requests argument types; otherwise only the
 *        name is produced, and template arguments are stripped unless
 *        DRSYM_DEMANGLE_PDB_TEMPLATES is also set.
 * Returns the length reported for the demangled name on success; 0 if
 * demangling failed (dst then holds a truncated copy of mangled) or if
 * template stripping failed (dst then holds the unstripped name); or an
 * estimate larger than dst_sz if the output was truncated.
 */
static size_t
demangle_symbol(char *dst DR_PARAM_OUT, size_t dst_sz, const char *mangled, uint flags)
{
    DWORD undec_flags;
    size_t len;
    if (!TEST(DRSYM_DEMANGLE_FULL, flags)) {
        /* i#587: this still expands templates, but we remove those below. */
        undec_flags = UNDNAME_NAME_ONLY;
    } else {
        /* FIXME: I'd like to suppress "class" from the types, but I can't find
         * an option to control it other than UNDNAME_NAME_ONLY, which
         * suppresses overloads, which we want.
         */
        undec_flags = (UNDNAME_COMPLETE | UNDNAME_NO_ALLOCATION_LANGUAGE |
                       UNDNAME_NO_ALLOCATION_MODEL | UNDNAME_NO_MEMBER_TYPE |
                       UNDNAME_NO_FUNCTION_RETURNS | UNDNAME_NO_ACCESS_SPECIFIERS |
                       UNDNAME_NO_MS_KEYWORDS);
    }
    len = (size_t)UnDecorateSymbolName(mangled, dst, (DWORD)dst_sz, undec_flags);
    if (len == 0) {
        /* The docs say the contents of dst are undetermined, so we cannot rely
         * on it being truncated.
         */
        strncpy(dst, mangled, dst_sz);
        dst[dst_sz - 1] = '\0';
        NOTIFY("UnDecorateSymbolName error %d\n", GetLastError());
        return len;
    }
    /* The truncation behavior is not documented, but testing shows dbghelp
     * truncates and returns the number of characters written, not how many it
     * would take to hold the buffer.  It also returns 2 less than dst_sz if
     * truncating, one for the nul byte and it's not clear what the other is
     * for.
     */
    if (len + 2 >= dst_sz) {
        NOTIFY("UnDecorateSymbolName overflowed\n");
        /* FIXME: This return value is made up and may not be large enough.
         * It will work eventually if the caller reallocates their buffer
         * and retries in a loop, or if they just want to detect truncation.
         */
        return dst_sz * 2;
    }
    if (!TESTANY(DRSYM_DEMANGLE_FULL | DRSYM_DEMANGLE_PDB_TEMPLATES, flags)) {
        /* We do this in place. */
        if (detemplatize(dst, dst_sz, dst, &len) != DRSYM_SUCCESS) {
            /* Revert to a raw copy */
            UnDecorateSymbolName(mangled, dst, (DWORD)dst_sz, undec_flags);
            return 0;
        }
    }
    return len; /* Success. */
}
/* Context handed to enum_lines_cb() through SymEnumLines */
typedef struct _enum_line_info_t {
/* User callback invoked once per source line record */
drsym_enumerate_lines_cb cb;
/* Opaque user data forwarded to cb */
void *data;
/* dbghelp base of the module, subtracted from addresses to form offsets */
DWORD64 base;
/* Result to return from the enumeration; starts as DRSYM_SUCCESS and is
 * overwritten on failure.
 */
drsym_error_t success;
} enum_line_info_t;
/* dbghelp per-line callback used by drsym_enumerate_lines_local().
 * data is an enum_line_info_t.  Translates the record into a
 * drsym_line_info_t and passes it to the user's callback.  Returns FALSE to
 * stop the enumeration when the record is smaller than expected or the user
 * callback returns false; TRUE otherwise.
 */
static BOOL CALLBACK
enum_lines_cb(PSRCCODEINFO in, void *data)
{
    enum_line_info_t *ctx = (enum_line_info_t *)data;
    drsym_line_info_t rec;
    if (in->SizeOfStruct < sizeof(*in)) {
        /* Old dbghelp or something: bail */
        ctx->success = DRSYM_ERROR_FEATURE_NOT_AVAILABLE;
        return FALSE;
    }
    rec.cu_name = in->Obj;
    rec.file = in->FileName;
    rec.line = in->LineNumber;
    rec.line_addr = (size_t)(in->Address - ctx->base);
    return (*ctx->cb)(&rec, ctx->data) ? TRUE : FALSE;
}
/* Enumerates every source line record of the module at modpath, invoking
 * callback for each one.
 *
 * Returns DRSYM_ERROR_INVALID_PARAMETER if modpath or callback is NULL,
 * DRSYM_ERROR_LOAD_FAILED if the symbols cannot be loaded,
 * DRSYM_ERROR_LINE_NOT_AVAILABLE if SymEnumLines fails, the status recorded
 * by enum_lines_cb() otherwise (DRSYM_SUCCESS unless it bailed).  For pecoff
 * modules the unix-style enumeration's result is returned as-is.
 */
static drsym_error_t
drsym_enumerate_lines_local(const char *modpath, drsym_enumerate_lines_cb callback,
                            void *data)
{
    mod_entry_t *entry;
    drsym_error_t status;
    if (modpath == NULL || callback == NULL)
        return DRSYM_ERROR_INVALID_PARAMETER;
    dr_recurlock_lock(symbol_lock);
    entry = lookup_or_load(modpath, true /*use dbghelp*/);
    if (entry == NULL) {
        dr_recurlock_unlock(symbol_lock);
        return DRSYM_ERROR_LOAD_FAILED;
    }
    recursive_context = true;
    if (entry->use_pecoff_symtable) {
        status = drsym_unix_enumerate_lines(entry->u.pecoff_data, callback, data);
    } else {
        enum_line_info_t walk;
        walk.cb = callback;
        walk.data = data;
        walk.base = entry->u.load_base;
        walk.success = DRSYM_SUCCESS;
        /* SymEnumSourceLines does not include compiler-provided files.
         * We assume the caller wants all files, so we use SymEnumLines.
         */
        if (!SymEnumLines(GetCurrentProcess(), entry->u.load_base, NULL /*all*/,
                          NULL /*all*/, enum_lines_cb, &walk)) {
            NOTIFY("SymEnumLines error %d\n", GetLastError());
            walk.success = DRSYM_ERROR_LINE_NOT_AVAILABLE;
        }
        status = walk.success;
    }
    recursive_context = false;
    dr_recurlock_unlock(symbol_lock);
    return status;
}
/***************************************************************************
* Dbghelp type information decoding routines.
*/
/* Hash-bits argument passed to hashtable_init_ex() for the per-query type
 * hashtable that is used to avoid recursion.  This is a bit count, not an
 * entry count (presumably the table starts with 2^bits buckets -- confirm
 * against the hashtable documentation).
 */
#define TYPE_MAP_HASH_BITS 6
/* Common data passed among these routines */
typedef struct _type_query_t {
/* Module base handed to SymGetTypeInfo for every property query. */
DWORD64 base;
/* Allocator from which all returned drsym_type_t structures are carved;
 * initialized over the caller-supplied buffer in drsym_get_type_by_id().
 */
mempool_t pool;
/* Hashtable for mapping type indices to type data structures, to avoid recursion. */
hashtable_t type_map_table;
} type_query_t;
/* Forward declarations: the per-tag decode_* routines below call back into
 * decode_type() for their sub-types, and decode_base_type() falls back on
 * make_unknown(), both of which are defined after them.
 */
static drsym_error_t
decode_type(type_query_t *query, ULONG type_idx, uint expand_sub,
drsym_type_t **type_out DR_PARAM_OUT);
static drsym_error_t
make_unknown(type_query_t *query, drsym_type_t **type_out DR_PARAM_OUT);
/* Thin wrapper around SymGetTypeInfo for the current process.
 * Queries property of type index type_idx in the module at base, storing the
 * answer through arg.  Returns true on success; on failure returns false and,
 * when verbose is set, prints the last error to STDERR.
 */
static bool
get_type_info(DWORD64 base, ULONG type_idx, IMAGEHLP_SYMBOL_TYPE_INFO property, void *arg)
{
    if (CAST_TO_bool(SymGetTypeInfo(GetCurrentProcess(), base, type_idx, property, arg)))
        return true;
    if (verbose) {
        dr_fprintf(STDERR, "drsyms: Error %d getting property %d of type index %d\n",
                   GetLastError(), (int)property, (int)type_idx);
    }
    return false;
}
/* Decodes a SymTagFunctionType into a drsym_func_type_t allocated from the
 * query pool.  The return type is always decoded.  The argument types are
 * decoded only when expand_sub > 0 and the function has children; otherwise
 * arg_types is NULL while num_args still holds the child count.
 * Sub-types are decoded with one fewer expansion level when expand_sub > 0.
 * Returns DRSYM_SUCCESS and sets *type_out, or an error code.
 */
static drsym_error_t
decode_func_type(type_query_t *query, ULONG type_idx, uint expand_sub,
                 drsym_type_t **type_out DR_PARAM_OUT)
{
    drsym_func_type_t *ftype;
    DWORD num_children;
    TI_FINDCHILDREN_PARAMS *kids = NULL;
    uint idx;
    drsym_error_t res;
    ULONG ret_idx;
    bool want_args;
    /* Levels left for sub-types: one is consumed here if we are expanding. */
    uint sub_levels = (expand_sub > 0) ? expand_sub - 1 : 0;

    if (!get_type_info(query->base, type_idx, TI_GET_CHILDRENCOUNT, &num_children))
        return DRSYM_ERROR;
    want_args = (expand_sub > 0 && num_children > 0);

    if (want_args) {
        /* TI_FINDCHILDREN_PARAMS ends in a variable-length array of child ids. */
        kids = POOL_ALLOC_SIZE(&query->pool, TI_FINDCHILDREN_PARAMS,
                               (sizeof(*kids) + num_children * sizeof(ULONG)));
        if (kids == NULL)
            return DRSYM_ERROR_NOMEM;
        kids->Count = num_children;
        kids->Start = 0;
        if (!get_type_info(query->base, type_idx, TI_FINDCHILDREN, kids))
            return DRSYM_ERROR;
    }

    ftype = POOL_ALLOC_SIZE(&query->pool, drsym_func_type_t, sizeof(*ftype));
    if (ftype == NULL)
        return DRSYM_ERROR_NOMEM;
    ftype->type.kind = DRSYM_TYPE_FUNC;
    ftype->type.size = 0; /* Not valid. */
    ftype->type.id = type_idx;
    ftype->num_args = num_children;

    if (!get_type_info(query->base, type_idx, TI_GET_TYPE, &ret_idx))
        return DRSYM_ERROR;
    res = decode_type(query, ret_idx, sub_levels, &ftype->ret_type);
    if (res != DRSYM_SUCCESS)
        return res;

    if (!want_args) {
        ftype->arg_types = NULL;
    } else {
        ftype->arg_types = POOL_ALLOC_SIZE(&query->pool, drsym_type_t *,
                                           num_children * sizeof(ftype->arg_types[0]));
        if (ftype->arg_types == NULL)
            return DRSYM_ERROR_NOMEM;
        idx = 0;
        while (idx < kids->Count) {
            res = decode_type(query, kids->ChildId[idx], sub_levels,
                              &ftype->arg_types[idx]);
            if (res != DRSYM_SUCCESS)
                return res;
            idx++;
        }
    }

    *type_out = &ftype->type;
    return DRSYM_SUCCESS;
}
/* Decodes a SymTagPointerType into a drsym_ptr_type_t allocated from the
 * query pool, then decodes the pointed-to type into its elt_type field.
 * *type_out is set before the element type is decoded, so it is written even
 * if that final decode fails.  Returns the result of decoding the element
 * type, or an earlier error code.
 */
static drsym_error_t
decode_ptr_type(type_query_t *query, ULONG type_idx, uint expand_sub,
                drsym_type_t **type_out DR_PARAM_OUT)
{
    ULONG64 byte_len;
    ULONG pointee_idx;
    drsym_ptr_type_t *ptr = POOL_ALLOC(&query->pool, drsym_ptr_type_t);

    if (ptr == NULL)
        return DRSYM_ERROR_NOMEM;
    if (!get_type_info(query->base, type_idx, TI_GET_LENGTH, &byte_len))
        return DRSYM_ERROR;
    ptr->type.kind = DRSYM_TYPE_PTR;
    ptr->type.size = (size_t)byte_len;
    ptr->type.id = type_idx;

    if (!get_type_info(query->base, type_idx, TI_GET_TYPE, &pointee_idx))
        return DRSYM_ERROR;
    *type_out = &ptr->type;
    /* Tail call reduces stack usage. */
    return decode_type(query, pointee_idx, expand_sub, &ptr->elt_type);
}
/* Decodes a SymTagBaseType.
 * Integer-like base types (char, wchar, bool, and the signed/unsigned int and
 * long kinds) become a drsym_int_type_t carrying the size and signedness;
 * void becomes a plain drsym_type_t of kind DRSYM_TYPE_VOID; anything else is
 * reported via make_unknown().  expand_sub is unused as base types have no
 * sub-types.
 * Returns DRSYM_SUCCESS and sets *type_out, DRSYM_ERROR_NOMEM if the pool is
 * exhausted, or DRSYM_ERROR if dbghelp cannot supply a property.
 */
static drsym_error_t
decode_base_type(type_query_t *query, ULONG type_idx, uint expand_sub,
                 drsym_type_t **type_out DR_PARAM_OUT)
{
    DWORD base_type; /* BasicType */
    bool is_signed;
    ULONG64 length;
    drsym_int_type_t *int_type;
    if (!get_type_info(query->base, type_idx, TI_GET_BASETYPE, &base_type))
        return DRSYM_ERROR;
    /* See if this base type is an int and if it's signed. */
    switch (base_type) {
    case btChar: /* neither signed nor unsigned */
    case btWChar:
    case btUInt:
    case btBool:
    case btULong: is_signed = false; break;
    case btInt:
    case btLong: is_signed = true; break;
    case btVoid: {
        drsym_type_t *vtype = POOL_ALLOC(&query->pool, drsym_type_t);
        if (vtype == NULL)
            return DRSYM_ERROR_NOMEM;
        vtype->kind = DRSYM_TYPE_VOID;
        vtype->size = 0;
        /* Fill in the id like every other decoder does, so that callers never
         * read an uninitialized field out of the pool buffer.
         */
        vtype->id = type_idx;
        *type_out = vtype;
        return DRSYM_SUCCESS;
    }
    default: return make_unknown(query, type_out);
    }
    if (!get_type_info(query->base, type_idx, TI_GET_LENGTH, &length))
        return DRSYM_ERROR;
    int_type = POOL_ALLOC(&query->pool, drsym_int_type_t);
    if (int_type == NULL)
        return DRSYM_ERROR_NOMEM;
    int_type->type.kind = DRSYM_TYPE_INT;
    int_type->type.size = (size_t)length;
    int_type->type.id = type_idx;
    int_type->is_signed = is_signed;
    *type_out = &int_type->type;
    return DRSYM_SUCCESS;
}
/* Decodes a SymTagArrayType into a drsym_ptr_type_t of kind DRSYM_TYPE_ARRAY
 * allocated from the query pool, then decodes the element type into its
 * elt_type field.  *type_out is set before the element type is decoded.
 * Returns the result of decoding the element type, or an earlier error code.
 */
static drsym_error_t
decode_array_type(type_query_t *query, ULONG type_idx, uint expand_sub,
                  drsym_type_t **type_out DR_PARAM_OUT)
{
    DWORD elt_idx; /* type index of the array elements */
    ULONG64 byte_len;
    drsym_ptr_type_t *arr = POOL_ALLOC(&query->pool, drsym_ptr_type_t);

    if (arr == NULL)
        return DRSYM_ERROR_NOMEM;

    /* Length dbghelp reports for the array type itself. */
    if (!get_type_info(query->base, type_idx, TI_GET_LENGTH, &byte_len))
        return DRSYM_ERROR;
    arr->type.kind = DRSYM_TYPE_ARRAY;
    arr->type.id = type_idx;
    arr->type.size = (size_t)byte_len;

    /* Find out what the elements are. */
    if (!get_type_info(query->base, type_idx, TI_GET_TYPEID, &elt_idx))
        return DRSYM_ERROR;
    *type_out = &arr->type;
    return decode_type(query, elt_idx, expand_sub, &arr->elt_type);
}
/* Decodes a SymTagTypedef by looking straight through it: the result is
 * whatever the aliased type decodes to, so no typedef node is produced.
 */
static drsym_error_t
decode_typedef(type_query_t *query, ULONG type_idx, uint expand_sub,
               drsym_type_t **type_out DR_PARAM_OUT)
{
    ULONG aliased_idx;
    if (get_type_info(query->base, type_idx, TI_GET_TYPE, &aliased_idx))
        return decode_type(query, aliased_idx, expand_sub, type_out);
    return DRSYM_ERROR;
}
/* Decodes a symbol whose real type is one further TI_GET_TYPE query away
 * (decode_type() uses this for function arguments, functions, and data).
 * Fails with DRSYM_ERROR if the query fails or if the symbol names itself as
 * its own type, which would otherwise recurse forever.
 */
static drsym_error_t
decode_arg_type(type_query_t *query, ULONG type_idx, uint expand_sub,
                drsym_type_t **type_out DR_PARAM_OUT)
{
    ULONG underlying_idx;
    if (!get_type_info(query->base, type_idx, TI_GET_TYPE, &underlying_idx) ||
        underlying_idx == type_idx)
        return DRSYM_ERROR;
    return decode_type(query, underlying_idx, expand_sub, type_out);
}
/* Decodes a SymTagUDT into a drsym_compound_type_t allocated from the query
 * pool.  The size, field count and (narrow-string) name are always filled in.
 * The field types are decoded only when expand_sub > 0 and the type has
 * children; otherwise field_types is NULL.  Sub-types are decoded with one
 * fewer expansion level when expand_sub > 0.
 * Returns DRSYM_SUCCESS and sets *type_out, DRSYM_ERROR_NOMEM if the pool is
 * exhausted, or DRSYM_ERROR if dbghelp cannot supply a property.
 */
static drsym_error_t
decode_compound_type(type_query_t *query, ULONG type_idx, uint expand_sub,
                     drsym_type_t **type_out DR_PARAM_OUT)
{
    drsym_compound_type_t *compound_type;
    DWORD field_count;
    TI_FINDCHILDREN_PARAMS *children = NULL;
    uint i;
    drsym_error_t r;
    ULONG64 length;
    wchar_t *name;
    size_t name_len;
    bool expand = (expand_sub > 0);
    if (expand)
        expand_sub--;
    if (!get_type_info(query->base, type_idx, TI_GET_CHILDRENCOUNT, &field_count))
        return DRSYM_ERROR;
    if (expand && field_count > 0) {
        /* TI_FINDCHILDREN_PARAMS ends in a variable-length array of child ids. */
        children = POOL_ALLOC_SIZE(&query->pool, TI_FINDCHILDREN_PARAMS,
                                   sizeof(*children) + field_count * sizeof(ULONG));
        if (children == NULL)
            return DRSYM_ERROR_NOMEM;
        children->Count = field_count;
        children->Start = 0;
        if (!get_type_info(query->base, type_idx, TI_FINDCHILDREN, children))
            return DRSYM_ERROR;
    }
    compound_type =
        POOL_ALLOC_SIZE(&query->pool, drsym_compound_type_t, sizeof(*compound_type));
    if (compound_type == NULL)
        return DRSYM_ERROR_NOMEM;
    /* XXX: no idea how to distinguish class from struct from union.
     * DWARF2 has separate types for those, but I guess we do the LCD here.
     */
    compound_type->type.kind = DRSYM_TYPE_COMPOUND;
    if (!get_type_info(query->base, type_idx, TI_GET_LENGTH, &length))
        return DRSYM_ERROR;
    compound_type->type.size = (size_t)length;
    compound_type->type.id = type_idx;
    compound_type->num_fields = field_count;
    /* Since Linux will have char*, for simpler cross-platform code we
     * convert dbghelp's wchar_t here.
     */
    if (!get_type_info(query->base, type_idx, TI_GET_SYMNAME, &name))
        return DRSYM_ERROR;
    name_len = wcslen(name);
    compound_type->name =
        POOL_ALLOC_SIZE(&query->pool, char, (name_len + 1) * sizeof(char));
    if (compound_type->name == NULL) {
        LocalFree(name);
        /* Pool exhaustion: report it as such so callers can grow their buffer,
         * matching every other allocation failure in these routines.
         */
        return DRSYM_ERROR_NOMEM;
    }
    /* _snprintf does not nul-terminate when the converted name fills the
     * buffer (a wide char can convert to more than one byte) and may write
     * nothing useful if the conversion fails, so zero the buffer first and
     * force the terminator afterward.
     */
    memset(compound_type->name, 0, name_len + 1);
    _snprintf(compound_type->name, name_len + 1, "%S", name);
    compound_type->name[name_len] = '\0';
    /* Docs aren't very clear, but online examples use LocalFree, and new
     * redirection of LocalAlloc proves it.
     */
    LocalFree(name);
    if (expand && field_count > 0) {
        compound_type->field_types =
            POOL_ALLOC_SIZE(&query->pool, drsym_type_t *,
                            field_count * sizeof(compound_type->field_types[0]));
        if (compound_type->field_types == NULL)
            return DRSYM_ERROR_NOMEM;
        for (i = 0; i < children->Count; i++) {
            r = decode_type(query, children->ChildId[i], expand_sub,
                            &compound_type->field_types[i]);
            if (r != DRSYM_SUCCESS)
                return r;
        }
    } else
        compound_type->field_types = NULL;
    *type_out = &compound_type->type;
    return DRSYM_SUCCESS;
}
/* Produces a placeholder drsym_type_t of kind DRSYM_TYPE_OTHER, with zero
 * size and id, for types we do not know how to decode.
 * Returns DRSYM_SUCCESS and sets *type_out, or DRSYM_ERROR_NOMEM if the pool
 * is exhausted.
 */
static drsym_error_t
make_unknown(type_query_t *query, drsym_type_t **type_out DR_PARAM_OUT)
{
    drsym_type_t *placeholder = POOL_ALLOC(&query->pool, drsym_type_t);
    if (placeholder != NULL) {
        placeholder->kind = DRSYM_TYPE_OTHER;
        placeholder->size = 0;
        placeholder->id = 0;
        *type_out = placeholder;
        return DRSYM_SUCCESS;
    }
    return DRSYM_ERROR_NOMEM;
}
/* Central dispatcher for type decoding.
 * Looks up type_idx in the query's type map first and hands back the already
 * decoded structure if there is one.  Otherwise it asks dbghelp for the
 * symbol tag, dispatches to the matching decode_* routine (unrecognized tags
 * go to make_unknown()), and records a successful result in the type map.
 * Returns an error code or success, and stores a pointer to the type created
 * in *type_out.
 */
static drsym_error_t
decode_type(type_query_t *query, ULONG type_idx, uint expand_sub,
            drsym_type_t **type_out DR_PARAM_OUT)
{
    DWORD symtag; /* SymTagEnum */
    drsym_error_t status;
    void *map_key = (void *)(ptr_uint_t)type_idx;
    /* Avoid recursion.
     * We assume that either this hashtable is local to this query, or
     * that the caller holds a big lock.  Thus we can
     * reference hashtable data after the lookup.
     */
    drsym_type_t *known =
        (drsym_type_t *)hashtable_lookup(&query->type_map_table, map_key);
    if (known != NULL) {
        *type_out = known;
        return DRSYM_SUCCESS;
    }

    if (!get_type_info(query->base, type_idx, TI_GET_SYMTAG, &symtag))
        return DRSYM_ERROR;
    /* DrMem i#1255: this means "no type info" so let's turn it into an error */
    if (symtag == SymTagNull)
        return DRSYM_ERROR;

    if (verbose) {
        switch (symtag) {
        case SymTagFunctionType: dr_fprintf(STDERR, "SymTagFunctionType\n"); break;
        case SymTagPointerType: dr_fprintf(STDERR, "SymTagPointerType\n"); break;
        case SymTagBaseType: dr_fprintf(STDERR, "SymTagBaseType\n"); break;
        case SymTagTypedef: dr_fprintf(STDERR, "SymTagTypedef\n"); break;
        case SymTagFunctionArgType: dr_fprintf(STDERR, "SymTagFunctionArgType\n"); break;
        case SymTagUDT: dr_fprintf(STDERR, "SymTagUDT\n"); break;
        case SymTagData: dr_fprintf(STDERR, "SymTagData\n"); break;
        case SymTagFunction: dr_fprintf(STDERR, "SymTagFunction\n"); break;
        default: dr_fprintf(STDERR, "unknown: %d\n", symtag); break;
        }
    }

    switch (symtag) {
    case SymTagFunctionType:
        status = decode_func_type(query, type_idx, expand_sub, type_out);
        break;
    case SymTagPointerType:
        status = decode_ptr_type(query, type_idx, expand_sub, type_out);
        break;
    case SymTagArrayType:
        status = decode_array_type(query, type_idx, expand_sub, type_out);
        break;
    case SymTagBaseType:
        status = decode_base_type(query, type_idx, expand_sub, type_out);
        break;
    case SymTagTypedef:
        status = decode_typedef(query, type_idx, expand_sub, type_out);
        break;
    case SymTagUDT:
        status = decode_compound_type(query, type_idx, expand_sub, type_out);
        break;
    /* Functions and data share decode_arg_type() with function arguments
     * b/c all three just need one further query to reach the real type.
     */
    case SymTagFunctionArgType:
    case SymTagFunction:
    case SymTagData:
        status = decode_arg_type(query, type_idx, expand_sub, type_out);
        break;
    default: status = make_unknown(query, type_out); break;
    }

    if (status != DRSYM_SUCCESS)
        return status;
    hashtable_add(&query->type_map_table, map_key, *type_out);
    return DRSYM_SUCCESS;
}
/***************************************************************************
* Exported routines.
*/
/* Exported entry point: forwards to drsym_lookup_address_local() unless we
 * are in sideline mode, which is not implemented.
 */
DR_EXPORT
drsym_error_t
drsym_lookup_address(const char *modpath, size_t modoffs,
                     drsym_info_t *out DR_PARAM_INOUT, uint flags)
{
    if (IS_SIDELINE)
        return DRSYM_ERROR_NOT_IMPLEMENTED;
    return drsym_lookup_address_local(modpath, modoffs, out, flags);
}
/* Exported entry point: forwards to drsym_lookup_symbol_local() unless we
 * are in sideline mode, which is not implemented.
 */
DR_EXPORT
drsym_error_t
drsym_lookup_symbol(const char *modpath, const char *symbol, size_t *modoffs DR_PARAM_OUT,
                    uint flags)
{
    if (IS_SIDELINE)
        return DRSYM_ERROR_NOT_IMPLEMENTED;
    return drsym_lookup_symbol_local(modpath, symbol, modoffs, flags);
}
/* Exported entry point: enumerates every symbol of the module through the
 * legacy (non-ex) callback, passing no match pattern and reporting
 * sizeof(drsym_info_t) as the info size.  Not implemented for sideline mode.
 */
DR_EXPORT
drsym_error_t
drsym_enumerate_symbols(const char *modpath, drsym_enumerate_cb callback, void *data,
                        uint flags)
{
    if (IS_SIDELINE)
        return DRSYM_ERROR_NOT_IMPLEMENTED;
    return drsym_enumerate_symbols_local(modpath, NULL /*match*/, callback,
                                         NULL /*no ex callback*/, sizeof(drsym_info_t),
                                         data, flags);
}
/* Exported entry point: enumerates every symbol of the module through the
 * extended callback, passing no match pattern and the caller's info_size.
 * Not implemented for sideline mode.
 */
DR_EXPORT
drsym_error_t
drsym_enumerate_symbols_ex(const char *modpath, drsym_enumerate_ex_cb callback,
                           size_t info_size, void *data, uint flags)
{
    if (IS_SIDELINE)
        return DRSYM_ERROR_NOT_IMPLEMENTED;
    return drsym_enumerate_symbols_local(modpath, NULL /*match*/,
                                         NULL /*no legacy callback*/, callback,
                                         info_size, data, flags);
}
/* Exported entry point: searches the module for symbols matching match and
 * reports them through the legacy (non-ex) callback.  The search always uses
 * DRSYM_DEFAULT_FLAGS, plus DRSYM_FULL_SEARCH when full is set.
 * Not implemented for sideline mode.
 */
DR_EXPORT
drsym_error_t
drsym_search_symbols(const char *modpath, const char *match, bool full,
                     drsym_enumerate_cb callback, void *data)
{
    uint search_flags;
    if (IS_SIDELINE)
        return DRSYM_ERROR_NOT_IMPLEMENTED;
    search_flags = (full ? DRSYM_FULL_SEARCH : 0) | DRSYM_DEFAULT_FLAGS;
    return drsym_search_symbols_local(modpath, match, search_flags, callback,
                                      NULL /*no ex callback*/, sizeof(drsym_info_t),
                                      data);
}
/* Exported entry point: searches the module for symbols matching match and
 * reports them through the extended callback.  Not implemented for sideline
 * mode.
 */
DR_EXPORT
drsym_error_t
drsym_search_symbols_ex(const char *modpath, const char *match, uint flags_in,
                        drsym_enumerate_ex_cb callback, size_t info_size, void *data)
{
    uint search_flags = flags_in;
    if (IS_SIDELINE)
        return DRSYM_ERROR_NOT_IMPLEMENTED;
    /* Compatibility check (xref i#1350): prior to adding the
     * flags field, this routine took "bool full" instead of "uint
     * flags".  A caller built against that older layout is recognized by an
     * info_size that stops right where the flags field now begins.
     */
    if (info_size == offsetof(drsym_info_t, flags))
        search_flags = ((bool)flags_in ? DRSYM_FULL_SEARCH : 0) | DRSYM_DEFAULT_FLAGS;
    return drsym_search_symbols_local(modpath, match, search_flags,
                                      NULL /*no legacy callback*/, callback, info_size,
                                      data);
}
/* Exported entry point: demangles mangled into dst while holding symbol_lock.
 * Names starting with "_Z" (Itanium style) go to drsym_unix_demangle_symbol();
 * everything else goes to the dbghelp-based demangle_symbol().
 * Returns whatever the chosen demangler returns.
 */
DR_EXPORT
size_t
drsym_demangle_symbol(char *dst DR_PARAM_OUT, size_t dst_sz, const char *mangled,
                      uint flags)
{
    size_t len;
    bool is_itanium;
    dr_recurlock_lock(symbol_lock);
    /* Assume dbghelp is what we want unless it's Itanium "_Z" style */
    is_itanium = (mangled[0] == '_' && mangled[1] == 'Z');
    len = is_itanium ? drsym_unix_demangle_symbol(dst, dst_sz, mangled, flags)
                     : demangle_symbol(dst, dst_sz, mangled, flags);
    dr_recurlock_unlock(symbol_lock);
    return len;
}
/* Decodes the type with index type_id in mod into drsym_type_t structures
 * carved out of buf, expanding sub-types up to levels_to_expand levels deep.
 * Pass 0 for levels_to_expand to avoid expanding sub-types.
 * The caller must hold symbol_lock when calling this routine.
 * Returns DRSYM_SUCCESS or an error code.
 */
static drsym_error_t
drsym_get_type_by_id(mod_entry_t *mod, uint type_id, uint levels_to_expand, char *buf,
                     size_t buf_sz, drsym_type_t **type DR_PARAM_OUT)
{
    type_query_t tq;
    drsym_error_t res;

    /* Check that caller locked symbol_lock */
    ASSERT(dr_recurlock_self_owns(symbol_lock),
           "drsym_get_type_by_id called without symbol lock");

    tq.base = mod->u.load_base;
    pool_init(&tq.pool, buf, buf_sz);
    /* Prevent recursion by recording index to pointer mappings.
     * We could perhaps try to stick this in buf but given the symbol_lock
     * it seems simpler to just use a separate table that lives only for the
     * duration of this query.
     */
    hashtable_init_ex(&tq.type_map_table, TYPE_MAP_HASH_BITS, HASH_INTPTR,
                      false /*!strdup*/, false /*!synch*/, NULL, NULL, NULL);

    res = decode_type(&tq, type_id, levels_to_expand, type);

    hashtable_delete(&tq.type_map_table);
    return res;
}
/* Shared path for type lookup by address and type expansion by index.
 * have_type_id specifies which of modoffs and type_id should be used: when it
 * is false, the type index is first obtained from the symbol at modoffs.
 * Returns DRSYM_ERROR_INVALID_PARAMETER for a NULL modpath, buf or
 * expanded_type, DRSYM_ERROR_LOAD_FAILED if the module cannot be loaded,
 * DRSYM_ERROR_SYMBOL_NOT_FOUND if no symbol covers modoffs, and otherwise the
 * result of decoding the type.
 */
static drsym_error_t
drsym_get_type_common(const char *modpath, bool have_type_id, size_t modoffs,
                      uint type_id, uint levels_to_expand, char *buf, size_t buf_sz,
                      drsym_type_t **expanded_type DR_PARAM_OUT)
{
    mod_entry_t *entry;
    drsym_error_t res;
    PSYMBOL_INFO sym;

    if (expanded_type == NULL || buf == NULL || modpath == NULL)
        return DRSYM_ERROR_INVALID_PARAMETER;

    dr_recurlock_lock(symbol_lock);
    entry = lookup_or_load(modpath, true /*use dbghelp*/);
    if (entry == NULL) {
        res = DRSYM_ERROR_LOAD_FAILED;
    } else if (entry->use_pecoff_symtable) {
        res = drsym_unix_expand_type(entry->u.pecoff_data, type_id, levels_to_expand,
                                     buf, buf_sz, expanded_type);
    } else {
        res = DRSYM_SUCCESS;
        if (!have_type_id) {
            /* XXX: For a perf boost, we could expose the concept of a
             * cursor/handle/index/whatever that refers to a given symbol and skip
             * this address lookup.  DWARF should have a similar construct we could
             * expose.  However, that would break backwards compat.  We assume that
             * the client is not a debugger, and that they just want type info for
             * a handful of interesting symbols.  Therefore we can afford the
             * overhead of the address lookup.
             * Update: there doesn't seem to be any "handle" exposed by dbghelp.
             * But we do have the type_id in drsym_info_t, which avoids this lookup
             * by allowing most callers to use drsym_expand_type().
             */
            sym = alloc_symbol_info();
            if (SymFromAddr(GetCurrentProcess(), entry->u.load_base + modoffs, NULL,
                            sym)) {
                type_id = sym->TypeIndex;
            } else {
                NOTIFY("SymFromAddr error %d\n", GetLastError());
                res = DRSYM_ERROR_SYMBOL_NOT_FOUND;
            }
            free_symbol_info(sym);
        }
        if (res == DRSYM_SUCCESS) {
            res = drsym_get_type_by_id(entry, type_id, levels_to_expand, buf, buf_sz,
                                       expanded_type);
        }
    }
    dr_recurlock_unlock(symbol_lock);
    return res;
}
/* Exported entry point: retrieves the type of the symbol at modoffs in the
 * module, expanding sub-types up to levels_to_expand levels deep.  The type
 * index is looked up from the address, so a placeholder index is passed.
 */
DR_EXPORT
drsym_error_t
drsym_get_type(const char *modpath, size_t modoffs, uint levels_to_expand, char *buf,
               size_t buf_sz, drsym_type_t **type DR_PARAM_OUT)
{
    const bool have_type_id = false; /* need to look up type index */
    const uint unused_type_id = 0;
    return drsym_get_type_common(modpath, have_type_id, modoffs, unused_type_id,
                                 levels_to_expand, buf, buf_sz, type);
}
/* Exported entry point: looks up the type called type_name in the module via
 * SymGetTypeFromName and decodes it into buf without expanding sub-types.
 * Only PDB (dbghelp) symbols are supported.
 * Returns DRSYM_ERROR_INVALID_PARAMETER for a NULL modpath, type_name, buf or
 * type, DRSYM_ERROR_LOAD_FAILED if the module cannot be loaded,
 * DRSYM_ERROR_NOT_IMPLEMENTED for PECOFF symbol tables,
 * DRSYM_ERROR_SYMBOL_NOT_FOUND if the name is unknown, and otherwise the
 * result of decoding the type.
 */
DR_EXPORT
drsym_error_t
drsym_get_type_by_name(const char *modpath, const char *type_name, char *buf,
                       size_t buf_sz, drsym_type_t **type DR_PARAM_OUT)
{
    mod_entry_t *mod;
    drsym_error_t r;
    PSYMBOL_INFO info;
    /* Reject a NULL buf up front, as drsym_get_type_common() does, rather
     * than handing it to the pool allocator.
     */
    if (modpath == NULL || type_name == NULL || buf == NULL || type == NULL)
        return DRSYM_ERROR_INVALID_PARAMETER;
    dr_recurlock_lock(symbol_lock);
    mod = lookup_or_load(modpath, true /*use dbghelp*/);
    if (mod == NULL) {
        dr_recurlock_unlock(symbol_lock);
        return DRSYM_ERROR_LOAD_FAILED;
    }
    if (mod->use_pecoff_symtable) {
        /* The function supports only PDB lookup. */
        dr_recurlock_unlock(symbol_lock);
        return DRSYM_ERROR_NOT_IMPLEMENTED;
    }
    info = alloc_symbol_info();
    if (SymGetTypeFromName(GetCurrentProcess(), mod->u.load_base, type_name, info)) {
        r = drsym_get_type_by_id(mod, info->TypeIndex, 0 /*no expansion*/, buf, buf_sz,
                                 type);
    } else {
        NOTIFY("SymGetTypeFromName error %d\n", GetLastError());
        r = DRSYM_ERROR_SYMBOL_NOT_FOUND;
    }
    free_symbol_info(info);
    dr_recurlock_unlock(symbol_lock);
    return r;
}
/* Exported entry point: retrieves the function type of the symbol at modoffs.
 * Returns DRSYM_ERROR if the lookup succeeds but the symbol's type is not a
 * function type; otherwise returns the result of drsym_get_type().
 */
DR_EXPORT
drsym_error_t
drsym_get_func_type(const char *modpath, size_t modoffs, char *buf, size_t buf_sz,
                    drsym_func_type_t **func_type DR_PARAM_OUT)
{
    /* Expand the function args, but none of the child function
     * or compound types.
     */
    drsym_error_t res = drsym_get_type(modpath, modoffs, 1 /*one level*/, buf, buf_sz,
                                       (drsym_type_t **)func_type);
    if (res != DRSYM_SUCCESS)
        return res;
    if ((*func_type)->type.kind != DRSYM_TYPE_FUNC)
        return DRSYM_ERROR;
    return DRSYM_SUCCESS;
}
/* Exported entry point: expands the type with index type_id in the module up
 * to levels_to_expand levels deep.
 *
 * XXX: We assume that type indices will not change across an unload-reload
 * of a symbol file.  Even if these are indices into dbghelp internal data
 * structures, we assume that those are constructed deterministically.
 * If not, we'll need some other way for the user to expand types than by
 * passing back just an index: a multi-call sequence where the lock is
 * held or something.
 */
DR_EXPORT
drsym_error_t
drsym_expand_type(const char *modpath, uint type_id, uint levels_to_expand, char *buf,
                  size_t buf_sz, drsym_type_t **expanded_type DR_PARAM_OUT)
{
    const bool have_type_id = true; /* have type index */
    const size_t unused_modoffs = 0;
    return drsym_get_type_common(modpath, have_type_id, unused_modoffs, type_id,
                                 levels_to_expand, buf, buf_sz, expanded_type);
}
/* Exported entry point: reports in *kind what sort of debug information is
 * available for the module, using the PECOFF path or dbghelp as appropriate.
 * Returns DRSYM_ERROR_NOT_IMPLEMENTED in sideline mode,
 * DRSYM_ERROR_INVALID_PARAMETER for a NULL modpath or kind,
 * DRSYM_ERROR_LOAD_FAILED if the module cannot be loaded, DRSYM_ERROR if the
 * dbghelp query fails, and DRSYM_SUCCESS otherwise.
 */
DR_EXPORT
drsym_error_t
drsym_get_module_debug_kind(const char *modpath, drsym_debug_kind_t *kind DR_PARAM_OUT)
{
    mod_entry_t *entry;
    drsym_error_t res;

    if (IS_SIDELINE)
        return DRSYM_ERROR_NOT_IMPLEMENTED;
    if (kind == NULL || modpath == NULL)
        return DRSYM_ERROR_INVALID_PARAMETER;

    dr_recurlock_lock(symbol_lock);
    entry = lookup_or_load(modpath, true /*use dbghelp*/);
    if (entry == NULL)
        res = DRSYM_ERROR_LOAD_FAILED;
    else if (entry->use_pecoff_symtable)
        res = drsym_unix_get_module_debug_kind(entry->u.pecoff_data, kind);
    else if (query_available(GetCurrentProcess(), entry->u.load_base, kind))
        res = DRSYM_SUCCESS;
    else
        res = DRSYM_ERROR;
    dr_recurlock_unlock(symbol_lock);
    return res;
}
/* Exported entry point: reports whether symbols are available for the module.
 * For non-PECOFF modules it first tries the cheap route of asking dbghelp's
 * SymGetSymbolFileW (looked up dynamically and cached) whether a pdb can be
 * found.  If that routine is unavailable, or the module uses a PECOFF symbol
 * table or could not be loaded here, it falls back to
 * drsym_get_module_debug_kind() and checks for DRSYM_SYMBOLS.
 * Returns DRSYM_SUCCESS if symbols are available and an error code otherwise.
 */
DR_EXPORT
drsym_error_t
drsym_module_has_symbols(const char *modpath)
{
    /* dbghelp.dll 6.3+ is required for SymGetSymbolFile, but the VS2005sp1
     * headers and lib have only 6.1, so we dynamically look it up
     */
    static func_SymGetSymbolFileW_t get_symfile;
    mod_entry_t *entry;
    drsym_error_t res;
    drsym_debug_kind_t debug_kind;

    if (IS_SIDELINE)
        return DRSYM_ERROR_NOT_IMPLEMENTED;
    if (modpath == NULL)
        return DRSYM_ERROR_INVALID_PARAMETER;

    dr_recurlock_lock(symbol_lock);
    /* Unfortunately we have to load the file and check whether it's
     * PECOFF but our load is faster than dbghelp's load
     */
    entry = lookup_or_load(modpath, false /*!use dbghelp*/);
    if (entry != NULL && !entry->use_pecoff_symtable) {
        if (get_symfile == NULL) {
            /* if we fail to find it we'll pay the lookup cost every time,
             * but if we succeed we'll cache it
             */
            HMODULE dbghelp = GetModuleHandle("dbghelp.dll");
            if (dbghelp != NULL) {
                get_symfile = (func_SymGetSymbolFileW_t)GetProcAddress(
                    dbghelp, "SymGetSymbolFileW");
            }
        }
        if (get_symfile != NULL) {
            /* more efficient than fully loading the pdb */
            static wchar_t pdb_name[MAXIMUM_PATH];
            static wchar_t pdb_path[MAXIMUM_PATH];
            wchar_t wide_modpath[MAXIMUM_PATH];
            /* UTF-8 to wide string. */
            dr_snwprintf(wide_modpath, BUFFER_SIZE_ELEMENTS(wide_modpath), L"%S",
                         modpath);
            NULL_TERMINATE_BUFFER(wide_modpath);
            /* i#917: sfPdb is not in VS2005's dbghelp.h.  Unfortunately it's
             * an enum so we can't test whether it's defined, so we
             * override it and assume its value will not change (unlikely
             * since that would break binary compatibility).
             */
#define sfPdb 2
            /* If we ever use the name/path, note that path seems to be
             * empty while name has the full path (the docs seem to
             * imply the opposite).
             */
            if ((*get_symfile)(GetCurrentProcess(), NULL, wide_modpath, sfPdb, pdb_name,
                               BUFFER_SIZE_ELEMENTS(pdb_name), pdb_path,
                               BUFFER_SIZE_ELEMENTS(pdb_path)))
                res = DRSYM_SUCCESS;
            else
                res = DRSYM_ERROR;
            dr_recurlock_unlock(symbol_lock);
            return res;
        }
    }
    dr_recurlock_unlock(symbol_lock);

    /* fall back to slower lookup */
    res = drsym_get_module_debug_kind(modpath, &debug_kind);
    if (res == DRSYM_SUCCESS && !TEST(DRSYM_SYMBOLS, debug_kind))
        res = DRSYM_ERROR;
    return res;
}
/* We do not want to take unlimited resources when a client queries a whole
 * bunch of libraries.  Usually the client will query at module load and
 * then not again, unless in a callstack later.  So we can save a lot of memory
 * (hundreds of MB) by unloading at that point.  xref DrMem i#982.
 *
 * XXX i#449: while too-frequent internal GC can result in repeated
 * loading and re-loading for callstacks or other symbol queries
 * during execution and can result in fragmentation and a failure to
 * load symbols later, we probably do want some kind of internal GC.
 * If we keep the frequency from getting too high, it should be ok wrt
 * fragmentation.  Perhaps just hashtable_clear() every time it hits
 * 25 modules or something.
 */
/* Exported entry point: drops the cached entry for modpath from the module
 * table.
 * Returns DRSYM_ERROR_NOT_IMPLEMENTED in sideline mode,
 * DRSYM_ERROR_INVALID_PARAMETER for a NULL modpath, DRSYM_ERROR_RECURSIVE
 * when called while recursive_context is set, DRSYM_SUCCESS if an entry was
 * removed, and DRSYM_ERROR if there was none.
 */
DR_EXPORT
drsym_error_t
drsym_free_resources(const char *modpath)
{
    drsym_error_t res;

    if (IS_SIDELINE)
        return DRSYM_ERROR_NOT_IMPLEMENTED;
    if (modpath == NULL)
        return DRSYM_ERROR_INVALID_PARAMETER;
    /* unsafe to free during iteration */
    if (recursive_context)
        return DRSYM_ERROR_RECURSIVE;

    dr_recurlock_lock(symbol_lock);
    res = hashtable_remove(&modtable, (void *)modpath) ? DRSYM_SUCCESS : DRSYM_ERROR;
    dr_recurlock_unlock(symbol_lock);
    return res;
}
/* Exported entry point: forwards to drsym_enumerate_lines_local() unless we
 * are in sideline mode, which is not implemented.
 */
DR_EXPORT
drsym_error_t
drsym_enumerate_lines(const char *modpath, drsym_enumerate_lines_cb callback, void *data)
{
    if (IS_SIDELINE)
        return DRSYM_ERROR_NOT_IMPLEMENTED;
    return drsym_enumerate_lines_local(modpath, callback, data);
}