blob: a4e2e979015ba076d5aa3f67a8bf19945f185d09 [file] [log] [blame]
/* **********************************************************
* Copyright (c) 2012-2024 Google, Inc. All rights reserved.
* **********************************************************/
/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/* DRSyms DynamoRIO Extension */
/* Symbol lookup for DWARF format on Windows
* (generated by cygwin or mingw gcc)
*
* We use the PECOFF symbol table for symbol and address lookup.
* XXX: we do not look at exports separately: we assume they're in the table,
* unless the symbol table is stripped (i#1395).
*
* For line numbers, we use DWARF info if available.
*/
#ifdef WINDOWS
# define _CRT_SECURE_NO_DEPRECATE 1
#endif
#include "dr_api.h"
#include "drsyms.h"
#include "drsyms_private.h"
#include "drsyms_obj.h"
#include "dwarf.h"
#include "libdwarf.h"
#include <windows.h>
#include <stdio.h> /* sscanf */
#include <stdlib.h> /* qsort */
/* For debugging: diagnostic verbosity level.  A NOTIFY() message is printed
 * only when this value is at least the level given for that message.
 */
static uint verbose = 0;
#undef NOTIFY
/* Prints the printf-style arguments to STDERR via dr_fprintf() when the
 * current verbosity is at least n.  The arguments are not evaluated otherwise.
 */
#define NOTIFY(n, ...) \
do { \
if (verbose >= (n)) { \
dr_fprintf(STDERR, __VA_ARGS__); \
} \
} while (0)
/* Prints the dwarf_errmsg() text for the Dwarf error de to STDERR when
 * verbose is non-zero.
 */
#define NOTIFY_DWARF(de) \
do { \
if (verbose) { \
dr_fprintf(STDERR, "drsyms: Dwarf error: %s\n", dwarf_errmsg(de)); \
} \
} while (0)
/* MS tools use this value instead of the others */
#define IMAGE_SYM_TYPE_FUNCTION 0x20
/* One exported code symbol.  An array of these is built from the export
 * iterator and sorted by address when a module is handled in exports-only mode.
 */
typedef struct _export_info_t {
/* Export name, as returned by the export iterator (not copied). */
const char *name;
/* Export address, as returned by the export iterator. */
app_pc addr;
} export_info_t;
/* Per-module state for PECOFF symbol lookup. */
typedef struct _pecoff_data_t {
/* Base of the module mapping.  Set in drsym_obj_mod_init_pre() and replaced
 * by the base passed to drsym_obj_mod_init_post().
 */
byte *map_base;
/* Size passed to drsym_obj_mod_init_pre(); also used as the upper bound for
 * module offsets in address searches.
 */
size_t map_size;
/* ImageBase from the optional header (returned by drsym_obj_load_base()). */
byte *preferred_base;
/* True if the optional header magic is IMAGE_NT_OPTIONAL_HDR64_MAGIC. */
bool is_64;
/* Symbol table, located at FileHeader.PointerToSymbolTable from map_base. */
IMAGE_SYMBOL *symbol_table;
/* FileHeader.NumberOfSymbols; this count includes aux entries. */
uint symbol_count;
/* String table, which immediately follows the symbol table. */
const char *string_table;
/* array of pointers to the non-aux, non-undefined symbols, sorted by
 * section, then value, then type.  Allocated with symbol_count entries.
 */
IMAGE_SYMBOL **sorted_syms;
/* Number of valid entries in sorted_syms, or in sorted_exports when
 * exports_only is set.
 */
uint sorted_count;
/* array of section bases: the VirtualAddress of each section, indexed by
 * (1-based) SectionNumber - 1
 */
size_t *section_base;
/* Number of entries in section_base (FileHeader.NumberOfSections). */
uint section_count;
/* stored section info: DRSYM_* flags describing the debug info found */
drsym_debug_kind_t debug_kind;
/* Raw data of the .gnu_debuglink section inside the mapping, or NULL if the
 * module has no such section.
 */
byte *debuglink;
/* i#1395: handle stripped MinGW.  Set when the symbol table is stripped and
 * the module looks like MinGW output, in which case exports are used instead.
 */
bool exports_only;
/* Code exports sorted by address; only allocated in exports-only mode. */
export_info_t *sorted_exports;
} pecoff_data_t;
/* We synchronize all our operations, but we assume the outer drsyms_windows
 * layer grabs the lock before calling any routine here.
 */
/* Forward declarations for the two table builders, which are defined later in
 * this file and invoked from drsym_obj_mod_init_post().
 */
static void
drsym_pecoff_sort_symbols(pecoff_data_t *mod);
static bool
drsym_pecoff_sort_exports(pecoff_data_t *mod);
/******************************************************************************
* Init and exit
*/
/* Library-wide initialization hook for this backend.  The body is empty:
 * nothing is set up here.
 */
void
drsym_obj_init(void)
{
    /* Intentionally a no-op. */
}
/* Returns the name of section sec.  If max_size is non-NULL, *max_size
 * receives the maximum number of characters that may be read from the
 * returned pointer, since the returned string may not be NUL-terminated.
 *
 * A header name of the form "/N" holds the decimal offset N into the string
 * table of a name longer than the inline field; in that case the string-table
 * entry is returned and *max_size is its length.  Otherwise the inline header
 * field is returned and *max_size is sizeof(sec->Name); the inline field has
 * no terminator when every byte of it is used.
 */
static const char *
drsym_pecoff_get_section_name(pecoff_data_t *mod, IMAGE_SECTION_HEADER *sec,
                              size_t *max_size)
{
    if (sec->Name[0] == '/') {
        /* "/N" where N is index into string table for >8-char name.
         * The field is unterminated when all of its bytes are in use (e.g.,
         * "/1234567"), so parse a NUL-terminated copy of the digits rather
         * than letting sscanf read past the field into the rest of the header.
         */
        char digits[sizeof(sec->Name)];
        uint index;
        memcpy(digits, sec->Name + 1, sizeof(sec->Name) - 1);
        digits[sizeof(digits) - 1] = '\0';
        if (sscanf(digits, "%u", &index) == 1) {
            const char *name = mod->string_table + index;
            if (max_size != NULL)
                *max_size = strlen(name);
            return name;
        }
    }
    if (max_size != NULL)
        *max_size = sizeof(sec->Name);
    return (const char *)sec->Name;
}
/* Lightweight first-stage initialization for a mapped PE file.
 *
 * Validates the DOS and NT signatures, then records where the COFF symbol
 * table and string table live, the preferred image base, and the virtual
 * address of every section.  While walking the section headers it also notes
 * whether DWARF line info (.debug_line) and a .gnu_debuglink section are
 * present, and applies the i#1395 heuristic for stripped MinGW binaries.
 * Sorting of symbols is deferred to drsym_obj_mod_init_post().
 *
 * map_base/map_size describe the mapping of the file.
 * Returns a newly allocated pecoff_data_t (released by drsym_obj_mod_exit()),
 * or NULL if the mapping does not hold a PE image whose headers fit inside it.
 */
void *
drsym_obj_mod_init_pre(byte *map_base, size_t map_size)
{
    IMAGE_DOS_HEADER *dos = (IMAGE_DOS_HEADER *)map_base;
    IMAGE_NT_HEADERS *nt;
    IMAGE_SECTION_HEADER *sec;
    uint i;
    pecoff_data_t *mod;
    bool is_mingw = false;
    /* Do not read header fields that lie outside the mapping. */
    if (map_base == NULL || map_size < sizeof(*dos))
        return NULL;
    if (dos->e_magic != IMAGE_DOS_SIGNATURE)
        return NULL;
    /* e_lfanew comes from the file: make sure the NT signature and file
     * header it points at are inside the mapping before touching them.
     */
    if (dos->e_lfanew < 0 ||
        (size_t)dos->e_lfanew >
            map_size - (sizeof(nt->Signature) + sizeof(nt->FileHeader)))
        return NULL;
    nt = (IMAGE_NT_HEADERS *)(((ptr_uint_t)dos) + dos->e_lfanew);
    if (nt->Signature != IMAGE_NT_SIGNATURE)
        return NULL;
    mod = dr_global_alloc(sizeof(*mod));
    memset(mod, 0, sizeof(*mod));
    mod->map_base = map_base;
    mod->map_size = map_size;
    mod->symbol_table = (IMAGE_SYMBOL *)(map_base + nt->FileHeader.PointerToSymbolTable);
    mod->symbol_count = nt->FileHeader.NumberOfSymbols;
    NOTIFY(1, "%s: mapped @" PFX " w/ %d symbols\n", __FUNCTION__, map_base,
           mod->symbol_count);
    /* String table immediately follows symbol table */
    mod->string_table = ((const char *)mod->symbol_table) +
        (nt->FileHeader.NumberOfSymbols * sizeof(IMAGE_SYMBOL));
    if (mod->symbol_count > 0)
        mod->debug_kind |= DRSYM_SYMBOLS | DRSYM_PECOFF_SYMTAB;
    mod->is_64 = (nt->OptionalHeader.Magic == IMAGE_NT_OPTIONAL_HDR64_MAGIC);
    mod->preferred_base =
        (byte *)(ptr_uint_t)(mod->is_64
                                 ? ((IMAGE_OPTIONAL_HEADER64 *)(&nt->OptionalHeader))
                                       ->ImageBase
                                 : nt->OptionalHeader.ImageBase);
    /* We sort the symbols only once we know the time spent is worth it in init_post */
    mod->section_count = nt->FileHeader.NumberOfSections;
    mod->section_base =
        (size_t *)dr_global_alloc(mod->section_count * sizeof(*mod->section_base));
    sec = IMAGE_FIRST_SECTION(nt);
    for (i = 0; i < nt->FileHeader.NumberOfSections; i++, sec++) {
        size_t name_maxsz;
        const char *secname = drsym_pecoff_get_section_name(mod, sec, &name_maxsz);
        /* The precision argument of "%.*s" must be an int, not a size_t. */
        NOTIFY(2, "%s: %.*s\n", __FUNCTION__, (int)name_maxsz, secname);
        if (strncmp(secname, ".debug_line", name_maxsz) == 0) {
            mod->debug_kind |= DRSYM_LINE_NUMS | DRSYM_DWARF_LINE;
        }
        if (strncmp(secname, ".gnu_debuglink", name_maxsz) == 0) {
            mod->debuglink = sec->PointerToRawData + mod->map_base;
        }
        /* i#1395: heuristic to identify MinGW stripped libraries */
        if (strncmp(secname, ".eh_frame", name_maxsz) == 0 ||
            strncmp(secname, ".CRT", name_maxsz) == 0) {
            is_mingw = true;
        }
        /* Indexed by 0-based section index, i.e., SectionNumber - 1. */
        mod->section_base[i] = sec->VirtualAddress;
    }
    /* i#1395: since dbghelp does not handle MinGW exports properly (strips leading
     * underscore and does not demangle) we handle them ourselves.
     *
     * XXX: it might be better to check for presence of a PDB first, just in case
     * our heuristics match non-MinGW -- but that's a little awkward to arrange
     * with the current code setup.
     */
    if (is_mingw &&
        TEST(IMAGE_FILE_LOCAL_SYMS_STRIPPED, nt->FileHeader.Characteristics) &&
        mod->symbol_count == 0) {
        mod->exports_only = true;
        NOTIFY(1, "%s: no pecoff symbols and likely no pdb, so using exports\n",
               __FUNCTION__);
    }
    return (void *)mod;
}
/* Returns true when the module is in exports-only mode, which is the case in
 * which the exports directory has to be read from an image mapping.
 */
bool
drsym_obj_remap_as_image(void *mod_in)
{
    return ((pecoff_data_t *)mod_in)->exports_only;
}
/* Second-stage (heavyweight) initialization, run once we know this backend is
 * going to be used for the module.
 *
 * Records map_base as the module's mapping base and builds the sorted lookup
 * table: the export table in exports-only mode, otherwise the symbol table.
 *
 * Returns false if there is nothing to look up, so that the caller can fall
 * back to dbghelp: either the pecoff symbol table is empty and we are not in
 * exports-only mode, or no exports could be collected.
 * XXX i#672: there may still be dwarf2 or stabs sections even if the
 * symtable is stripped and we could do symbol lookup via dwarf2
 */
bool
drsym_obj_mod_init_post(void *mod_in, byte *map_base, void *dwarf_info)
{
    pecoff_data_t *pe = (pecoff_data_t *)mod_in;
    if (pe->exports_only) {
        pe->map_base = map_base;
        return drsym_pecoff_sort_exports(pe);
    }
    if (pe->symbol_count == 0) {
        NOTIFY(1, "%s: no pecoff symbols\n", __FUNCTION__);
        return false;
    }
    pe->map_base = map_base;
    drsym_pecoff_sort_symbols(pe);
    return true;
}
/* Opens a libdwarf handle on the module's mapping and stores it in *dbg.
 * Returns true on success; returns false if mod_in is NULL or if
 * dwarf_pecoff_init() does not report DW_DLV_OK (the error is logged when
 * verbose).
 */
bool
drsym_obj_dwarf_init(void *mod_in, dwarf_lib_handle_t *dbg)
{
    pecoff_data_t *pe = (pecoff_data_t *)mod_in;
    Dwarf_Error err; /* expensive to init (DrM#1770) */
    if (pe == NULL)
        return false;
    if (dwarf_pecoff_init(pe->map_base, DW_DLC_READ, NULL, NULL, dbg, &err) ==
        DW_DLV_OK)
        return true;
    NOTIFY_DWARF(err);
    return false;
}
void
drsym_obj_mod_exit(void *mod_in)
{
pecoff_data_t *mod = (pecoff_data_t *)mod_in;
if (mod->section_base != NULL) {
dr_global_free(mod->section_base,
mod->section_count * sizeof(*mod->section_base));
}
if (mod->sorted_syms != NULL)
dr_global_free(mod->sorted_syms, mod->symbol_count * sizeof(*mod->sorted_syms));
if (mod->sorted_exports != NULL) {
dr_global_free(mod->sorted_exports,
mod->sorted_count * sizeof(*mod->sorted_exports));
}
dr_global_free(mod, sizeof(*mod));
}
/******************************************************************************
* Lookup routines
*/
/* Returns the DRSYM_* flags recorded for the module during
 * drsym_obj_mod_init_pre().
 */
drsym_debug_kind_t
drsym_obj_info_avail(void *mod_in)
{
    return ((pecoff_data_t *)mod_in)->debug_kind;
}
/* Returns the module's preferred load address, i.e., the ImageBase read from
 * its optional header.
 */
byte *
drsym_obj_load_base(void *mod_in)
{
    return ((pecoff_data_t *)mod_in)->preferred_base;
}
/* Return the path contained in the .gnu_debuglink section or NULL if we cannot
* find it.
*/
const char *
drsym_obj_debuglink_section(void *mod_in, const char *modpath)
{
pecoff_data_t *mod = (pecoff_data_t *)mod_in;
return (const char *)mod->debuglink;
}
/* Returns the NUL-terminated name of sym.
 *
 * Names longer than the inline field live in the string table and are
 * returned from there.  Inline names are returned in place unless they occupy
 * the whole field, in which case they have no terminator and are copied into
 * a static buffer.  Because of that buffer the caller must hold the lock, and
 * the result is only valid until the next call.
 *
 * In 32-bit builds a single leading underscore is dropped, except on "_Z"
 * (mangled) names.
 */
static const char *
drsym_pecoff_symbol_name(pecoff_data_t *mod, IMAGE_SYMBOL *sym)
{
    static char buf[sizeof(sym->N.ShortName) + 1];
    const char *name;
    if (sym->N.Name.Short == 0) {
        /* longer than 8 chars, so index into string table */
        name = mod->string_table + sym->N.Name.Long;
    } else {
        name = (const char *)sym->N.ShortName;
        /* not null-terminated if 8 chars. caller holds lock so we can
         * use a static buffer to add a NULL, which caller requires.
         */
        if (memchr(name, '\0', sizeof(sym->N.ShortName)) == NULL) {
            memcpy(buf, name, sizeof(sym->N.ShortName));
            NULL_TERMINATE_BUFFER(buf);
            name = buf;
        }
    }
#ifndef X64
    /* XXX: all 32-bit pecoff symtables I've seen have leading underscores,
     * which we drop here. Is this always the case? It's true for Cygwin
     * gcc 3.4.4 and MinGW gcc 4.6.1. It's NOT true for MinGWx64 gcc 4.7.0
     * but there's no 4.7 32-bit so I'm not sure whether the next MinGW
     * release is going to break us. We at least don't remove from "_Z"
     * so we'll work w/ mangled names (though there could be a C name that
     * starts w/ Z that was added by the linker and should start w/ _?)
     */
    if (name[0] == '_' && name[1] != 'Z')
        name++;
#endif
    return name;
}
/* qsort comparator over IMAGE_SYMBOL* elements.
 * Orders by section number first (sections must be in ascending order
 * according to pecoff_v8.doc, and entries with numbers <= 0 are wanted first
 * anyway), then by Value, and finally by Type so that a section-name entry
 * sharing a Value with a function sorts ahead of the function.
 */
static int
compare_symbols(const void *a_in, const void *b_in)
{
    const IMAGE_SYMBOL *lhs = *(const IMAGE_SYMBOL *const *)a_in;
    const IMAGE_SYMBOL *rhs = *(const IMAGE_SYMBOL *const *)b_in;
    if (lhs->SectionNumber != rhs->SectionNumber)
        return (lhs->SectionNumber > rhs->SectionNumber) ? 1 : -1;
    if (lhs->Value != rhs->Value)
        return (lhs->Value > rhs->Value) ? 1 : -1;
    if (lhs->Type != rhs->Type)
        return (lhs->Type > rhs->Type) ? 1 : -1;
    return 0;
}
/* Builds mod->sorted_syms: an array of pointers to the primary symbol records
 * (aux records are stepped over and undefined-section records are left out),
 * sorted with compare_symbols.  The array serves both address lookup and
 * gap-free iteration; mod->sorted_count is set to the number of entries.
 */
static void
drsym_pecoff_sort_symbols(pecoff_data_t *mod)
{
    IMAGE_SYMBOL *cur = mod->symbol_table;
    uint remaining = mod->symbol_count;
    uint kept = 0;
    uint i;
    /* symbol_count also counts aux records, so this over-allocates; that is
     * cheaper than a separate counting pass or a re-allocation.
     */
    mod->sorted_syms =
        (IMAGE_SYMBOL **)dr_global_alloc(mod->symbol_count * sizeof(*mod->sorted_syms));
    while (remaining > 0) {
        /* A primary record is followed by NumberOfAuxSymbols aux records that
         * only carry more info on the same symbol.
         */
        uint span = 1 + (uint)cur->NumberOfAuxSymbols;
        /* just skip if invalid entry */
        if (cur->SectionNumber != IMAGE_SYM_UNDEFINED)
            mod->sorted_syms[kept++] = cur;
        if (span >= remaining)
            break;
        cur += span;
        remaining -= span;
    }
    mod->sorted_count = kept;
    /* XXX: for now using ntdll.dll/libc qsort. We could put qsort sources
     * into DR if we want more lib independence or are worried about the lib
     * impl calling some non-re-entrant routine: though unlikely as this is
     * an in-place sort and the BSD and glibc impls are self-contained.
     */
    qsort(mod->sorted_syms, mod->sorted_count, sizeof(IMAGE_SYMBOL *), compare_symbols);
    if (verbose >= 3) {
        NOTIFY(3, "%s:\n", __FUNCTION__);
        for (i = 0; i < mod->sorted_count; i++) {
            IMAGE_SYMBOL *entry = mod->sorted_syms[i];
            NOTIFY(3, "  #%d: %-20s Value=0x%x, Sec=%d, Type=%d, Storage=%d, #aux=%d\n",
                   i, drsym_pecoff_symbol_name(mod, entry), entry->Value,
                   entry->SectionNumber, entry->Type, entry->StorageClass,
                   entry->NumberOfAuxSymbols);
        }
    }
}
/* Returns the number of entries in the module's sorted lookup table, or 0 if
 * mod_in is NULL.
 */
uint
drsym_obj_num_symbols(void *mod_in)
{
    const pecoff_data_t *pe = (const pecoff_data_t *)mod_in;
    return (pe == NULL) ? 0 : pe->sorted_count;
}
/* Returns the name of the idx-th entry of the sorted lookup table, or NULL if
 * mod_in is NULL or idx is out of range.  The caller must hold the lock, as
 * the symbol-table path may return a pointer into a shared static buffer.
 */
const char *
drsym_obj_symbol_name(void *mod_in, uint idx)
{
    pecoff_data_t *pe = (pecoff_data_t *)mod_in;
    if (pe == NULL || idx >= pe->sorted_count)
        return NULL;
    /* idx always refers to the sorted arrays, which contain no aux entries */
    return pe->exports_only ? pe->sorted_exports[idx].name
                            : drsym_pecoff_symbol_name(pe, pe->sorted_syms[idx]);
}
/* Computes the module offset of sym and stores it in *offs.
 *
 * For a symbol in a real section (SectionNumber is 1-based) the offset is the
 * symbol's Value plus that section's base.  Absolute and debug symbols get
 * offset 0 and still report success.  Any other section number stores 0 and
 * returns DRSYM_ERROR_NOT_IMPLEMENTED.
 * Returns DRSYM_ERROR_INVALID_PARAMETER if offs is NULL.
 */
static drsym_error_t
drsym_pecoff_symbol_offs(pecoff_data_t *mod, IMAGE_SYMBOL *sym, size_t *offs DR_PARAM_OUT)
{
    if (offs == NULL)
        return DRSYM_ERROR_INVALID_PARAMETER;
    if (sym->SectionNumber > 0 && (uint)sym->SectionNumber <= mod->section_count) {
        *offs = sym->Value + mod->section_base[sym->SectionNumber - 1];
        return DRSYM_SUCCESS;
    }
    /* Everything below has no offset */
    *offs = 0;
    if (sym->SectionNumber == IMAGE_SYM_ABSOLUTE ||
        sym->SectionNumber == IMAGE_SYM_DEBUG) {
        /* XXX: still return success?  Someone might want to look it up.
         * It's not like an import in .dynsym (i#1256).
         */
        return DRSYM_SUCCESS;
    }
    NOTIFY(1, "%s: unknown section # %d val 0x%x\n", __FUNCTION__, sym->SectionNumber,
           sym->Value);
    return DRSYM_ERROR_NOT_IMPLEMENTED;
}
/* Stores in *offs_start the module offset of the idx-th entry of the sorted
 * lookup table and, if offs_end is non-NULL, an end offset in *offs_end.
 *
 * The true end of a symbol is not known, so the start of the next entry in
 * the sorted table is used; for the last entry the end is start + 1.
 *
 * Returns DRSYM_ERROR_INVALID_PARAMETER if offs_start or mod_in is NULL or
 * idx is out of range; otherwise the result of the last offset computation
 * that was performed.
 */
drsym_error_t
drsym_obj_symbol_offs(void *mod_in, uint idx, size_t *offs_start DR_PARAM_OUT,
                      size_t *offs_end DR_PARAM_OUT)
{
    pecoff_data_t *pe = (pecoff_data_t *)mod_in;
    drsym_error_t res;
    bool has_next;
    if (pe == NULL || offs_start == NULL || idx >= pe->sorted_count)
        return DRSYM_ERROR_INVALID_PARAMETER;
    has_next = (idx + 1 < pe->sorted_count);
    if (pe->exports_only) {
        const export_info_t *exp = &pe->sorted_exports[idx];
        *offs_start = exp->addr - pe->map_base;
        if (offs_end == NULL)
            return DRSYM_SUCCESS;
        if (has_next)
            *offs_end = exp[1].addr - pe->map_base;
        else
            *offs_end = *offs_start + 1;
        return DRSYM_SUCCESS;
    }
    res = drsym_pecoff_symbol_offs(pe, pe->sorted_syms[idx], offs_start);
    if (res != DRSYM_SUCCESS || offs_end == NULL)
        return res;
    /* XXX: we don't have the end offs so we use the next sym */
#if 0 /* recording code to identify function */
    /* From pecoff_v8.doc 5.5.1, these define a function */
    /* XXX: how know when to use IMAGE_SYMBOL_EX? */
    if ((sym->Type & 0xff) == IMAGE_SYM_TYPE_FUNCTION &&
        (sym->StorageClass == IMAGE_SYM_CLASS_EXTERNAL ||
         sym->StorageClass == IMAGE_SYM_CLASS_STATIC)) {
        /* It's a function.
         * XXX: doesn't have the aux entries in the pecoff_v8 spec
         * so we can't find the end bound of the function.
         */
    }
#endif
    if (!has_next) {
        *offs_end = *offs_start + 1;
        return res;
    }
    return drsym_pecoff_symbol_offs(pe, pe->sorted_syms[idx + 1], offs_end);
}
/* Finds the entry of the sorted lookup table that covers module offset
 * modoffs: the closest entry whose offset is <= modoffs.  Its index is stored
 * in *idx.
 *
 * Returns DRSYM_SUCCESS on a hit, DRSYM_ERROR_INVALID_PARAMETER if mod_in or
 * idx is NULL, DRSYM_ERROR_SYMBOL_NOT_FOUND if modoffs is beyond the module,
 * the table is empty, or modoffs lies below every entry, and otherwise the
 * error reported while computing an entry's offset.
 */
drsym_error_t
drsym_obj_addrsearch_symtab(void *mod_in, size_t modoffs, uint *idx DR_PARAM_OUT)
{
    /* This routine is used for both symbol table (mod->sorted_syms[])
     * and exports (mod->sorted_exports) searching, so don't go and
     * access mod->sorted_syms[] w/o checking for mod->exports_only!
     */
    pecoff_data_t *mod = (pecoff_data_t *)mod_in;
    uint min;
    uint max;
    int min_lower = -1;
    drsym_error_t res;
    /* Validate before touching any field of mod. */
    if (mod == NULL || idx == NULL)
        return DRSYM_ERROR_INVALID_PARAMETER;
    if (modoffs >= mod->map_size)
        return DRSYM_ERROR_SYMBOL_NOT_FOUND;
    /* With an empty table, sorted_count - 1 would wrap around. */
    if (mod->sorted_count == 0)
        return DRSYM_ERROR_SYMBOL_NOT_FOUND;
    min = 0;
    max = mod->sorted_count - 1;
    /* XXX: if a function is split into non-contiguous pieces, will it
     * have multiple entries?
     */
    /* binary search */
    NOTIFY(1, "%s: 0x%x\n", __FUNCTION__, modoffs);
    while (max >= min) {
        uint i = min + (max - min) / 2;
        size_t symoffs;
        /* we ignore unknown sec here and treat all such as 0 at front of array */
        res = drsym_obj_symbol_offs(mod, i, &symoffs, NULL);
        NOTIFY(2, "\tbinary search %d => 0x%x == %s\n", i, symoffs,
               drsym_obj_symbol_name(mod_in, i));
        if (res != DRSYM_SUCCESS && res != DRSYM_ERROR_SYMBOL_NOT_FOUND)
            return res;
        if (modoffs < symoffs) {
            /* The indices are unsigned: stepping below entry 0 would wrap
             * around instead of ending the loop, so stop here.
             */
            if (i == 0)
                break;
            max = i - 1;
        } else {
            /* entry i has offs <= target, so it is the best candidate so far */
            min_lower = i;
            if (max == min || modoffs == symoffs)
                break;
            min = i + 1;
        }
    }
    NOTIFY(2, "\tbinary search => %d\n", min_lower);
    if (min_lower > -1) {
        /* found closest sym with offs <= target */
        if (!mod->exports_only) {
            /* sometimes a section-name entry will have the same offs as a function.
             * prefer the function.
             * we sorted by type so we know function is later.
             */
            if (mod->sorted_syms[min_lower]->Type == 0 &&
                (uint)min_lower + 1 < mod->sorted_count &&
                mod->sorted_syms[min_lower]->Value ==
                    mod->sorted_syms[min_lower + 1]->Value)
                min_lower++;
        }
        *idx = min_lower;
        return DRSYM_SUCCESS;
    }
    return DRSYM_ERROR_SYMBOL_NOT_FOUND;
}
/******************************************************************************
* Exports-only
*/
/* qsort comparator over export_info_t elements: ascending by address. */
static int
compare_exports(const void *a_in, const void *b_in)
{
    const export_info_t *lhs = (const export_info_t *)a_in;
    const export_info_t *rhs = (const export_info_t *)b_in;
    if (lhs->addr == rhs->addr)
        return 0;
    return (lhs->addr > rhs->addr) ? 1 : -1;
}
/* Builds mod->sorted_exports: the module's code exports, sorted by address,
 * and sets mod->sorted_count to their number.  The exports are walked twice:
 * once to count them and once to fill the array.
 *
 * Returns false, leaving the module untouched, if there are no code exports;
 * returns true otherwise.
 */
static bool
drsym_pecoff_sort_exports(pecoff_data_t *mod)
{
    uint i = 0;
    dr_symbol_export_iterator_t *exp_iter;
    /* We need to map as an image to read the exports dir (and we can't map
     * as an image for symtable reading b/c that won't come along).
     * XXX: unmap the non-image mapped by drsyms_unix to save space.
     * XXX: for online use, use the actual already-loaded image to save space.
     */
    /* 1st pass to get the count */
    exp_iter = dr_symbol_export_iterator_start((module_handle_t)mod->map_base);
    while (dr_symbol_export_iterator_hasnext(exp_iter)) {
        dr_symbol_export_t *sym = dr_symbol_export_iterator_next(exp_iter);
        if (sym->is_code)
            i++;
    }
    dr_symbol_export_iterator_stop(exp_iter);
    if (i == 0) {
        /* No exports found, so better to try our luck with dbghelp */
        return false;
    }
    mod->sorted_count = i;
    mod->sorted_exports = (export_info_t *)dr_global_alloc(mod->sorted_count *
                                                           sizeof(*mod->sorted_exports));
    /* 2nd pass.  The first iterator has been stopped, so a fresh one must be
     * started and used here.
     */
    i = 0;
    exp_iter = dr_symbol_export_iterator_start((module_handle_t)mod->map_base);
    /* Never write past the array sized by the first pass. */
    while (i < mod->sorted_count && dr_symbol_export_iterator_hasnext(exp_iter)) {
        dr_symbol_export_t *sym = dr_symbol_export_iterator_next(exp_iter);
        if (sym->is_code) {
            mod->sorted_exports[i].name = sym->name;
            mod->sorted_exports[i].addr = sym->addr;
            i++;
        }
    }
    dr_symbol_export_iterator_stop(exp_iter);
    /* XXX: using ntdll qsort just like drsym_pecoff_sort_symbols */
    qsort(mod->sorted_exports, mod->sorted_count, sizeof(*mod->sorted_exports),
          compare_exports);
    if (verbose >= 3) {
        NOTIFY(3, "%s:\n", __FUNCTION__);
        for (i = 0; i < mod->sorted_count; i++) {
            NOTIFY(3, "  #%d: %-20s addr=" PFX "\n", i, mod->sorted_exports[i].name,
                   mod->sorted_exports[i].addr);
        }
    }
    return true;
}
/******************************************************************************
* Platform-specific helpers
*/
/* Returns true if the two paths are identical strings (exact, case-sensitive
 * comparison) and false otherwise.
 * XXX: ignoring symlinks and 8.3, so two different spellings of the same file
 * are reported as different files.
 */
bool
drsym_obj_same_file(const char *path1, const char *path2)
{
    if (strcmp(path1, path2) != 0)
        return false;
    return true;
}
/* Returns the directory searched for separate debug files.  This is a fixed
 * path.
 */
const char *
drsym_obj_debug_path(void)
{
    /* XXX: figure out where cygwin is really installed */
    /* XXX: also search mingw debug path */
    static const char cygwin_debug_dir[] = "c:\\cygwin\\lib\\debug";
    return cygwin_debug_dir;
}
/* Returns the module's build id.  Not yet implemented for this backend, so
 * the result is always NULL and mod_in is not examined.
 */
const char *
drsym_obj_build_id(void *mod_in)
{
    /* NYI.  Are build id-based dirs used on cygwin? */
    (void)mod_in;
    return NULL;
}