/* **********************************************************
* Copyright (c) 2010-2017 Google, Inc. All rights reserved.
* Copyright (c) 2008-2010 VMware, Inc. All rights reserved.
* **********************************************************/
/* Dr. Memory: the memory debugger
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation;
* version 2.1 of the License, and no later version.
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
/***************************************************************************
* instru.c: Dr. Memory top-level instrumentation control routines
*/
#include "dr_api.h"
#include "drreg.h"
#include "drutil.h"
#include "drmemory.h"
#include "slowpath.h"
#include "spill.h"
#include "fastpath.h"
#include "stack.h"
#include "annotations.h"
#include "replace.h"
#include "report.h"
#include "syscall.h"
#include "shadow.h"
#include "alloc.h"
#include "alloc_drmem.h"
#include "pattern.h"
#include "heap.h"
/* State restoration: need to record which bbs have eflags-save-at-top.
* We store the app pc of the last instr in the bb.
*/
#define BB_HASH_BITS 12
hashtable_t bb_table;
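/* Key = fragment tag; payload = heap-allocated bb_saved_info_t, freed via
* bb_table_free_entry() (see instrument_init() and instrument_fragment_delete()).
*/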
/* PR 493257: share shadow translation across multiple instrs. But, abandon
* sharing for memrefs that cross 64K boundaries and keep exiting to slowpath.
* This table tracks slowpath exits and whether to share.
*/
#define XL8_SHARING_HASH_BITS 10
hashtable_t xl8_sharing_table;
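/* Summary (from the init and persist code below): keys are app pcs (hence
* DR_HASHPERS_REBASE_KEY on persist) and payloads are small integers stored
* directly in the table (persisted as uint, without
* DR_HASHPERS_PAYLOAD_IS_POINTER), so no free function is registered.
*/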
/* alloca handling in fastpath (i#91) */
#define IGNORE_UNADDR_HASH_BITS 6
hashtable_t ignore_unaddr_table;
#ifdef X86
/* Handle slowpath for OP_loop in repstr_to_loop properly (i#391).
* We map the address of an allocated OP_loop to the app_pc of the original
* app rep-stringop instr. We also map the reverse so we can delete it
* (we don't want to pay the cost of storing this in bb_saved_info_t for
* every single bb). We're helped there b/c repstr_to_loop always
* has single-instr bbs so the tag is the rep-stringop instr pc.
*/
# define STRINGOP_HASH_BITS 6
hashtable_t stringop_us2app_table;
static hashtable_t stringop_app2us_table;
void *stringop_lock; /* protects both tables */
/* Entry in stringop_app2us_table */
typedef struct _stringop_entry_t {
/* an OP_loop encoding, decoded by slow_path */
byte loop_instr[LOOP_INSTR_LENGTH];
/* This is used to handle non-precise flushing */
byte ignore_next_delete;
} stringop_entry_t;
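/* Lifecycle of the two stringop tables (see convert_repstr_to_loop() and
* instrument_fragment_delete()):
*   app2us: app rep-stringop pc -> stringop_entry_t* holding the fake OP_loop
*           handed to the slowpath as a translation target
*   us2app: stringop_entry_t* -> app rep-stringop pc, so the entry can be
*           located and freed when its fragment is deleted
* All additions and removals in both tables happen under stringop_lock.
*/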
#endif
#ifdef TOOL_DR_MEMORY
/* We wait until the 1st bb to set up thread data structs, as we need the mcxt
* and DR doesn't provide it at initial thread init (i#117).
*/
bool first_bb = true;
#endif
#ifdef TOOL_DR_MEMORY
static dr_emit_flags_t
instru_event_bb_app2app(void *drcontext, void *tag, instrlist_t *bb,
bool for_trace, bool translating, OUT void **user_data);
static dr_emit_flags_t
instru_event_bb_analysis(void *drcontext, void *tag, instrlist_t *bb,
bool for_trace, bool translating, void *user_data);
static dr_emit_flags_t
instru_event_bb_insert(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst,
bool for_trace, bool translating, void *user_data);
static dr_emit_flags_t
instru_event_bb_instru2instru(void *drcontext, void *tag, instrlist_t *bb,
bool for_trace, bool translating, void *user_data);
#endif
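/* Sketch of the bb instrumentation pipeline registered in instrument_init():
* the four drmgr phases below run in order for each bb and share one per-bb
* bb_info_t allocated in app2app and freed in instru2instru:
*   app2app       - app-visible transforms (repstr-to-loop, _chkstk rewrite)
*   analysis      - whole-bb decisions (fastpath_top_of_bb(), check_ignore_unaddr,
*                   SEH-epilog detection)
*   insert        - per-instr instrumentation (fastpath/slowpath, esp adjusts)
*   instru2instru - whole-bb finalization (fastpath_bottom_of_bb()) and bi free
*/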
/***************************************************************************
* FRAGMENT DELETION
*/
void
instrument_fragment_delete(void *drcontext/*may be NULL*/, void *tag)
{
bb_saved_info_t *save;
IF_X86(stringop_entry_t *stringop;)
uint bb_size = 0;
#ifdef TOOL_DR_MEMORY
if (!INSTRUMENT_MEMREFS())
return;
#endif
hashtable_lock(&bb_table);
save = (bb_saved_info_t *) hashtable_lookup(&bb_table, tag);
if (save != NULL) {
/* PR 495787: handle non-precise flushing where new bbs can be created
* before the old ones are fully deleted
*/
LOG(2, "event_fragment_delete "PFX" ignore_next_delete=%d\n",
tag, save->ignore_next_delete);
if (save->ignore_next_delete == 0) {
bb_size = save->bb_size;
hashtable_remove(&bb_table, tag);
} else /* hashtable lock is held so no race here */
save->ignore_next_delete--;
}
hashtable_unlock(&bb_table);
if (options.shadowing && bb_size > 0) {
/* i#260: remove xl8_sharing_table entries. We can't
* decode forward (not always safe) and query every app pc, so we store the
* bb size and assume bbs are contiguous (no elision) and there are no traces
* (already assuming that for i#114 and dr_fragment_exists_at()). We assume
* walking these hashtables is faster than switching to an rbtree, and it's
* not worth storing pointers in bb_saved_info_t.
*
* Without removing, new code that replaces old code at the same address can
* fail to be optimized b/c it will use the old code's history: so a perf
* failure, not a correctness failure.
*/
/* XXX i#551: -single_arg_slowpath adds a second xl8_sharing_table entry with
* cache pc for each app pc entry which we are not deleting yet. May need a
* table to map the two. Xref DRi#409: while there's no good solution from
* the DR side for app pc flushing, perhaps some event on re-using cache pcs
* could work but seems too specialized.
*/
/* i#768: We used to invalidate entries from ignore_unaddr_table here,
* but that ends up thrashing the code cache. Instead we remove stale
* entries in the new bb event if the alloca pattern no longer matches.
*/
app_pc start = dr_fragment_app_pc(tag);
/* It turns out that hashtable_remove_range() is really slow: xl8_sharing_table
* gets quite large (12 bits on a single chrome ui_tests test) and walking
* it on every single fragment delete is too costly.
* Removing each pc in the bb individually is faster:
*/
int i;
for (i = 0; i < bb_size; i++) {
hashtable_remove(&xl8_sharing_table, (void *)(start + i));
}
}
#ifdef X86
dr_mutex_lock(stringop_lock);
/* We rely on repstr_to_loop arranging the repstr to be the only
* instr and thus the tag (i#391) (and we require -disable_traces)
*/
stringop = (stringop_entry_t *) hashtable_lookup(&stringop_app2us_table, tag);
if (stringop != NULL) {
if (stringop->ignore_next_delete == 0) {
IF_DEBUG(bool found;)
hashtable_remove(&stringop_app2us_table, tag);
IF_DEBUG(found =)
hashtable_remove(&stringop_us2app_table, (void *)stringop);
LOG(2, "removing tag "PFX" and stringop entry "PFX"\n",
tag, stringop);
ASSERT(found, "entry should be in both tables");
} else {
LOG(2, "stringop entry "PFX" for tag "PFX" nextdel=%d\n",
stringop, tag, stringop->ignore_next_delete);
stringop->ignore_next_delete--;
}
}
dr_mutex_unlock(stringop_lock);
#endif
}
static void
bb_table_free_entry(void *entry)
{
bb_saved_info_t *save = (bb_saved_info_t *) entry;
ASSERT(save->ignore_next_delete == 0, "premature deletion");
global_free(save, sizeof(*save), HEAPSTAT_PERBB);
}
#ifdef X86
static void
stringop_free_entry(void *entry)
{
stringop_entry_t *e = (stringop_entry_t *) entry;
ASSERT(e->loop_instr[0] == LOOP_INSTR_OPCODE, "invalid entry");
LOG(3, "freeing stringop entry "PFX" ignore_next_delete %d\n",
e, e->ignore_next_delete);
global_free(e, sizeof(*e), HEAPSTAT_PERBB);
}
#endif
/***************************************************************************
* TOP-LEVEL
*/
#ifdef TOOL_DR_MEMORY
# ifdef WINDOWS
static ptr_uint_t note_base;
enum {
NOTE_NULL = 0,
NOTE_SEH_EPILOG_RETADDR,
NOTE_CHKSTK_RETADDR,
NOTE_MAX_VALUE,
};
# endif
#endif
void
instrument_init(void)
{
drmgr_priority_t priority = {sizeof(priority), "drmemory.instru", NULL, NULL,
DRMGR_PRIORITY_INSTRU};
drutil_init();
annotate_init();
#ifdef TOOL_DR_MEMORY
/* XXX: at some point we should design a cleaner interaction between
* various drmemory/ components and drheapstat/.
* For now we stick w/ the original design where drheapstat's bb events
* call into here.
*/
if (!drmgr_register_bb_instrumentation_ex_event
(instru_event_bb_app2app, instru_event_bb_analysis,
instru_event_bb_insert, instru_event_bb_instru2instru,
&priority)) {
ASSERT(false, "drmgr registration failed");
}
# ifdef WINDOWS
note_base = drmgr_reserve_note_range(NOTE_MAX_VALUE);
ASSERT(note_base != DRMGR_NOTE_NONE, "failed to get note value");
# endif /* WINDOWS */
#endif
/* we need bb event for leaks_only */
if (!INSTRUMENT_MEMREFS())
return;
instru_tls_init();
if (options.shadowing) {
gencode_init();
hashtable_init(&xl8_sharing_table, XL8_SHARING_HASH_BITS, HASH_INTPTR,
false/*!strdup*/);
hashtable_init(&ignore_unaddr_table, IGNORE_UNADDR_HASH_BITS, HASH_INTPTR,
false/*!strdup*/);
}
hashtable_init_ex(&bb_table, BB_HASH_BITS, HASH_INTPTR, false/*!strdup*/,
false/*!synch*/, bb_table_free_entry, NULL, NULL);
#ifdef X86
stringop_lock = dr_mutex_create();
hashtable_init_ex(&stringop_app2us_table, STRINGOP_HASH_BITS, HASH_INTPTR,
false/*!strdup*/, false/*!synch*/,
stringop_free_entry, NULL, NULL);
hashtable_init_ex(&stringop_us2app_table, STRINGOP_HASH_BITS, HASH_INTPTR,
false/*!strdup*/, false/*!synch*/, NULL, NULL, NULL);
#endif
#ifdef TOOL_DR_MEMORY
if (INSTRUMENT_MEMREFS())
replace_init();
#endif
}
void
instrument_exit(void)
{
annotate_exit();
drutil_exit();
if (!INSTRUMENT_MEMREFS())
return;
if (options.shadowing) {
gencode_exit();
}
if (options.shadowing) {
hashtable_delete_with_stats(&xl8_sharing_table, "xl8_sharing");
hashtable_delete_with_stats(&ignore_unaddr_table, "ignore_unaddr");
}
hashtable_delete_with_stats(&bb_table, "bb_table");
#ifdef X86
dr_mutex_destroy(stringop_lock);
hashtable_delete(&stringop_app2us_table);
hashtable_delete(&stringop_us2app_table);
#endif
#ifdef TOOL_DR_MEMORY
if (INSTRUMENT_MEMREFS())
replace_exit();
#endif
instru_tls_exit();
}
void
instrument_thread_init(void *drcontext)
{
if (!INSTRUMENT_MEMREFS())
return;
instru_tls_thread_init(drcontext);
}
void
instrument_thread_exit(void *drcontext)
{
if (!INSTRUMENT_MEMREFS())
return;
instru_tls_thread_exit(drcontext);
}
size_t
instrument_persist_ro_size(void *drcontext, void *perscxt)
{
size_t sz = 0;
if (!INSTRUMENT_MEMREFS())
return 0;
LOG(2, "persisting bb table "PFX"-"PFX"\n", dr_persist_start(perscxt),
dr_persist_start(perscxt) + dr_persist_size(perscxt));
sz += hashtable_persist_size(drcontext, &bb_table, sizeof(bb_saved_info_t),
perscxt, DR_HASHPERS_REBASE_KEY |
DR_HASHPERS_ONLY_IN_RANGE |
DR_HASHPERS_ONLY_PERSISTED);
if (options.shadowing) {
LOG(2, "persisting xl8 table\n");
sz += hashtable_persist_size(drcontext, &xl8_sharing_table, sizeof(uint),
perscxt, DR_HASHPERS_REBASE_KEY |
DR_HASHPERS_ONLY_IN_RANGE);
LOG(2, "persisting unaddr table\n");
sz += hashtable_persist_size(drcontext, &ignore_unaddr_table, sizeof(uint),
perscxt, DR_HASHPERS_REBASE_KEY |
DR_HASHPERS_ONLY_IN_RANGE);
}
#ifdef X86
LOG(2, "persisting string table\n");
sz += hashtable_persist_size(drcontext, &stringop_app2us_table,
sizeof(stringop_entry_t), perscxt,
DR_HASHPERS_REBASE_KEY |
DR_HASHPERS_ONLY_IN_RANGE | DR_HASHPERS_ONLY_PERSISTED);
/* the stringop_us2app_table is composed of heap-allocated entries in
* stringop_app2us_table, which will change on resurrection: so rather than
* persist we rebuild at resurrect time
*/
#endif
return sz;
}
bool
instrument_persist_ro(void *drcontext, void *perscxt, file_t fd)
{
bool ok = true;
if (!INSTRUMENT_MEMREFS())
return ok;
LOG(2, "persisting bb table\n");
ok = ok && hashtable_persist(drcontext, &bb_table, sizeof(bb_saved_info_t), fd,
perscxt, DR_HASHPERS_PAYLOAD_IS_POINTER |
DR_HASHPERS_REBASE_KEY | DR_HASHPERS_ONLY_IN_RANGE |
DR_HASHPERS_ONLY_PERSISTED);
if (options.shadowing) {
LOG(2, "persisting xl8 table\n");
/* these two tables don't just contain tags so we can't do ONLY_PERSISTED */
ok = ok && hashtable_persist(drcontext, &xl8_sharing_table, sizeof(uint), fd,
perscxt, DR_HASHPERS_REBASE_KEY |
DR_HASHPERS_ONLY_IN_RANGE);
LOG(2, "persisting unaddr table\n");
ok = ok && hashtable_persist(drcontext, &ignore_unaddr_table, sizeof(uint), fd,
perscxt, DR_HASHPERS_REBASE_KEY |
DR_HASHPERS_ONLY_IN_RANGE);
}
#ifdef X86
LOG(2, "persisting string table\n");
ok = ok && hashtable_persist(drcontext, &stringop_app2us_table,
sizeof(stringop_entry_t), fd, perscxt,
DR_HASHPERS_PAYLOAD_IS_POINTER | DR_HASHPERS_REBASE_KEY |
DR_HASHPERS_ONLY_IN_RANGE | DR_HASHPERS_ONLY_PERSISTED);
#endif
return ok;
}
/* caller should hold bb_table lock */
void
bb_save_add_entry(app_pc key, bb_saved_info_t *save)
{
bb_saved_info_t *old = (bb_saved_info_t *)
hashtable_add_replace(&bb_table, (void *)key, (void *)save);
ASSERT(hashtable_lock_self_owns(&bb_table), "missing lock");
if (old != NULL) {
ASSERT(old->ignore_next_delete < UCHAR_MAX, "ignore_next_delete overflow");
save->ignore_next_delete = old->ignore_next_delete + 1;
global_free(old, sizeof(*old), HEAPSTAT_PERBB);
LOG(2, "bb "PFX" duplicated: assuming non-precise flushing\n", key);
}
}
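/* Pairing with instrument_fragment_delete(): when a tag is re-added due to
* non-precise flushing, the replacement entry's ignore_next_delete is bumped
* so that the pending delete of the old fragment decrements the counter
* instead of removing the still-live replacement entry.
*/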
/* caller should hold bb_table lock */
static bool
bb_save_resurrect_entry(void *key, void *payload, ptr_int_t shift)
{
/* last_instr could be changed to last_instr_offs but then we'd need to call
* dr_fragment_app_pc(tag) in a few places which doesn't seem worth it
*/
bb_saved_info_t *save = (bb_saved_info_t *) payload;
ASSERT(hashtable_lock_self_owns(&bb_table), "missing lock");
save->first_restore_pc =
save->first_restore_pc == NULL ?
NULL : (app_pc) ((ptr_int_t)save->first_restore_pc + shift);
save->last_instr = (app_pc) ((ptr_int_t)save->last_instr + shift);
bb_save_add_entry((app_pc) key, save);
return true;
}
static bool
xl8_sharing_resurrect_entry(void *key, void *payload, ptr_int_t shift)
{
/* we can have dups b/c of non-precise flushing on re-loaded modules,
* so we use our own callback here to ignore them (dup entries cost
* perf, not correctness)
*/
hashtable_add(&xl8_sharing_table, key, payload);
return true;
}
#ifdef X86
/* caller should hold hashtable lock */
static void
stringop_app2us_add_entry(app_pc xl8, stringop_entry_t *entry)
{
stringop_entry_t *old = (stringop_entry_t *)
hashtable_add_replace(&stringop_app2us_table, (void *)xl8, (void *)entry);
ASSERT(dr_mutex_self_owns(stringop_lock), "caller must hold lock");
if (old != NULL) {
IF_DEBUG(bool found;)
LOG(2, "stringop xl8 "PFX" duplicated at "PFX
": assuming non-precise flushing\n", xl8, old);
ASSERT(old->ignore_next_delete < UCHAR_MAX, "ignore_next_delete overflow");
entry->ignore_next_delete = old->ignore_next_delete + 1;
global_free(old, sizeof(*old), HEAPSTAT_PERBB);
IF_DEBUG(found =)
hashtable_remove(&stringop_us2app_table, (void *)old);
ASSERT(found, "entry should be in both tables");
}
}
/* caller should hold hashtable lock */
static bool
stringop_app2us_resurrect_entry(void *key, void *payload, ptr_int_t shift)
{
stringop_app2us_add_entry((app_pc) key, (stringop_entry_t *) payload);
return true;
}
static bool
populate_us2app_table(void)
{
uint i;
for (i = 0; i < HASHTABLE_SIZE(stringop_app2us_table.table_bits); i++) {
hash_entry_t *he;
for (he = stringop_app2us_table.table[i]; he != NULL; he = he->next) {
hashtable_add(&stringop_us2app_table, (void *)he->payload, he->key);
/* we're going through the whole table so we will re-add prior entries */
}
}
return true;
}
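/* Called from instrument_resurrect_ro() under stringop_lock, after
* stringop_app2us_table has been resurrected. Re-adding prior entries is
* harmless since hashtable_add() (unlike hashtable_add_replace()) leaves an
* existing key untouched.
*/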
#endif
bool
instrument_resurrect_ro(void *drcontext, void *perscxt, byte **map INOUT)
{
bool ok = true;
if (!INSTRUMENT_MEMREFS())
return ok;
LOG(2, "resurrecting bb table\n");
hashtable_lock(&bb_table);
ok = ok && hashtable_resurrect(drcontext, map, &bb_table, sizeof(bb_saved_info_t),
perscxt, DR_HASHPERS_PAYLOAD_IS_POINTER |
DR_HASHPERS_REBASE_KEY | DR_HASHPERS_CLONE_PAYLOAD,
bb_save_resurrect_entry);
hashtable_unlock(&bb_table);
if (options.shadowing) {
LOG(2, "resurrecting xl8 table\n");
ok = ok && hashtable_resurrect(drcontext, map, &xl8_sharing_table, sizeof(uint),
perscxt, DR_HASHPERS_REBASE_KEY,
xl8_sharing_resurrect_entry);
LOG(2, "resurrecting unaddr table\n");
ok = ok && hashtable_resurrect(drcontext, map, &ignore_unaddr_table, sizeof(uint),
perscxt, DR_HASHPERS_REBASE_KEY, NULL);
}
#ifdef X86
LOG(2, "resurrecting string table\n");
dr_mutex_lock(stringop_lock);
ok = ok && hashtable_resurrect(drcontext, map, &stringop_app2us_table,
sizeof(stringop_entry_t), perscxt,
DR_HASHPERS_PAYLOAD_IS_POINTER |
DR_HASHPERS_REBASE_KEY |
DR_HASHPERS_CLONE_PAYLOAD,
stringop_app2us_resurrect_entry);
/* the stringop_us2app_table is composed of heap-allocated entries in
* stringop_app2us_table, which will change on resurrection: so rather than
* persist we rebuild at resurrect time
*/
ok = ok && populate_us2app_table();
dr_mutex_unlock(stringop_lock);
#endif
/* FIXME: if a later table fails, we'll abort the pcache load but we'll have entries
* added to the earlier tables. We should invalidate them.
*/
ASSERT(ok, "aborted pcache load leaves tables inconsistent");
return ok;
}
void
instru_insert_mov_pc(void *drcontext, instrlist_t *bb, instr_t *inst,
opnd_t dst, opnd_t pc_opnd)
{
if (opnd_is_instr(pc_opnd)) {
/* This does insert meta instrs */
instrlist_insert_mov_instr_addr(drcontext, opnd_get_instr(pc_opnd),
NULL /* in code cache */,
dst, bb, inst, NULL, NULL);
} else {
ASSERT(opnd_is_immed_int(pc_opnd), "invalid opnd");
/* This does insert meta instrs */
instrlist_insert_mov_immed_ptrsz(drcontext, opnd_get_immed_int(pc_opnd),
dst, bb, inst, NULL, NULL);
}
}
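/* Usage note: pc_opnd is either opnd_create_instr(), in which case the
* target's code-cache address is materialized once it is emitted, or an
* immediate app pc. The -single_arg_slowpath retaddr rewrite in
* instru_event_bb_insert() uses the instr form (opnd_create_instr(mi.slow_jmp)).
*/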
#ifdef TOOL_DR_MEMORY
# ifdef WINDOWS
/* i#1371: _SEH_epilog4 returns via a different stack slot instead of the actual retaddr slot
* USER32!_SEH_epilog4:
* 74af616a 8b4df0 mov ecx, [ebp-0x10]
* 74af616d 64890d00000000 mov fs:[00000000], ecx
* 74af6174 59 pop ecx
* 74af6175 5f pop edi
* 74af6176 5f pop edi
* 74af6177 5e pop esi
* 74af6178 5b pop ebx
* 74af6179 8be5 mov esp, ebp
* 74af617b 5d pop ebp
* 74af617c 51 push ecx
* 74af617d c3 ret
*/
static void
bb_check_SEH_epilog(void *drcontext, app_pc tag, instrlist_t *ilist)
{
instr_t *instr, *next_pop;
opnd_t opnd;
reg_id_t ret_reg; /* register that holds return addr */
/* ret */
instr = instrlist_last_app_instr(ilist);
if (instr == NULL || !instr_is_return(instr))
return;
/* push ecx */
instr = instr_get_prev_app_instr(instr);
if (instr == NULL || instr_get_opcode(instr) != OP_push)
return;
/* opnd must be reg */
opnd = instr_get_src(instr, 0);
if (!opnd_is_reg(opnd))
return;
ret_reg = opnd_get_reg(opnd);
/* mov ecx, [ebp-0x10] */
instr = instrlist_first_app_instr(ilist);
if (instr == NULL || instr_get_opcode(instr) != OP_mov_ld)
return;
/* mov [fs:00000000], ecx */
instr = instr_get_next_app_instr(instr);
if (instr == NULL || instr_get_opcode(instr) != OP_mov_st)
return;
/* opnd must be [fs:00000000] */
opnd = instr_get_dst(instr, 0);
if (!opnd_is_far_base_disp(opnd) ||
!opnd_is_abs_addr(opnd) /* rule out base or index */ ||
opnd_get_disp(opnd) != 0 /* disp must be 0 */)
return;
/* pop ecx */
instr = instr_get_next_app_instr(instr);
if (instr == NULL || instr_get_opcode(instr) != OP_pop)
return;
opnd = instr_get_dst(instr, 0);
/* opnd must be the same reg used by the push above */
if (!opnd_is_reg(opnd) || opnd_get_reg(opnd) != ret_reg)
return;
/* reg opnd must be pointer size */
ASSERT(opnd_get_size(opnd) == OPSZ_PTR, "wrong opnd size");
/* pop edi */
instr = instr_get_next_app_instr(instr);
if (instr == NULL || instr_get_opcode(instr) != OP_pop)
return;
next_pop = instr;
# ifdef DEBUG
/* pop edi */
instr = instr_get_next_app_instr(instr);
ASSERT(instr != NULL && instr_get_opcode(instr) == OP_pop,
"need more check to identify SEH_epilog");
# endif
instr = INSTR_CREATE_label(drcontext);
instr_set_note(instr, (void *)(note_base + NOTE_SEH_EPILOG_RETADDR));
PRE(ilist, next_pop, instr);
LOG(1, "found SEH_epilog at "PFX"\n", dr_fragment_app_pc(tag));
}
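/* The label note planted above is consumed by insert_zero_retaddr(): when it
* sees an OP_pop followed by this NOTE_SEH_EPILOG_RETADDR label it stores 0
* into the just-vacated slot at [xsp - XSP_SZ], analogous to the
* dr_clobber_retaddr_after_read() call used for a normal ret.
*/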
# ifndef X64
/* handle!_chkstk:
* ...
* 00a6cc4b 94 xchg eax,esp
* 00a6cc4c 8b00 mov eax,[eax]
* 00a6cc4e 890424 mov [esp],eax
* 00a6cc51 c3 ret
* or
* ntdll!_alloca_probe
* ...
* 7d610434 8500 test [eax],eax
* 7d610436 94 xchg eax,esp
* 7d610437 8b00 mov eax,[eax]
* 7d610439 50 push eax
* 7d61043a c3 ret
* or
* hello!_alloca_probe+0x20 [intel\chkstk.asm @ 85]:
* 85 0040db70 2bc8 sub ecx,eax
* 86 0040db72 8bc4 mov eax,esp
* 88 0040db74 8501 test [ecx],eax
* 90 0040db76 8be1 mov esp,ecx
* 92 0040db78 8b08 mov ecx,[eax]
* 93 0040db7a 8b4004 mov eax,[eax+0x4]
* 95 0040db7d 50 push eax
* 97 0040db7e c3 ret
*/
static void
bb_handle_chkstk(void *drcontext, app_pc tag, instrlist_t *ilist)
{
instr_t *instr, *load = NULL;
int opc;
opnd_t opnd;
/* ret */
instr = instrlist_last_app_instr(ilist);
if (instr == NULL || !instr_is_return(instr))
return;
/* mov [esp],eax or push eax */
instr = instr_get_prev_app_instr(instr);
if (instr == NULL)
return;
opc = instr_get_opcode(instr);
if (opc != OP_push && opc != OP_mov_st)
return;
/* dst: [esp] */
if (opc == OP_mov_st &&
!opnd_same(OPND_CREATE_MEMPTR(DR_REG_XSP, 0), instr_get_dst(instr, 0)))
return;
/* src: eax */
opnd = instr_get_src(instr, 0);
if (!opnd_is_reg(opnd) || opnd_get_reg(opnd) != DR_REG_XAX)
return;
/* mov eax,[eax] or mov eax,[eax+0x4] */
instr = instr_get_prev_app_instr(instr);
if (instr == NULL || instr_get_opcode(instr) != OP_mov_ld)
return;
/* dst: eax */
if (opnd_get_reg(instr_get_dst(instr, 0)) != DR_REG_XAX)
return;
/* src: [eax]/[eax+4] */
opnd = instr_get_src(instr, 0);
if (!opnd_is_near_base_disp(opnd) ||
opnd_get_base(opnd) != DR_REG_XAX ||
opnd_get_index(opnd) != DR_REG_NULL)
return;
if (opnd_get_disp(opnd) != 0 && opnd_get_disp(opnd) != 4)
WARN("WARNING: disp in [eax, disp] is not 0 or 4\n");
load = instr;
# ifdef DEBUG
instr = instr_get_prev_app_instr(instr); /* go to prev before we kill load */
# endif
/* We might start a bb right here due to relocation, so we pattern match
* up till here.
*
* To zero the return address, we need the original stack pointer value,
* which will be clobbered by the app instruction "mov eax,[eax]".
* We have no dead registers (not even edx, as some calling conventions
* have it live: i#1405). We could use some clever rewrites of the
* original code to use a push or pop through memory (see i#1405c#3) to
* perform the memory-to-memory copy the app is doing here, but
* those result in us reporting unaddrs due to accessing beyond TOS.
* Instead, we note that we don't need to zero: we just need a non-retaddr
* in the slot. Thus, we replace the load with xchg, which will place
* a stack address in the slot, which does not look like a retaddr.
* The xchg locks the bus, but weigh that against the 2 extra stores (spill
* reg plus zero slot) and 1 extra load (restore reg) of a spill-based
* approach. Plus, it's much simpler.
*
* A: mov eax,dword ptr [eax]
* B: mov dword ptr [esp],eax (OR push eax)
* C: ret
* =>
* A: xchg eax,dword ptr [eax]
* B: mov dword ptr [esp],eax (OR push eax)
* C: ret
*
* If we decide to go to a register-spilling solution, we should move this
* to the insert phase (and integrate properly with register stealing).
*/
/* This is pattern-matched in instr_shared_slowpath_decode_pc().
* XXX: this may confuse a client/user when a fault happens there,
* as its translation is the load instruction "mov eax, [eax+X]".
*/
PREXL8(ilist, load,
INSTR_XL8(INSTR_CREATE_xchg(drcontext, opnd, opnd_create_reg(DR_REG_XAX)),
instr_get_app_pc(load)));
instrlist_remove(ilist, load);
instr_destroy(drcontext, load);
LOG(2, "found _chkstk at "PFX"\n", dr_fragment_app_pc(tag));
# ifdef DEBUG
/* debug-only extra pattern verification */
/* skip newly inserted "lea edx, [eax]" */
ASSERT(instr != NULL, "instrumented code is gone");
if (instr == NULL)
return;
if (instr_get_opcode(instr) == OP_xchg) {
/* xchg eax,esp */
if (!(instr_writes_to_exact_reg(instr, DR_REG_XSP, DR_QUERY_DEFAULT) &&
instr_writes_to_exact_reg(instr, DR_REG_XAX, DR_QUERY_DEFAULT))) {
WARN("Wrong xchg instr\n");
}
return;
} else {
/* find any instr writing to stack pointer before reading from it */
for (instr = instr_get_prev_app_instr(instr);
instr != NULL;
instr = instr_get_prev_app_instr(instr)) {
ASSERT(!instr_reads_from_reg(instr, DR_REG_XSP, DR_QUERY_DEFAULT),
"see wrong pattern");
if (instr_writes_to_exact_reg(instr, DR_REG_XSP, DR_QUERY_DEFAULT))
return;
}
}
# endif /* DEBUG */
}
# endif /* !X64 */
# endif /* WINDOWS */
static void
insert_zero_retaddr(void *drcontext, instrlist_t *bb, instr_t *inst, bb_info_t *bi)
{
if (instr_is_return(inst)) {
dr_clobber_retaddr_after_read(drcontext, bb, inst, 0);
LOG(2, "zero retaddr for normal ret\n");
# ifdef WINDOWS
} else if (instr_get_opcode(inst) == OP_pop) {
/* SEH_epilog */
/* Assuming it is forward instrumentation, i.e., there is no instruction
* inserted between the pop and the label yet.
*/
instr_t *label = instr_get_next(inst);
if (label != NULL && instr_is_label(label) &&
instr_get_note(label) == (void *)(note_base+NOTE_SEH_EPILOG_RETADDR)) {
PRE(bb, label,
INSTR_CREATE_mov_st(drcontext,
OPND_CREATE_MEMPTR(REG_XSP, -XSP_SZ),
OPND_CREATE_INT32(0)));
LOG(2, "zero retaddr in SEH_epilog\n");
}
# endif /* WINDOWS */
# ifdef ARM
} else if (instr_get_opcode(inst) == OP_ldr &&
opnd_get_base(instr_get_src(inst, 0)) == DR_REG_SP &&
opnd_get_reg(instr_get_dst(inst, 0)) == DR_REG_LR) {
/* We handle this idiom here which thwarts the other retaddr clobbering code
* as the pop is prior to the indirect branch (i#1856):
*
* f85d eb04 ldr (%sp)[4byte] $0x00000004 %sp -> %lr %sp
* b003 add %sp $0x0000000c -> %sp
* 4770 bx %lr
*/
bool writeback = instr_num_srcs(inst) > 1;
if (writeback && opnd_is_immed_int(instr_get_src(inst, 1))) {
opnd_t memop = instr_get_src(inst, 0);
opnd_set_disp(&memop, -opnd_get_immed_int(instr_get_src(inst, 1)));
/* See above: we just write our stolen reg value */
/* XXX: is this against drmgr rules? */
POST(bb, inst, XINST_CREATE_store
(drcontext, memop, opnd_create_reg(dr_get_stolen_reg())));
}
#endif
}
}
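/* ARM case above, spelled out: for a post-indexed "ldr lr, [sp], #imm" the
* slot that held the return address ends up at [sp - imm] once sp has been
* written back, so the store inserted after the instr uses disp = -imm and
* overwrites that slot with the stolen register's value, so the slot no
* longer holds the stale return address.
*/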
/* i#1412: raise an error on executing invalid memory. We check every instr to
* handle page boundaries, at the risk of raising errors on instrs that are
* never reached due to prior faults and other corner cases.
*
* The pc param should equal the result of instr_get_app_pc(inst).
*/
static void
check_program_counter(void *drcontext, app_pc pc, instr_t *inst)
{
umbra_shadow_memory_info_t info;
if (!options.check_pc || !options.shadowing)
return;
umbra_shadow_memory_info_init(&info);
if (shadow_get_byte(&info, pc) == SHADOW_UNADDRESSABLE &&
!is_in_realloc_gencode(pc) &&
!in_replace_routine(pc)
/* On Unix replace_* routines call into PIC routines elsewhere in the library.
* Plus, we execute code from replace_native_ret as the app.
*/
IF_UNIX(&& !is_in_client_or_DR_lib(pc))) {
size_t sz = instr_length(drcontext, inst);
app_loc_t loc;
dr_mcontext_t mc;
pc_to_loc(&loc, pc);
mc.size = sizeof(mc);
mc.flags = DR_MC_INTEGER | DR_MC_CONTROL;
dr_get_mcontext(drcontext, &mc);
report_unaddressable_access(&loc, pc, sz, DR_MEMPROT_EXEC, pc, pc + sz, &mc);
/* XXX: unlike data accesses, legitimate execution from memory we consider
* unaddressable would likely involve many instrs in a row and could result
* in many error reports. Avoiding that is complex, however, as marking
* unaddr memory (likely via shadow_set_non_matching_range() to undef if
* !def) as valid for the whole page or the whole region has downsides, and
* we certainly don't want to do that for redzones on the heap or beyond TOS.
* We do nothing today: users can always turn off -check_pc, and it seems very
* unlikely for a legitimate case to occur in an app.
*/
}
}
#ifdef X86
/* PR 580123: add fastpath for rep string instrs by converting to normal loop */
static void
convert_repstr_to_loop(void *drcontext, instrlist_t *bb, bb_info_t *bi,
bool translating)
{
bool expanded;
instr_t *string;
ASSERT(options.repstr_to_loop, "shouldn't be called");
/* The bulk of the code here is now in the drutil library */
if (!drutil_expand_rep_string_ex(drcontext, bb, &expanded, &string))
ASSERT(false, "drutil failed");
if (expanded) {
stringop_entry_t *entry;
app_pc xl8 = instr_get_app_pc(string);
IF_DEBUG(bool ok;)
LOG(3, "converting rep string into regular loop\n");
/* we handle the jecxz skipping lazy spill in the insert routine */
/* We could point instr_can_use_shared_slowpath() at the final byte of the
* instr (i.e., past the rep prefix) and have shared_slowpath fix up the pc
* if it reports an error, and perhaps assume the string instr is immediately
* after the return from slowpath (should be true since shouldn't pick edi or
* esi as scratch regs, and none of the string instrs read aflags) so it can
* look for data16 prefix. But it's simpler to handle data16 prefix by
* pointing at the start of the instr and having shared_slowpath assume there
* are no repstrs doing loops so no loop emulation is needed. This means the
* slowpath will consider xcx an operand here in addition to at the loop
* instr below but that shouldn't be a problem: if xcx is uninit it will get
* reported once and w/ the right pc. Xref i#353.
*/
bi->fake_xl8_override_instr = string;
bi->fake_xl8_override_pc = xl8;
/* We need to tell instr_can_use_shared_slowpath() what app pc to use
* while pointing it at an OP_loop instr.
* For -fastpath, we should go to slowpath only if ecx is uninit, but
* even then we can't afford to treat as a string op: will read wrong
* mem addr b/c the just-executed string op adjusted edi/esi (i#391).
* Solution is to allocate some memory and create a fake OP_loop there.
* We use a hashtable to map from that to the app_pc.
* We free by relying on the stringop being the first instr and thus
* the tag (=> no trace support).
*/
if (translating) {
dr_mutex_lock(stringop_lock);
entry = (stringop_entry_t *) hashtable_lookup(&stringop_app2us_table, xl8);
ASSERT(entry != NULL, "stringop entry should exist on translation");
dr_mutex_unlock(stringop_lock);
} else {
entry = (stringop_entry_t *) global_alloc(sizeof(*entry), HEAPSTAT_PERBB);
entry->loop_instr[0] = LOOP_INSTR_OPCODE;
entry->loop_instr[1] = 0;
entry->ignore_next_delete = 0;
dr_mutex_lock(stringop_lock);
stringop_app2us_add_entry(xl8, entry);
IF_DEBUG(ok = )
hashtable_add(&stringop_us2app_table, (void *)entry, xl8);
LOG(2, "adding stringop entry "PFX" for xl8 "PFX"\n",
entry, xl8);
/* only freed for heap reuse on hashtable removal */
ASSERT(ok, "not possible to have existing from-heap entry");
dr_mutex_unlock(stringop_lock);
}
/* we have the jecxz, mov $1, 2 jmps, and this loop all treated as OP_loop by
* slowpath. not a problem: ok to treat all as reading xcx.
*/
bi->fake_xl8 = (app_pc) entry;
bi->is_repstr_to_loop = true;
}
}
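/* Rough shape of the drutil expansion referenced above (a sketch only; see
* drutil_expand_rep_string_ex() for the authoritative sequence). The original
*   rep movs
* becomes approximately
*   jecxz  zero
*   jmp    iter
* zero:
*   mov    $1 -> ecx
*   jmp    pre_loop
* iter:
*   movs            (single-iteration string instr: bi->fake_xl8_override_instr)
* pre_loop:
*   loop   iter
* which is why the comment above speaks of "the jecxz, mov $1, 2 jmps, and
* this loop".
*/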
#endif
/* Conversions to app code itself that should happen before instrumentation */
static dr_emit_flags_t
instru_event_bb_app2app(void *drcontext, void *tag, instrlist_t *bb,
bool for_trace, bool translating, OUT void **user_data)
{
bb_info_t *bi;
if (go_native)
return DR_EMIT_GO_NATIVE;
#ifdef STATISTICS
if (!translating && !for_trace)
STATS_INC(num_bbs);
#endif
#ifdef TOOL_DR_MEMORY
/* No way to get app xsp at init or thread init (i#117) so we do it here */
if (first_bb) {
if (options.native_until_thread == 0)
set_initial_layout();
first_bb = false;
}
#endif
/* we pass bi among all 4 phases */
bi = thread_alloc(drcontext, sizeof(*bi), HEAPSTAT_PERBB);
memset(bi, 0, sizeof(*bi));
*user_data = (void *) bi;
if (options.check_uninitialized &&
options.check_uninit_blacklist[0] != '\0') {
/* We assume no elision across modules here, so we can just pass the tag */
bi->mark_defined = module_is_on_check_uninit_blacklist(dr_fragment_app_pc(tag));
DOLOG(3, {
if (bi->mark_defined)
LOG(3, "module is on uninit blacklist: always defined\n");
});
}
#ifdef DEBUG
/* To diagnose fastpath vs slowpath issues on a whole-bb level,
* set bi->force_slowpath here (xref i#1458).
*/
#endif
LOG(SYSCALL_VERBOSE, "in event_basic_block(tag="PFX")%s%s\n", tag,
for_trace ? " for trace" : "", translating ? " translating" : "");
DOLOG(3, instrlist_disassemble(drcontext, tag, bb, LOGFILE_GET(drcontext)););
#ifdef X86
if (options.repstr_to_loop && INSTRUMENT_MEMREFS())
convert_repstr_to_loop(drcontext, bb, bi, translating);
#endif
#if defined(WINDOWS) && !defined(X64)
/* i#1374: we need to insert a non-meta instr for handling zero_retaddr in _chkstk */
if (options.zero_retaddr)
bb_handle_chkstk(drcontext, tag, bb);
#endif /* WINDOWS && !X64 */
return DR_EMIT_DEFAULT;
}
static dr_emit_flags_t
instru_event_bb_analysis(void *drcontext, void *tag, instrlist_t *bb,
bool for_trace, bool translating, void *user_data)
{
bb_info_t *bi = (bb_info_t *) user_data;
if (go_native)
return DR_EMIT_GO_NATIVE;
LOG(4, "ilist before analysis:\n");
DOLOG(4, instrlist_disassemble(drcontext, tag, bb, LOGFILE_GET(drcontext)););
#ifdef USE_DRSYMS
/* symbol of each bb is very useful for debugging */
DOLOG(3, {
char buf[128];
size_t sofar = 0;
ssize_t len;
if (!translating) {
BUFPRINT(buf, BUFFER_SIZE_ELEMENTS(buf), sofar, len,
"new basic block @"PFX" ==", tag);
print_symbol(tag, buf, BUFFER_SIZE_ELEMENTS(buf), &sofar,
true, PRINT_SYMBOL_OFFSETS);
LOG(1, "%s\n", buf);
}
});
#endif
#ifdef TOOL_DR_MEMORY
DOLOG(4, {
if (options.shadowing) {
LOG(4, "shadow register values:\n");
print_shadow_registers();
}
});
#endif
#ifdef TOOL_DR_MEMORY
if (INSTRUMENT_MEMREFS())
fastpath_top_of_bb(drcontext, tag, bb, bi);
#endif
/* Rather than having DR store translations, it takes less space for us to
* use the bb table we already have
*/
if (INSTRUMENT_MEMREFS()) {
if (translating) {
bb_saved_info_t *save;
hashtable_lock(&bb_table);
save = (bb_saved_info_t *) hashtable_lookup(&bb_table, tag);
ASSERT(save != NULL, "missing bb info");
if (save->check_ignore_unaddr)
bi->check_ignore_unaddr = true;
/* setting this pattern field here is sort of an abstraction violation,
* but more efficient.
*/
bi->pattern_4byte_check_only = save->pattern_4byte_check_only;
IF_DEBUG(bi->pattern_4byte_check_field_set = true);
bi->share_xl8_max_diff = save->share_xl8_max_diff;
hashtable_unlock(&bb_table);
} else {
/* We want to ignore unaddr refs by heap routines (when touching headers,
* etc.). We want to stay on the fastpath so we put checks there.
* We decide up front since in_heap_routine changes dynamically
* and if we recreate partway into the first bb we'll get it wrong:
* though now that we're checking the first bb from alloc_instrument
* it doesn't matter.
*/
bi->check_ignore_unaddr = (options.check_ignore_unaddr &&
alloc_in_heap_routine(drcontext));
DOLOG(2, {
if (bi->check_ignore_unaddr)
LOG(2, "inside heap routine: adding nop-if-mem-unaddr checks\n");
});
/* i#826: share_xl8_max_diff changes over time, so save it. */
bi->share_xl8_max_diff = options.share_xl8_max_diff;
#ifdef TOOL_DR_MEMORY
if (options.check_memset_unaddr &&
in_replace_memset(dr_fragment_app_pc(tag))) {
/* since memset is later called by heap routines, add in-heap checks
* now (i#234). we add them to other mem and string routines as well
* rather than try to track exactly which ones the heap routines use
*/
bi->check_ignore_unaddr = true;
LOG(2, "inside memset routine @"PFX": adding nop-if-mem-unaddr checks\n",
tag);
}
#endif
}
}
bi->first_instr = true;
#ifdef WINDOWS
if (options.zero_retaddr)
bb_check_SEH_epilog(drcontext, tag, bb);
#endif
return DR_EMIT_DEFAULT;
}
static dr_emit_flags_t
instru_event_bb_insert(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst,
bool for_trace, bool translating, void *user_data)
{
bb_info_t *bi = (bb_info_t *) user_data;
uint i;
app_pc pc = instr_get_app_pc(inst);
uint opc;
bool has_shadowed_reg, has_mem, has_noignorable_mem;
bool used_fastpath = false;
fastpath_info_t mi;
/* i#2402: Temporarily disable auto predication globally due to poor
* interaction with internal control flow we emit.
*/
drmgr_disable_auto_predication(drcontext, bb);
if (go_native)
return DR_EMIT_GO_NATIVE;
if (instr_is_meta(inst))
goto instru_event_bb_insert_done;
if (!translating && !for_trace && options.check_pc)
check_program_counter(drcontext, pc, inst);
memset(&mi, 0, sizeof(mi));
/* We can't change bi->check_ignore_unaddr in the middle of the bb b/c of
* recreation, so we only set it when entering/exiting an alloc routine at
* the first instr
*/
if (bi->first_instr && INSTRUMENT_MEMREFS() && options.check_ignore_unaddr) {
if (alloc_entering_alloc_routine(pc)) {
bi->check_ignore_unaddr = true;
LOG(2, "entering heap routine: adding nop-if-mem-unaddr checks\n");
} else if (alloc_exiting_alloc_routine(pc)) {
/* we wait until post-call, so pt->in_heap_routine > 0 in the post-call
* bb event; thus avoid adding checks there
*/
bi->check_ignore_unaddr = false;
LOG(2, "exiting heap routine: NOT adding nop-if-mem-unaddr checks\n");
}
}
if (bi->first_instr && bi->is_repstr_to_loop) {
/* if xcx is 0 we'll skip ahead and will restore the whole-bb regs
* at the bottom of the bb so make sure we save first.
* this is a case of internal control flow messing up code that
* was taking advantage of the simplicity of linear block code!
*/
if (whole_bb_spills_enabled() &&
!(options.pattern != 0 && options.pattern_opt_repstr)) {
if (options.pattern != 0) { /* pattern uses drreg */
IF_DEBUG(drreg_status_t res =)
drreg_reserve_aflags(drcontext, bb, inst);
ASSERT(res == DRREG_SUCCESS, "reserve of aflags should work");
IF_DEBUG(res =)
drreg_unreserve_aflags(drcontext, bb, inst);
ASSERT(res == DRREG_SUCCESS, "unreserve of aflags should work");
} else {
mark_scratch_reg_used(drcontext, bb, bi, &bi->reg1);
mark_scratch_reg_used(drcontext, bb, bi, &bi->reg2);
mark_eflags_used(drcontext, bb, bi);
/* eflag saving may have clobbered xcx, which we need for jecxz, but
* jecxz is an app instr now so we should naturally restore it
*/
}
}
}
if (INSTRUMENT_MEMREFS()) {
/* We want to spill AFTER any clean call in case it changes mcontext */
/* XXX: examine this: how make it more in spirit of drmgr? */
bi->spill_after = instr_get_prev(inst);
/* update liveness of whole-bb spilled regs */
fastpath_pre_instrument(drcontext, bb, inst, bi);
}
opc = instr_get_opcode(inst);
if (instr_is_syscall(inst)) {
/* new syscall events mean we no longer have to add a clean call */
/* we treat interrupts and syscalls, including the call*
* for a wow64 syscall, as though they do not write to the
* stack or esp (for call*, since we never see the
* corresponding ret instruction), including for sysenter
* now that we have DRi#537.
*/
goto instru_event_bb_insert_done;
}
#ifdef WINDOWS
ASSERT(!instr_is_wow64_syscall(inst), "syscall identification error");
#endif
if (!INSTRUMENT_MEMREFS() && (!options.leaks_only || !options.count_leaks)) {
if (options.zero_retaddr)
insert_zero_retaddr(drcontext, bb, inst, bi);
goto instru_event_bb_insert_done;
}
if (instr_is_interrupt(inst))
goto instru_event_bb_insert_done;
if (instr_is_nop(inst))
goto instru_event_bb_insert_done;
if (options.pattern != 0 && instr_is_prefetch(inst))
goto instru_event_bb_insert_done;
/* if there are no shadowed reg or mem operands, we can ignore it */
has_shadowed_reg = false;
has_mem = false;
has_noignorable_mem = false;
for (i = 0; i < instr_num_dsts(inst); i++) {
opnd_t opnd = instr_get_dst(inst, i);
if (opnd_is_memory_reference(opnd) IF_X86(&& instr_get_opcode(inst) != OP_lea))
has_mem = true;
#ifdef TOOL_DR_MEMORY
if (has_mem && opnd_uses_nonignorable_memory(opnd))
has_noignorable_mem = true;
#endif
if (options.shadowing && opnd_is_reg(opnd) &&
reg_is_shadowed(opc, opnd_get_reg(opnd))) {
has_shadowed_reg = true;
if (reg_is_gpr(opnd_get_reg(opnd))) {
/* written to => no longer known to be addressable,
* unless modified by const amt: we look for push/pop
*/
if (!(opc_is_push(opc) || (opc_is_pop(opc) && i > 0))) {
bi->addressable[reg_to_pointer_sized(opnd_get_reg(opnd)) -
DR_REG_START_GPR] = false;
}
}
}
}
if (!has_shadowed_reg || !has_mem) {
for (i = 0; i < instr_num_srcs(inst); i++) {
opnd_t opnd = instr_get_src(inst, i);
if (opnd_is_memory_reference(opnd)
IF_X86(&& instr_get_opcode(inst) != OP_lea))
has_mem = true;
#ifdef TOOL_DR_MEMORY
if (has_mem && opnd_uses_nonignorable_memory(opnd))
has_noignorable_mem = true;
#endif
if (options.shadowing && opnd_is_reg(opnd) &&
reg_is_shadowed(opc, opnd_get_reg(opnd)))
has_shadowed_reg = true;
}
}
if (!has_shadowed_reg && !has_mem &&
!TESTANY(EFLAGS_READ_ARITH|EFLAGS_WRITE_ARITH,
instr_get_eflags(inst, DR_QUERY_INCLUDE_ALL)))
goto instru_event_bb_insert_done;
/* for cmp/test+jcc with -check_uninit_cmps we don't need to instrument the jcc */
if ((options.pattern != 0 ||
(options.shadowing && bi->eflags_defined)) &&
instr_is_jcc(inst))
goto instru_event_bb_insert_done;
if (options.pattern != 0) {
if (!(bi->is_repstr_to_loop && options.pattern_opt_repstr)) {
/* aggressive optimization of repstr for pattern mode will
* be handled separately in pattern_instrument_repstr
*/
pattern_instrument_check(drcontext, bb, inst, bi, translating);
}
} else if (options.shadowing &&
(options.check_uninitialized || has_noignorable_mem)) {
if (instr_ok_for_instrument_fastpath(inst, &mi, bi)) {
instrument_fastpath(drcontext, bb, inst, &mi, bi->check_ignore_unaddr);
used_fastpath = true;
bi->added_instru = true;
} else {
LOG(3, "fastpath unavailable "PFX": ", pc);
DOLOG(3, { instr_disassemble(drcontext, inst, LOGFILE_GET(drcontext)); });
LOG(3, "\n");
bi->shared_memop = opnd_create_null();
/* Restore whole-bb spilled regs (PR 489221)
* FIXME: optimize via liveness analysis
*/
mi.reg1 = bi->reg1;
mi.reg2 = bi->reg2;
memset(&mi.reg3, 0, sizeof(mi.reg3));
instrument_slowpath(drcontext, bb, inst,
whole_bb_spills_enabled() ? &mi : NULL);
/* for whole-bb slowpath does interact w/ global regs */
bi->added_instru = whole_bb_spills_enabled();
}
}
/* do esp adjust last, for ret immed; leave wants it the
* other way but we compensate in adjust_memop() */
/* -leaks_only co-opts esp-adjust code to zero out newly allocated stack
* space to avoid stale pointers from prior frames from misleading our
* leak scan (PR 520916). yes, I realize it may not be perfectly
* transparent.
*/
if ((options.leaks_only || options.shadowing) &&
instr_writes_esp(inst)) {
bool shadow_xsp = options.shadowing &&
(options.check_uninitialized || options.check_stack_bounds);
bool zero_stack = ZERO_STACK();
if (shadow_xsp || zero_stack) {
/* any new spill must be after the fastpath instru */
bi->spill_after = instr_get_prev(inst);
if (shadow_xsp) {
sp_adjust_action_t sp_action = SP_ADJUST_ACTION_SHADOW;
if (should_mark_stack_frames_defined(pc)) {
sp_action = SP_ADJUST_ACTION_DEFINED;
}
if (instrument_esp_adjust(drcontext, bb, inst, bi, sp_action)) {
/* instru clobbered reg1 so no sharing across it */
bi->shared_memop = opnd_create_null();
}
}
if (zero_stack) {
/* w/o definedness info we need to zero as well to find leaks */
instrument_esp_adjust(drcontext, bb, inst, bi, SP_ADJUST_ACTION_ZERO);
}
}
bi->added_instru = true;
}
if (options.zero_retaddr && !ZERO_STACK() && !options.check_uninitialized)
insert_zero_retaddr(drcontext, bb, inst, bi);
/* None of the "goto instru_event_bb_insert_dones" above need to be processed here */
if (INSTRUMENT_MEMREFS())
fastpath_pre_app_instr(drcontext, bb, inst, bi, &mi);
if (mi.appclone != NULL) {
instr_t *nxt = instr_get_next(mi.appclone);
ASSERT(options.single_arg_slowpath, "only used for single_arg_slowpath");
while (nxt != NULL &&
(instr_is_label(nxt) || instr_is_spill(nxt) || instr_is_restore(nxt)))
nxt = instr_get_next(nxt);
ASSERT(nxt != NULL, "app clone error");
DOLOG(3, {
LOG(3, "comparing: ");
instr_disassemble(drcontext, mi.appclone, LOGFILE_GET(drcontext));
LOG(3, "\n");
LOG(3, "with: ");
instr_disassemble(drcontext, nxt, LOGFILE_GET(drcontext));
LOG(3, "\n");
});
STATS_INC(app_instrs_fastpath);
/* only destroy if app instr won't be mangled */
if (instr_same(mi.appclone, nxt) &&
!instr_is_cti(nxt) &&
/* FIXME PR 494769: -single_arg_slowpath cannot be on by default
* b/c we can't predict whether an instr will be mangled
* for selfmod! Also, today we're not looking for mangling of
* instr_has_rel_addr_reference(). The option is off by default
* until that's addressed by implementing i#156/PR 306163 and
* adding post-mangling bb and trace events.
*/
!instr_is_syscall(nxt) &&
!instr_is_interrupt(nxt)) {
ASSERT(mi.slow_store_retaddr != NULL, "slowpath opt error");
/* point at the jmp so slow_path() knows to return right afterward */
instru_insert_mov_pc(drcontext, bb, mi.slow_store_retaddr,
mi.slow_store_dst, opnd_create_instr(mi.slow_jmp));
/* we've replaced the original so remove it */
instrlist_remove(bb, mi.slow_store_retaddr);
instr_destroy(drcontext, mi.slow_store_retaddr);
mi.slow_store_retaddr = NULL;
if (mi.slow_store_retaddr2 != NULL) {
instrlist_remove(bb, mi.slow_store_retaddr2);
instr_destroy(drcontext, mi.slow_store_retaddr2);
mi.slow_store_retaddr2 = NULL;
}
instrlist_remove(bb, mi.appclone);
instr_destroy(drcontext, mi.appclone);
mi.appclone = NULL;
STATS_INC(app_instrs_no_dup);
} else {
DOLOG(3, {
LOG(3, "need dup for: ");
instr_disassemble(drcontext, mi.appclone, LOGFILE_GET(drcontext));
LOG(3, "\n");
});
}
}
instru_event_bb_insert_done:
if (bi->first_instr && instr_is_app(inst))
bi->first_instr = false;
if (!used_fastpath && options.shadowing) {
/* i#1870: sanity check in case we bail out of instrumenting the next instr
* when we're sharing.
*/
bi->shared_memop = opnd_create_null();
}
/* We store whether bi->check_ignore_unaddr was set in our own data struct to avoid
* DR having to store translations, so we can recreate deterministically
* => DR_EMIT_DEFAULT
*/
if (persistence_supported())
return DR_EMIT_DEFAULT | DR_EMIT_PERSISTABLE;
else
return DR_EMIT_DEFAULT;
}
static dr_emit_flags_t
instru_event_bb_instru2instru(void *drcontext, void *tag, instrlist_t *bb,
bool for_trace, bool translating, void *user_data)
{
bb_info_t *bi = (bb_info_t *) user_data;
if (go_native)
return DR_EMIT_GO_NATIVE;
#ifdef TOOL_DR_MEMORY
# ifdef X86
if (options.pattern != 0 && options.pattern_opt_repstr &&
bi->is_repstr_to_loop)
pattern_instrument_repstr(drcontext, bb, bi, translating);
# endif
#endif
if (INSTRUMENT_MEMREFS()) {
fastpath_bottom_of_bb(drcontext, tag, bb, bi, bi->added_instru, translating,
bi->check_ignore_unaddr);
}
LOG(4, "final ilist:\n");
DOLOG(4, instrlist_disassemble(drcontext, tag, bb, LOGFILE_GET(drcontext)););
thread_free(drcontext, bi, sizeof(*bi), HEAPSTAT_PERBB);
return DR_EMIT_DEFAULT;
}
#endif /* TOOL_DR_MEMORY */
/***************************************************************************
* LOCATION SHARED CODE
*/
#ifdef TOOL_DR_MEMORY
/* for jmp-to-slowpath optimization where we xl8 to get app pc (PR 494769) */
static app_pc
translate_cache_pc(byte *pc_to_xl8)
{
app_pc res;
void *drcontext = dr_get_current_drcontext();
cls_drmem_t *cpt = (cls_drmem_t *) drmgr_get_cls_field(drcontext, cls_idx_drmem);
ASSERT(cpt != NULL, "cpt shouldn't be null");
ASSERT(pc_to_xl8 != NULL, "invalid param");
ASSERT(options.single_arg_slowpath, "only used for single_arg_slowpath");
/* ensure event_restore_state() returns true */
cpt->self_translating = true;
res = dr_app_pc_from_cache_pc(pc_to_xl8);
cpt->self_translating = false;
ASSERT(res != NULL, "failure to determine app pc on slowpath");
STATS_INC(xl8_app_for_slowpath);
LOG(3, "translated "PFX" to "PFX" for slowpath\n", pc_to_xl8, res);
return res;
}
#endif
app_pc
loc_to_pc(app_loc_t *loc)
{
ASSERT(loc != NULL && loc->type == APP_LOC_PC, "invalid param");
if (!loc->u.addr.valid) {
#ifdef TOOL_DR_MEMORY
ASSERT(options.single_arg_slowpath, "only used for single_arg_slowpath");
/* pc field holds cache pc that must be translated */
ASSERT(dr_memory_is_dr_internal(loc->u.addr.pc), "invalid untranslated pc");
loc->u.addr.pc = translate_cache_pc(loc->u.addr.pc);
ASSERT(loc->u.addr.pc != NULL, "translation failed");
loc->u.addr.valid = true;
#else
ASSERT(false, "NYI");
#endif
}
return loc->u.addr.pc;
}
app_pc
loc_to_print(app_loc_t *loc)
{
ASSERT(loc != NULL, "invalid param");
if (loc->type == APP_LOC_PC) {
/* perf hit to translate so only at high loglevel */
DOLOG(3, { return loc_to_pc(loc); });
return loc->u.addr.valid ? loc->u.addr.pc : NULL;
} else {
ASSERT(loc->type == APP_LOC_SYSCALL, "unknown type");
/* we ignore secondary sysnum (used only for logging) */
return (app_pc)(ptr_uint_t) loc->u.syscall.sysnum.number;
}
}