blob: baf1b32dbde18820d160eeeff35eb75ca69eaa6e [file]
/* ******************************************************************************
* Copyright (c) 2013-2014 Google, Inc. All rights reserved.
* ******************************************************************************/
/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/* Code Manipulation API Sample:
* bbbuf.c
*
* This sample demonstrates how to use a TLS field for per-thread profiling.
* For each thread, we create a 64KB buffer with 64KB-aligned start address,
* and store that into a TLS slot.
* At the beginning of each basic block, we insert code to
* - load the pointer from the TLS slot,
* - store the starting pc of the basic block into the buffer,
* - update the pointer by incrementing just the low 16 bits of the pointer
* so we will fill the buffer in a cyclical way.
* This sample can be used for hot path profiling or debugging with execution
* history.
*/
#include "dr_api.h"
#include <string.h>
/* Shorthand for the DR routine that inserts a meta instruction before a
 * given instruction in an instruction list.
 */
#define MINSERT instrlist_meta_preinsert
/* Non-zero iff every bit set in mask is also set in var. */
#define TESTALL(mask, var) (((mask) & (var)) == (mask))
/* Non-zero iff at least one bit set in mask is also set in var. */
#define TESTANY(mask, var) (((mask) & (var)) != 0)
/* Rounds x up to the next multiple of alignment and yields a ptr_uint_t.
 * alignment must be a power of two; it is evaluated twice.
 * x is parenthesized before the cast so that an expression argument such as
 * "p + 1" is converted as a whole, and alignment is converted to ptr_uint_t
 * before the mask is built so that a narrower unsigned alignment cannot
 * clear the upper bits of a pointer-sized value.
 */
#define ALIGN_FORWARD(x, alignment) \
    ((((ptr_uint_t)(x)) + ((ptr_uint_t)(alignment) - 1)) & \
     (~((ptr_uint_t)(alignment) - 1)))
/* Size in bytes of each thread's cyclic buffer. */
#define BUF_64K_BYTE (1 << 16)
/* TLS_BUF_SIZE is 128KB so that the allocation is guaranteed to contain a
 * 64KB region whose starting address is 64KB-aligned.
 */
#define TLS_BUF_SIZE (BUF_64K_BYTE * 2)
/* Segment register and offset of the raw TLS slot obtained from
 * dr_raw_tls_calloc() in dr_init().  For each thread, the slot at this
 * segment:offset holds the pointer to the next entry to write in that
 * thread's cyclic buffer; the inserted instrumentation reads and updates it
 * directly.
 */
static reg_id_t tls_seg;
static uint tls_offs;
/* Per-thread bookkeeping, allocated in event_thread_init() and stored in
 * the thread's DR TLS field.
 */
typedef struct _per_thread_t {
/* Value returned by dr_get_dr_segment_base(tls_seg); the raw TLS slot lives
 * at seg_base + tls_offs.
 */
void *seg_base;
/* Start of the raw TLS_BUF_SIZE allocation, i.e. the address to pass back
 * to dr_raw_mem_free().  The cyclic buffer itself starts at this address
 * rounded up to a 64KB boundary.
 */
void *buf_base;
} per_thread_t;
/* Walks the basic block in order looking for a general-purpose register that
 * is dead on entry: one that some instruction overwrites exactly before any
 * earlier (or the same) instruction has read it.
 *
 * Returns the first such register encountered, or DR_REG_NULL when the scan
 * reaches a syscall or interrupt instruction first, or when the end of the
 * block is reached without finding a candidate.
 *
 * NOTE(review): every register in the DR_REG_START_GPR range is a candidate;
 * presumably that range includes the stack pointer -- confirm that handing it
 * out as a scratch register is acceptable to the caller.
 */
static reg_id_t
bb_find_dead_reg(instrlist_t *ilist)
{
    bool reg_is_read[DR_NUM_GPR_REGS] = { false, };
    instr_t *cur;
    int idx;

    for (cur = instrlist_first(ilist); cur != NULL; cur = instr_get_next(cur)) {
        /* Stop looking as soon as a syscall or interrupt is seen. */
        if (instr_is_syscall(cur) || instr_is_interrupt(cur))
            return DR_REG_NULL;

        for (idx = 0; idx < DR_NUM_GPR_REGS; idx++) {
            reg_id_t candidate = (reg_id_t)(DR_REG_START_GPR + idx);

            /* Once a register has been read it can never qualify. */
            if (reg_is_read[idx])
                continue;

            /* A read disqualifies the register even if this same
             * instruction also writes it.
             */
            if (instr_reads_from_reg(cur, candidate, DR_QUERY_DEFAULT)) {
                reg_is_read[idx] = true;
                continue;
            }

            if (instr_writes_to_exact_reg(cur, candidate, DR_QUERY_DEFAULT))
                return candidate;
#ifdef X64
            /* in x64, update on 32-bit register kills the whole register */
            if (instr_writes_to_exact_reg(cur, reg_64_to_32(candidate),
                                          DR_QUERY_DEFAULT))
                return candidate;
#endif
        }
    }
    return DR_REG_NULL;
}
/* Reports whether the arithmetic flags are dead at instruction where, by
 * scanning from where (inclusive) to the end of the list.
 *
 * Returns true if an instruction that writes all of EFLAGS_WRITE_6 is reached
 * before any instruction that reads any of EFLAGS_READ_6.  Returns false if a
 * reader is found first, or if the end of the list is reached without a full
 * write (the conservative answer).
 *
 * ilist is not used; the walk starts from where.
 */
static bool
bb_aflags_are_dead(instrlist_t *ilist, instr_t *where)
{
    instr_t *cur = where;

    while (cur != NULL) {
        uint eflags = instr_get_arith_flags(cur, DR_QUERY_DEFAULT);

        /* The read check comes first so that an instruction that both reads
         * and writes the flags counts as a reader.
         */
        if (TESTANY(EFLAGS_READ_6, eflags))
            return false;
        if (TESTALL(EFLAGS_WRITE_6, eflags))
            return true;

        cur = instr_get_next(cur);
    }
    return false;
}
/* Basic block event callback.  Inserts, ahead of the block's first
 * instruction, code that
 *   1. loads the thread's buffer pointer from the raw TLS slot,
 *   2. stores the block's starting application pc through that pointer, and
 *   3. advances only the low 16 bits of the pointer held in the TLS slot, so
 *      that the pointer wraps within its 64KB buffer.
 *
 * A register that is dead on entry to the block is used as scratch when one
 * can be found; otherwise XCX is used and is spilled around the inserted code.
 * for_trace and translating are not consulted.  Always returns DR_EMIT_DEFAULT.
 */
static dr_emit_flags_t
event_basic_block(void *drcontext, void *tag, instrlist_t *bb,
                  bool for_trace, bool translating)
{
    instr_t *where = instrlist_first(bb);
    app_pc start_pc = dr_fragment_app_pc(tag);
    instr_t *mov1, *mov2;
    bool must_spill = false;
    /* Prefer a "dead" register so nothing has to be spilled.  Technically a
     * fault could arrive and want the register's original value, but that is
     * too much of a corner case for this sample.
     */
    reg_id_t scratch = bb_find_dead_reg(bb);

    if (scratch == DR_REG_NULL) {
        /* No dead register: arbitrarily take XCX and preserve its value. */
        scratch = DR_REG_XCX;
        must_spill = true;
        dr_save_reg(drcontext, bb, where, scratch, SPILL_SLOT_1);
    }

    /* scratch = current buffer pointer, read from the TLS slot */
    MINSERT(bb, where,
            INSTR_CREATE_mov_ld(drcontext, opnd_create_reg(scratch),
                                opnd_create_far_base_disp(tls_seg, DR_REG_NULL,
                                                          DR_REG_NULL, 0,
                                                          tls_offs, OPSZ_PTR)));

    /* [scratch] = starting pc of this block; the helper may emit one or two
     * instructions, both of which are marked as meta.
     */
    instrlist_insert_mov_immed_ptrsz(drcontext, (ptr_int_t)start_pc,
                                     OPND_CREATE_MEMPTR(scratch, 0),
                                     bb, where, &mov1, &mov2);
    DR_ASSERT(mov1 != NULL);
    instr_set_meta(mov1);
    if (mov2 != NULL)
        instr_set_meta(mov2);

    /* Advance the pointer by one entry, touching only its bottom 16 bits. */
    if (!bb_aflags_are_dead(bb, where)) {
        /* The application's flags are live: use lea, which avoids an aflags
         * save/restore, to bump the low 16 bits of scratch, then write the
         * whole pointer back to the TLS slot.
         */
        reg_id_t reg_32;
        reg_id_t reg_16;
#ifdef X64
        reg_32 = reg_64_to_32(scratch);
#else
        reg_32 = scratch;
#endif
        reg_16 = reg_32_to_16(reg_32);
        MINSERT(bb, where,
                INSTR_CREATE_lea(drcontext, opnd_create_reg(reg_16),
                                 opnd_create_base_disp(scratch, DR_REG_NULL, 0,
                                                       sizeof(app_pc),
                                                       OPSZ_lea)));
        MINSERT(bb, where,
                INSTR_CREATE_mov_st(drcontext,
                                    opnd_create_far_base_disp(tls_seg,
                                                              DR_REG_NULL,
                                                              DR_REG_NULL, 0,
                                                              tls_offs,
                                                              OPSZ_PTR),
                                    opnd_create_reg(scratch)));
    } else {
        /* The flags are dead, so a 2-byte add directly on the TLS slot is
         * safe.
         */
        MINSERT(bb, where,
                INSTR_CREATE_add(drcontext,
                                 opnd_create_far_base_disp(tls_seg, DR_REG_NULL,
                                                           DR_REG_NULL, 0,
                                                           tls_offs, OPSZ_2),
                                 OPND_CREATE_INT8(sizeof(app_pc))));
    }

    /* Give the application its register value back if it was spilled. */
    if (must_spill)
        dr_restore_reg(drcontext, bb, where, scratch, SPILL_SLOT_1);

    return DR_EMIT_DEFAULT;
}
/* Thread init event callback.  Allocates the thread's bookkeeping structure
 * and its raw buffer, zeroes the buffer, and seeds the raw TLS slot with the
 * 64KB-aligned address inside that buffer.
 */
static void
event_thread_init(void *drcontext)
{
    per_thread_t *data = dr_thread_alloc(drcontext, sizeof(*data));
    void **tls_slot;

    DR_ASSERT(data != NULL);
    dr_set_tls_field(drcontext, data);

    /* Remember the segment base so the TLS slot, and through it the current
     * position in the buffer, can be located later.  This is mainly for
     * someone inspecting the execution history from a debugger.
     */
    data->seg_base = dr_get_dr_segment_base(tls_seg);

    /* Allocate 128KB so that a 64KB region starting on a 64KB boundary is
     * guaranteed to fit inside; that alignment is what lets the buffer be
     * filled cyclically by incrementing only the pointer's bottom 16 bits.
     */
    data->buf_base = dr_raw_mem_alloc(TLS_BUF_SIZE,
                                      DR_MEMPROT_READ | DR_MEMPROT_WRITE,
                                      NULL);
    DR_ASSERT(data->seg_base != NULL && data->buf_base != NULL);
    memset(data->buf_base, 0, TLS_BUF_SIZE);

    /* Point the TLS slot at the start of the aligned 64KB cyclic buffer. */
    tls_slot = (void **)((byte *)(data->seg_base) + tls_offs);
    *tls_slot = (void *)ALIGN_FORWARD(data->buf_base, BUF_64K_BYTE);
}
/* Thread exit event callback.  Releases the thread's raw buffer and then the
 * bookkeeping structure that was stored in the thread's TLS field.
 */
static void
event_thread_exit(void *drcontext)
{
    per_thread_t *pt = dr_get_tls_field(drcontext);
    void *raw_buf = pt->buf_base;

    /* Free with the original allocation address and size, not the aligned
     * address that was placed in the TLS slot.
     */
    dr_raw_mem_free(raw_buf, TLS_BUF_SIZE);
    dr_thread_free(drcontext, pt, sizeof(*pt));
}
/* Process exit event callback.  Returns the raw TLS slot allocated in
 * dr_init() and asserts if it cannot be released.
 */
static void
event_exit(void)
{
    if (dr_raw_tls_cfree(tls_offs, 1))
        return;
    DR_ASSERT(false);
}
/* Client entry point.  Names the client, registers the thread, exit and
 * basic block callbacks, and allocates one raw TLS slot, asserting if the
 * allocation fails.  id is not used.
 */
DR_EXPORT void
dr_init(client_id_t id)
{
    dr_set_client_name("DynamoRIO Sample Client 'bbbuf'",
                       "http://dynamorio.org/issues");

    /* register events */
    dr_register_thread_init_event(event_thread_init);
    dr_register_thread_exit_event(event_thread_exit);
    dr_register_exit_event(event_exit);
    dr_register_bb_event(event_basic_block);

    /* The TLS field that DR provides cannot be accessed directly from the
     * code cache.  For better performance a raw TLS slot is allocated
     * instead, which inserted code can read and update with a single
     * instruction.
     */
    if (dr_raw_tls_calloc(&tls_seg, &tls_offs, 1, 0))
        return;
    DR_ASSERT(false);
}