blob: 22cfdeba111523e1683029777dec11c8d6d7bf2f [file] [log] [blame]
/* *******************************************************************************
* Copyright (c) 2011 Massachusetts Institute of Technology All rights reserved.
* *******************************************************************************/
/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of MIT nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL MIT OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/* Test the clean call inliner. */
#include "dr_api.h"
#include "client_tools.h"
#include <stddef.h> /* offsetof */
#include <string.h> /* memset */
#define CALLEE_ALIGNMENT 64
#define PRE instrlist_meta_preinsert
#define APP instrlist_meta_append
/* List of instrumentation functions. */
#define FUNCTIONS() \
FUNCTION(empty) \
FUNCTION(empty_1arg) \
FUNCTION(inscount) \
FUNCTION(gcc47_inscount) \
FUNCTION(callpic_pop) \
FUNCTION(callpic_mov) \
FUNCTION(nonleaf) \
FUNCTION(cond_br) \
FUNCTION(tls_clobber) \
FUNCTION(aflags_clobber) \
FUNCTION(compiler_inscount) \
FUNCTION(bbcount) \
LAST_FUNCTION()
/* Table of function names. */
#define FUNCTION(fn_name) #fn_name,
#define LAST_FUNCTION() NULL
static const char *func_names[] = {
FUNCTIONS()
};
#undef FUNCTION
#undef LAST_FUNCTION
/* Codegen function declarations. */
#define FUNCTION(fn_name) \
static instrlist_t *codegen_##fn_name(void *dc);
#define LAST_FUNCTION()
FUNCTIONS()
#undef FUNCTION
#undef LAST_FUNCTION
/* Table of codegen functions. */
typedef instrlist_t *(*codegen_func_t)(void *dc);
#define FUNCTION(fn_name) codegen_##fn_name,
#define LAST_FUNCTION() NULL
static codegen_func_t codegen_funcs[] = {
FUNCTIONS()
};
#undef FUNCTION
#undef LAST_FUNCTION
/* Create an enum for each function. */
#define FUNCTION(fn_name) FN_##fn_name,
#define LAST_FUNCTION() LAST_FUNC_ENUM
enum {
FUNCTIONS()
};
#undef FUNCTION
#undef LAST_FUNCTION
/* A separate define so ctags can find it. */
#define N_FUNCS LAST_FUNC_ENUM
static app_pc func_app_pcs[N_FUNCS];
static void *func_ptrs[N_FUNCS];
static bool func_called[N_FUNCS];
/* Instrumentation machine code memory. */
static void *rwx_mem;
static size_t rwx_size;
static void event_exit(void);
static dr_emit_flags_t event_basic_block(void *dc, void *tag, instrlist_t *bb,
bool for_trace, bool translating);
static void lookup_pcs(void);
static void codegen_instrumentation_funcs(void);
static void free_instrumentation_funcs(void);
static void compiler_inscount(ptr_uint_t count);
static void test_inlined_call_args(void *dc, instrlist_t *bb, instr_t *where,
int fn_idx);
DR_EXPORT void
dr_init(client_id_t id)
{
dr_register_exit_event(event_exit);
dr_register_bb_event(event_basic_block);
dr_fprintf(STDERR, "INIT\n");
/* Lookup pcs. */
lookup_pcs();
codegen_instrumentation_funcs();
}
static void
event_exit(void)
{
int i;
free_instrumentation_funcs();
for (i = 0; i < N_FUNCS; i++) {
DR_ASSERT_MSG(func_called[i],
"Instrumentation function was not called!");
}
dr_fprintf(STDERR, "PASSED\n");
}
static void
lookup_pcs(void)
{
module_data_t *exe;
int i;
exe = dr_lookup_module_by_name(
#ifdef WINDOWS
"client.inline.exe"
#else
"client.inline"
#endif
);
for (i = 0; i < N_FUNCS; i++) {
app_pc func_pc = (app_pc)dr_get_proc_address(
exe->handle, func_names[i]);
DR_ASSERT_MSG(func_pc != NULL,
"Unable to find a function we wanted to instrument!");
func_app_pcs[i] = func_pc;
}
dr_free_module_data(exe);
}
/* Generate the instrumentation. */
static void
codegen_instrumentation_funcs(void)
{
void *dc = dr_get_current_drcontext();
instrlist_t *ilists[N_FUNCS];
int i;
size_t offset = 0;
uint rwx_prot;
app_pc pc;
/* Generate all of the ilists. */
for (i = 0; i < N_FUNCS; i++) {
ilists[i] = codegen_funcs[i](dc);
}
/* Compute size of each instr and set each note with the offset. */
for (i = 0; i < N_FUNCS; i++) {
instr_t *inst;
offset = ALIGN_FORWARD(offset, CALLEE_ALIGNMENT);
for (inst = instrlist_first(ilists[i]); inst;
inst = instr_get_next(inst)) {
instr_set_note(inst, (void *)(ptr_int_t)offset);
offset += instr_length(dc, inst);
}
}
/* Allocate RWX memory for the code and fill it with nops. nops make
* reading the disassembly in gdb easier. */
rwx_prot = DR_MEMPROT_EXEC|DR_MEMPROT_READ|DR_MEMPROT_WRITE;
rwx_size = ALIGN_FORWARD(offset, PAGE_SIZE);
rwx_mem = dr_nonheap_alloc(rwx_size, rwx_prot);
memset(rwx_mem, 0x90, rwx_size);
/* Encode instructions. We don't worry about labels, since the notes are
* already set. */
pc = (byte*)rwx_mem;
for (i = 0; i < N_FUNCS; i++) {
pc = (byte*)ALIGN_FORWARD(pc, CALLEE_ALIGNMENT);
func_ptrs[i] = pc;
dr_log(dc, LOG_EMIT, 3, "Generated instrumentation function %s at "PFX
":", func_names[i], pc);
instrlist_disassemble(dc, pc, ilists[i], dr_get_logfile(dc));
pc = instrlist_encode(dc, ilists[i], pc, false);
instrlist_clear_and_destroy(dc, ilists[i]);
}
/* For compiler_inscount, we don't use generated code, we just point
* straight at the compiled code.
*/
func_ptrs[FN_compiler_inscount] = (void*)&compiler_inscount;
}
/* Free the instrumentation machine code. */
static void
free_instrumentation_funcs(void)
{
dr_nonheap_free(rwx_mem, rwx_size);
}
/* Globals used by instrumentation functions. */
ptr_uint_t global_count;
static uint callee_inlined;
static dr_mcontext_t before_mcontext = {sizeof(before_mcontext),DR_MC_ALL,};
static int before_errno;
static dr_mcontext_t after_mcontext = {sizeof(after_mcontext),DR_MC_ALL,};
static int after_errno;
/* Reset global_count and patch the out-of-line version of the instrumentation function
* so we can find out if it got called, which would mean it wasn't inlined.
*
* XXX: We modify the callee code! If DR tries to disassemble the callee's
* ilist after the modification, it will trigger assertion failures in the
* disassembler.
*/
static void
before_callee(app_pc func, const char *func_name)
{
void *dc;
instrlist_t *ilist;
opnd_t xax = opnd_create_reg(DR_REG_XAX);
byte *end_pc;
if (func_name != NULL)
dr_fprintf(STDERR, "Calling func %s...\n", func_name);
/* Save mcontext before call. */
dc = dr_get_current_drcontext();
dr_get_mcontext(dc, &before_mcontext);
/* If this is compiler_inscount, we need to unprotect our own text section
* so we can make this code modification.
*/
if (func == (app_pc)compiler_inscount) {
app_pc start_pc = (app_pc)ALIGN_BACKWARD(func, PAGE_SIZE);
app_pc end_pc = func;
instr_t instr;
instr_init(dc, &instr);
do {
instr_reset(dc, &instr);
end_pc = decode(dc, end_pc, &instr);
} while (!instr_is_return(&instr));
end_pc += instr_length(dc, &instr);
instr_reset(dc, &instr);
end_pc = (app_pc)ALIGN_FORWARD(end_pc, PAGE_SIZE);
dr_memory_protect(start_pc, (size_t)(end_pc - start_pc),
DR_MEMPROT_EXEC|DR_MEMPROT_READ|DR_MEMPROT_WRITE);
}
/* Patch the callee to be:
* push xax
* mov xax, &callee_inlined
* mov dword [xax], 0
* pop xax
* ret
*/
ilist = instrlist_create(dc);
APP(ilist, INSTR_CREATE_push(dc, xax));
APP(ilist, INSTR_CREATE_mov_imm
(dc, xax, OPND_CREATE_INTPTR(&callee_inlined)));
APP(ilist, INSTR_CREATE_mov_st
(dc, OPND_CREATE_MEM32(DR_REG_XAX, 0), OPND_CREATE_INT32(0)));
APP(ilist, INSTR_CREATE_pop(dc, xax));
APP(ilist, INSTR_CREATE_ret(dc));
end_pc = instrlist_encode(dc, ilist, func, false /* no jump targets */);
instrlist_clear_and_destroy(dc, ilist);
dr_log(dc, LOG_EMIT, 3, "Patched instrumentation function %s at "PFX":\n",
(func_name ? func_name : "(null)"), func);
/* Check there was enough room in the function. We align every callee
* entry point to CALLEE_ALIGNMENT, so each function has at least
* CALLEE_ALIGNMENT bytes long.
*/
DR_ASSERT_MSG(end_pc < func + CALLEE_ALIGNMENT,
"Patched code too big for smallest function!");
/* Reset instrumentation globals. */
global_count = 0;
callee_inlined = 1;
}
#define NUM_GP_REGS (1 + (IF_X64_ELSE(DR_REG_R15, DR_REG_XDI) - DR_REG_XAX))
static int reg_offsets[NUM_GP_REGS + 1] = {
offsetof(dr_mcontext_t, xax),
offsetof(dr_mcontext_t, xbx),
offsetof(dr_mcontext_t, xcx),
offsetof(dr_mcontext_t, xdx),
offsetof(dr_mcontext_t, xdi),
offsetof(dr_mcontext_t, xsi),
offsetof(dr_mcontext_t, xbp),
offsetof(dr_mcontext_t, xsp),
#ifdef X64
offsetof(dr_mcontext_t, r8),
offsetof(dr_mcontext_t, r9),
offsetof(dr_mcontext_t, r10),
offsetof(dr_mcontext_t, r11),
offsetof(dr_mcontext_t, r12),
offsetof(dr_mcontext_t, r13),
offsetof(dr_mcontext_t, r14),
offsetof(dr_mcontext_t, r15),
#endif
offsetof(dr_mcontext_t, xflags)
};
static bool
mcontexts_equal(dr_mcontext_t *mc_a, dr_mcontext_t *mc_b, int func_index)
{
int i;
int ymm_bytes_used;
/* Check GPRs. */
for (i = 0; i < NUM_GP_REGS; i++) {
reg_t a = *(reg_t*)((byte*)mc_a + reg_offsets[i]);
reg_t b = *(reg_t*)((byte*)mc_b + reg_offsets[i]);
if (a != b)
return false;
}
/* Check xflags for all funcs except bbcount, which has dead flags. */
if (mc_a->xflags != mc_b->xflags && func_index != FN_bbcount)
return false;
/* Only look at the initialized bits of the SSE regs. */
ymm_bytes_used = (proc_has_feature(FEATURE_AVX) ? 32 : 16);
for (i = 0; i < NUM_XMM_SLOTS; i++) {
if (memcmp(&mc_a->ymm[i], &mc_b->ymm[i], ymm_bytes_used) != 0)
return false;
}
return true;
}
static void
dump_diff_mcontexts(void)
{
uint i;
dr_fprintf(STDERR, "Registers clobbered by supposedly clean call!\n"
"Printing GPRs + flags:\n");
for (i = 0; i < NUM_GP_REGS + 1; i++) {
reg_t before_reg = *(reg_t*)((byte*)&before_mcontext + reg_offsets[i]);
reg_t after_reg = *(reg_t*)((byte*)&after_mcontext + reg_offsets[i]);
const char *reg_name = (i < NUM_GP_REGS ?
get_register_name(DR_REG_XAX + i) :
"xflags");
const char *diff_str = (before_reg == after_reg ?
"" : " <- DIFFERS");
dr_fprintf(STDERR, "%s before: "PFX" after: "PFX"%s\n",
reg_name, before_reg, after_reg, diff_str);
}
dr_fprintf(STDERR, "Printing XMM regs:\n");
for (i = 0; i < NUM_XMM_SLOTS; i++) {
dr_ymm_t before_reg = before_mcontext.ymm[i];
dr_ymm_t after_reg = after_mcontext.ymm[i];
size_t mmsz = proc_has_feature(FEATURE_AVX) ? sizeof(dr_xmm_t) :
sizeof(dr_ymm_t);
const char *diff_str =
(memcmp(&before_reg, &after_reg, mmsz) == 0 ? "" : " <- DIFFERS");
dr_fprintf(STDERR, "xmm%2d before: %08x%08x%08x%08x",
i,
before_reg.u32[0], before_reg.u32[1],
before_reg.u32[2], before_reg.u32[3]);
if (proc_has_feature(FEATURE_AVX)) {
dr_fprintf(STDERR, "%08x%08x%08x%08x",
before_reg.u32[4], before_reg.u32[5],
before_reg.u32[6], before_reg.u32[7]);
}
dr_fprintf(STDERR, " after: %08x%08x%08x%08x",
after_reg.u32[0], after_reg.u32[1],
after_reg.u32[2], after_reg.u32[3]);
if (proc_has_feature(FEATURE_AVX)) {
dr_fprintf(STDERR, "%08x%08x%08x%08x",
after_reg.u32[4], after_reg.u32[5],
after_reg.u32[6], after_reg.u32[7]);
}
dr_fprintf(STDERR, "%s\n", diff_str);
}
}
static void
dump_inlined_code(void *dc, app_pc start_inline, app_pc end_inline,
int func_index)
{
app_pc pc, next_pc;
dr_fprintf(STDERR, "Inlined code for %s:\n", func_names[func_index]);
for (pc = start_inline; pc != end_inline; pc = next_pc) {
next_pc = disassemble(dc, pc, STDERR);
}
}
static void
after_callee(app_pc start_inline, app_pc end_inline, bool inline_expected,
int func_index, const char *func_name)
{
void *dc;
/* Save mcontext after call. */
dc = dr_get_current_drcontext();
dr_get_mcontext(dc, &after_mcontext);
/* Compare mcontexts. */
if (before_errno != after_errno) {
dr_fprintf(STDERR, "errnos differ!\nbefore: %d, after: %d\n",
before_errno, after_errno);
}
if (!mcontexts_equal(&before_mcontext, &after_mcontext, func_index)) {
dump_diff_mcontexts();
dump_inlined_code(dc, start_inline, end_inline, func_index);
}
/* Now that we use the mcontext in dcontext, we expect no stack usage. */
if (inline_expected) {
app_pc pc, next_pc;
instr_t instr;
bool found_xsp = false;
instr_init(dc, &instr);
for (pc = start_inline; pc != end_inline; pc = next_pc) {
next_pc = decode(dc, pc, &instr);
if (instr_uses_reg(&instr, DR_REG_XSP)) {
found_xsp = true;
}
instr_reset(dc, &instr);
}
if (found_xsp) {
dr_fprintf(STDERR, "Found stack usage in inlined code for %s\n",
func_names[func_index]);
dump_inlined_code(dc, start_inline, end_inline, func_index);
}
}
if (inline_expected && !callee_inlined) {
dr_fprintf(STDERR, "Function %s was not inlined!\n",
func_names[func_index]);
dump_inlined_code(dc, start_inline, end_inline, func_index);
} else if (!inline_expected && callee_inlined) {
dr_fprintf(STDERR, "Function %s was inlined unexpectedly!\n",
func_names[func_index]);
dump_inlined_code(dc, start_inline, end_inline, func_index);
}
/* Function-specific checks. */
switch (func_index) {
case FN_inscount:
case FN_compiler_inscount:
if (global_count != 0xDEAD) {
dr_fprintf(STDERR, "global_count not updated properly after inscount!\n");
dump_inlined_code(dc, start_inline, end_inline, func_index);
}
break;
default:
break;
}
if (func_name != NULL)
dr_fprintf(STDERR, "Called func %s.\n", func_name);
}
static void
fill_scratch(void)
{
void *dc = dr_get_current_drcontext();
int slot;
/* Set slots to 0x000... 0x111... 0x222... etc. */
for (slot = SPILL_SLOT_1; slot <= SPILL_SLOT_MAX; slot++) {
reg_t value = slot * 0x11111111;
dr_write_saved_reg(dc, slot, value);
}
}
static void
check_scratch(void)
{
void *dc = dr_get_current_drcontext();
int slot;
/* Check that slots are 0x000... 0x111... 0x222... etc. */
for (slot = SPILL_SLOT_1; slot <= SPILL_SLOT_MAX; slot++) {
reg_t value = dr_read_saved_reg(dc, slot);
reg_t expected = slot * 0x11111111;
if (value != expected)
dr_fprintf(STDERR, "Client scratch slot clobbered by clean call!\n");
}
}
static void
check_aflags(int actual, int expected)
{
byte ah = (actual >> 8) & 0xFF;
byte al = actual & 0xFF;
byte eh = (expected >> 8) & 0xFF;
byte el = expected & 0xFF;
dr_fprintf(STDERR, "actual: %04x, expected: %04x\n", actual, expected);
DR_ASSERT_MSG(ah == eh, "Aflags clobbered!");
DR_ASSERT_MSG(al == el, "Overflow clobbered!");
dr_fprintf(STDERR, "passed for %04x\n", expected);
}
static instr_t *
test_aflags(void *dc, instrlist_t *bb, instr_t *where, int aflags,
instr_t *before_label, instr_t *after_label)
{
opnd_t xax = opnd_create_reg(DR_REG_XAX);
opnd_t al = opnd_create_reg(DR_REG_AL);
/* Save flags and REG_XAX
* XXX: Assumes we can push to application stack, which happens to be valid
* for this test application.
*
* pushf
* mov [SPILL_SLOT_1], REG_XAX
*/
PRE(bb, where, INSTR_CREATE_pushf(dc));
PRE(bb, where, INSTR_CREATE_mov_st
(dc, dr_reg_spill_slot_opnd(dc, SPILL_SLOT_1), xax));
/* Then populate aflags from XAX:
* mov REG_XAX, aflags
* add al, HEX(7F)
* sahf ah
*/
PRE(bb, where, INSTR_CREATE_mov_imm(dc, xax, OPND_CREATE_INTPTR(aflags)));
PRE(bb, where, INSTR_CREATE_add(dc, al, OPND_CREATE_INT8(0x7F)));
PRE(bb, where, INSTR_CREATE_sahf(dc));
if (before_label != NULL)
PRE(bb, where, before_label);
dr_insert_clean_call(dc, bb, where, func_ptrs[FN_aflags_clobber], false, 0);
if (after_label != NULL)
PRE(bb, where, after_label);
/* Get the flags back into XAX, and then to SPILL_SLOT_2:
* mov REG_XAX, 0
* lahf
* seto al
* mov [SPILL_SLOT_2], REG_XAX
*/
PRE(bb, where, INSTR_CREATE_mov_imm(dc, xax, OPND_CREATE_INTPTR(0)));
PRE(bb, where, INSTR_CREATE_lahf(dc));
PRE(bb, where, INSTR_CREATE_setcc(dc, OP_seto, al));
PRE(bb, where, INSTR_CREATE_mov_st
(dc, dr_reg_spill_slot_opnd(dc, SPILL_SLOT_2), xax));
/* Assert that they match the original flags. */
dr_insert_clean_call(dc, bb, where, (void*)check_aflags, false, 2,
dr_reg_spill_slot_opnd(dc, SPILL_SLOT_2),
OPND_CREATE_INT32(aflags));
/* Restore and flags. */
PRE(bb, where, INSTR_CREATE_mov_ld
(dc, xax, dr_reg_spill_slot_opnd(dc, SPILL_SLOT_1)));
PRE(bb, where, INSTR_CREATE_popf(dc));
return where;
}
static dr_emit_flags_t
event_basic_block(void *dc, void *tag, instrlist_t *bb,
bool for_trace, bool translating)
{
instr_t *entry = instrlist_first(bb);
app_pc entry_pc = instr_get_app_pc(entry);
int i;
bool inline_expected = true;
instr_t *before_label;
instr_t *after_label;
for (i = 0; i < N_FUNCS; i++) {
if (entry_pc == func_app_pcs[i])
break;
}
if (i == N_FUNCS)
return DR_EMIT_DEFAULT;
/* We're inserting a call to a function in this bb. */
func_called[i] = 1;
dr_insert_clean_call(dc, bb, entry, (void*)before_callee, false, 2,
OPND_CREATE_INTPTR(func_ptrs[i]),
OPND_CREATE_INTPTR(func_names[i]));
before_label = INSTR_CREATE_label(dc);
after_label = INSTR_CREATE_label(dc);
switch (i) {
default:
/* Default behavior is to call instrumentation with no-args and
* assert it gets inlined.
*/
PRE(bb, entry, before_label);
dr_insert_clean_call(dc, bb, entry, func_ptrs[i], false, 0);
PRE(bb, entry, after_label);
break;
case FN_empty_1arg:
case FN_inscount:
case FN_gcc47_inscount:
case FN_compiler_inscount:
PRE(bb, entry, before_label);
dr_insert_clean_call(dc, bb, entry, func_ptrs[i], false, 1,
OPND_CREATE_INT32(0xDEAD));
PRE(bb, entry, after_label);
break;
case FN_nonleaf:
case FN_cond_br:
/* These functions cannot be inlined (yet). */
PRE(bb, entry, before_label);
dr_insert_clean_call(dc, bb, entry, func_ptrs[i], false, 0);
PRE(bb, entry, after_label);
inline_expected = false;
break;
case FN_tls_clobber:
dr_insert_clean_call(dc, bb, entry, (void*)fill_scratch, false, 0);
PRE(bb, entry, before_label);
dr_insert_clean_call(dc, bb, entry, func_ptrs[i], false, 0);
PRE(bb, entry, after_label);
dr_insert_clean_call(dc, bb, entry, (void*)check_scratch, false, 0);
break;
case FN_aflags_clobber:
/* ah is: SF:ZF:0:AF:0:PF:1:CF. If we turn everything on we will
* get all 1's except bits 3 and 5, giving a hex mask of 0xD7.
* Overflow is in the low byte (al usually) so use use a mask of
* 0xD701 first. If we turn everything off we get 0x0200.
*/
entry = test_aflags(dc, bb, entry, 0xD701, before_label, after_label);
(void)test_aflags(dc, bb, entry, 0x00200, NULL, NULL);
break;
}
dr_insert_clean_call(dc, bb, entry, (void*)after_callee, false, 5,
opnd_create_instr(before_label),
opnd_create_instr(after_label),
OPND_CREATE_INT32(inline_expected),
OPND_CREATE_INT32(i),
OPND_CREATE_INTPTR(func_names[i]));
if (i == FN_inscount || i == FN_empty_1arg) {
test_inlined_call_args(dc, bb, entry, i);
}
return DR_EMIT_DEFAULT;
}
/* For all regs, pass arguments of the form:
* %reg
* (%reg, %xax, 1)-0xDEAD
* (%xax, %reg, 1)-0xDEAD
*/
static void
test_inlined_call_args(void *dc, instrlist_t *bb, instr_t *where, int fn_idx)
{
uint i;
static const ptr_uint_t hex_dead_global = 0xDEAD;
for (i = 0; i < NUM_GP_REGS; i++) {
reg_id_t reg = DR_REG_XAX + (reg_id_t)i;
reg_id_t other_reg = (reg == DR_REG_XAX ? DR_REG_XBX : DR_REG_XAX);
opnd_t arg;
instr_t *before_label;
instr_t *after_label;
/* FIXME: We should test passing the app %xsp to an inlined function,
* but I hesitate to store a non-stack location in XSP.
*/
if (reg == DR_REG_XSP)
continue;
/* %reg */
before_label = INSTR_CREATE_label(dc);
after_label = INSTR_CREATE_label(dc);
arg = opnd_create_reg(reg);
dr_insert_clean_call(dc, bb, where, (void*)before_callee, false, 2,
OPND_CREATE_INTPTR(func_ptrs[fn_idx]),
OPND_CREATE_INTPTR(0));
PRE(bb, where, before_label);
dr_save_reg(dc, bb, where, reg, SPILL_SLOT_1);
PRE(bb, where, INSTR_CREATE_mov_imm
(dc, arg, OPND_CREATE_INTPTR(0xDEAD)));
dr_insert_clean_call(dc, bb, where, (void*)func_ptrs[fn_idx], false, 1,
arg);
dr_restore_reg(dc, bb, where, reg, SPILL_SLOT_1);
PRE(bb, where, after_label);
dr_insert_clean_call(dc, bb, where, (void*)after_callee, false, 5,
opnd_create_instr(before_label),
opnd_create_instr(after_label),
OPND_CREATE_INT32(true),
OPND_CREATE_INT32(fn_idx),
OPND_CREATE_INTPTR(0));
/* (%reg, %other_reg, 1)-0xDEAD */
before_label = INSTR_CREATE_label(dc);
after_label = INSTR_CREATE_label(dc);
arg = opnd_create_base_disp(reg, other_reg, 1, -0xDEAD, OPSZ_PTR);
dr_insert_clean_call(dc, bb, where, (void*)before_callee, false, 2,
OPND_CREATE_INTPTR(func_ptrs[fn_idx]),
OPND_CREATE_INTPTR(0));
PRE(bb, where, before_label);
dr_save_reg(dc, bb, where, reg, SPILL_SLOT_1);
dr_save_reg(dc, bb, where, other_reg, SPILL_SLOT_2);
PRE(bb, where, INSTR_CREATE_mov_imm
(dc, opnd_create_reg(reg), OPND_CREATE_INTPTR(0xDEAD)));
PRE(bb, where, INSTR_CREATE_mov_imm
(dc, opnd_create_reg(other_reg),
OPND_CREATE_INTPTR(&hex_dead_global)));
dr_insert_clean_call(dc, bb, where, (void*)func_ptrs[fn_idx], false, 1,
arg);
dr_restore_reg(dc, bb, where, other_reg, SPILL_SLOT_2);
dr_restore_reg(dc, bb, where, reg, SPILL_SLOT_1);
PRE(bb, where, after_label);
dr_insert_clean_call(dc, bb, where, (void*)after_callee, false, 5,
opnd_create_instr(before_label),
opnd_create_instr(after_label),
OPND_CREATE_INT32(true),
OPND_CREATE_INT32(fn_idx),
OPND_CREATE_INTPTR(0));
/* (%other_reg, %reg, 1)-0xDEAD */
before_label = INSTR_CREATE_label(dc);
after_label = INSTR_CREATE_label(dc);
arg = opnd_create_base_disp(other_reg, reg, 1, -0xDEAD, OPSZ_PTR);
dr_insert_clean_call(dc, bb, where, (void*)before_callee, false, 2,
OPND_CREATE_INTPTR(func_ptrs[fn_idx]),
OPND_CREATE_INTPTR(0));
PRE(bb, where, before_label);
dr_save_reg(dc, bb, where, reg, SPILL_SLOT_1);
dr_save_reg(dc, bb, where, other_reg, SPILL_SLOT_2);
PRE(bb, where, INSTR_CREATE_mov_imm
(dc, opnd_create_reg(other_reg), OPND_CREATE_INTPTR(0xDEAD)));
PRE(bb, where, INSTR_CREATE_mov_imm
(dc, opnd_create_reg(reg),
OPND_CREATE_INTPTR(&hex_dead_global)));
dr_insert_clean_call(dc, bb, where, (void*)func_ptrs[fn_idx], false, 1,
arg);
dr_restore_reg(dc, bb, where, other_reg, SPILL_SLOT_2);
dr_restore_reg(dc, bb, where, reg, SPILL_SLOT_1);
PRE(bb, where, after_label);
dr_insert_clean_call(dc, bb, where, (void*)after_callee, false, 5,
opnd_create_instr(before_label),
opnd_create_instr(after_label),
OPND_CREATE_INT32(true),
OPND_CREATE_INT32(fn_idx),
OPND_CREATE_INTPTR(0));
}
}
/*****************************************************************************/
/* Instrumentation function code generation. */
/*
prologue:
push REG_XBP
mov REG_XBP, REG_XSP
*/
static void
codegen_prologue(void *dc, instrlist_t *ilist)
{
APP(ilist, INSTR_CREATE_push(dc, opnd_create_reg(DR_REG_XBP)));
APP(ilist, INSTR_CREATE_mov_ld
(dc, opnd_create_reg(DR_REG_XBP), opnd_create_reg(DR_REG_XSP)));
}
/*
epilogue:
leave
ret
*/
static void
codegen_epilogue(void *dc, instrlist_t *ilist)
{
APP(ilist, INSTR_CREATE_leave(dc));
APP(ilist, INSTR_CREATE_ret(dc));
}
/*
empty:
ret
*/
static instrlist_t *
codegen_empty(void *dc)
{
instrlist_t *ilist = instrlist_create(dc);
APP(ilist, INSTR_CREATE_ret(dc));
return ilist;
}
/* i#988: We fail to inline if the number of arguments to the same clean call
* routine increases. empty is used for a 0 arg clean call, so we add empty_1arg
* for test_inlined_call_args(), which passes 1 arg.
*/
static instrlist_t *
codegen_empty_1arg(void *dc)
{
return codegen_empty(dc);
}
/* Return either a stack access opnd_t or the first regparm. Assumes frame
* pointer is not omitted. */
static opnd_t
codegen_opnd_arg1(void)
{
/* FIXME: Perhaps DR should expose this. It currently tracks this in
* core/instr.h. */
#ifdef X64
# ifdef UNIX
int reg = DR_REG_RDI;
# else /* WINDOWS */
int reg = DR_REG_RCX;
# endif
return opnd_create_reg(reg);
#else /* X86 */
# ifdef UNIX
int arg_offset = 1;
# else /* WINDOWS */
int arg_offset = 5;
# endif
return OPND_CREATE_MEMPTR(DR_REG_XBP, arg_offset * sizeof(reg_t));
#endif
}
/*
inscount:
push REG_XBP
mov REG_XBP, REG_XSP
mov REG_XAX, ARG1
add [global_count], REG_XAX
leave
ret
*/
static instrlist_t *
codegen_inscount(void *dc)
{
instrlist_t *ilist = instrlist_create(dc);
opnd_t xax = opnd_create_reg(DR_REG_XAX);
codegen_prologue(dc, ilist);
APP(ilist, INSTR_CREATE_mov_ld(dc, xax, codegen_opnd_arg1()));
APP(ilist, INSTR_CREATE_add(dc, OPND_CREATE_ABSMEM(&global_count, OPSZ_PTR), xax));
codegen_epilogue(dc, ilist);
return ilist;
}
/*
callpic_pop:
push REG_XBP
mov REG_XBP, REG_XSP
call Lnext_label
Lnext_label:
pop REG_XBX
leave
ret
*/
static instrlist_t *
codegen_callpic_pop(void *dc)
{
instrlist_t *ilist = instrlist_create(dc);
instr_t *next_label = INSTR_CREATE_label(dc);
codegen_prologue(dc, ilist);
APP(ilist, INSTR_CREATE_call(dc, opnd_create_instr(next_label)));
APP(ilist, next_label);
APP(ilist, INSTR_CREATE_pop(dc, opnd_create_reg(DR_REG_XBX)));
codegen_epilogue(dc, ilist);
return ilist;
}
/*
callpic_mov:
push REG_XBP
mov REG_XBP, REG_XSP
call Lnext_instr_mov
Lnext_instr_mov:
mov REG_XBX, [REG_XSP]
leave
ret
*/
static instrlist_t *
codegen_callpic_mov(void *dc)
{
instrlist_t *ilist = instrlist_create(dc);
instr_t *next_label = INSTR_CREATE_label(dc);
codegen_prologue(dc, ilist);
APP(ilist, INSTR_CREATE_call(dc, opnd_create_instr(next_label)));
APP(ilist, next_label);
APP(ilist, INSTR_CREATE_mov_ld
(dc, opnd_create_reg(DR_REG_XBX), OPND_CREATE_MEMPTR(DR_REG_XSP, 0)));
codegen_epilogue(dc, ilist);
return ilist;
}
/* Non-leaf functions cannot be inlined.
nonleaf:
push REG_XBP
mov REG_XBP, REG_XSP
call other_func
leave
ret
other_func:
ret
*/
static instrlist_t *
codegen_nonleaf(void *dc)
{
instrlist_t *ilist = instrlist_create(dc);
instr_t *other_func = INSTR_CREATE_label(dc);
codegen_prologue(dc, ilist);
APP(ilist, INSTR_CREATE_call(dc, opnd_create_instr(other_func)));
codegen_epilogue(dc, ilist);
APP(ilist, other_func);
APP(ilist, INSTR_CREATE_ret(dc));
return ilist;
}
/* Conditional branches cannot be inlined. Avoid flags usage to make test case
* more specific.
cond_br:
push REG_XBP
mov REG_XBP, REG_XSP
mov REG_XCX, ARG1
jecxz Larg_zero
mov REG_XAX, HEX(DEADBEEF)
mov SYMREF(global_count), REG_XAX
Larg_zero:
leave
ret
*/
static instrlist_t *
codegen_cond_br(void *dc)
{
instrlist_t *ilist = instrlist_create(dc);
instr_t *arg_zero = INSTR_CREATE_label(dc);
opnd_t xcx = opnd_create_reg(DR_REG_XCX);
codegen_prologue(dc, ilist);
/* If arg1 is non-zero, write 0xDEADBEEF to global_count. */
APP(ilist, INSTR_CREATE_mov_ld(dc, xcx, codegen_opnd_arg1()));
APP(ilist, INSTR_CREATE_jecxz(dc, opnd_create_instr(arg_zero)));
APP(ilist, INSTR_CREATE_mov_imm(dc, xcx, OPND_CREATE_INTPTR(&global_count)));
APP(ilist, INSTR_CREATE_mov_st(dc, OPND_CREATE_MEMPTR(DR_REG_XCX, 0),
OPND_CREATE_INT32((int)0xDEADBEEF)));
APP(ilist, arg_zero);
codegen_epilogue(dc, ilist);
return ilist;
}
/* A function that uses 2 registers and 1 local variable, which should fill all
* of the scratch slots that the inliner uses. This used to clobber the scratch
* slots exposed to the client.
tls_clobber:
push REG_XBP
mov REG_XBP, REG_XSP
sub REG_XSP, ARG_SZ
mov REG_XAX, HEX(DEAD)
mov REG_XDX, HEX(BEEF)
mov [REG_XSP], REG_XAX
leave
ret
*/
static instrlist_t *
codegen_tls_clobber(void *dc)
{
instrlist_t *ilist = instrlist_create(dc);
opnd_t xax = opnd_create_reg(DR_REG_XAX);
opnd_t xdx = opnd_create_reg(DR_REG_XDX);
codegen_prologue(dc, ilist);
APP(ilist, INSTR_CREATE_sub
(dc, opnd_create_reg(DR_REG_XSP), OPND_CREATE_INT8(sizeof(reg_t))));
APP(ilist, INSTR_CREATE_mov_imm(dc, xax, OPND_CREATE_INT32(0xDEAD)));
APP(ilist, INSTR_CREATE_mov_imm(dc, xdx, OPND_CREATE_INT32(0xBEEF)));
APP(ilist, INSTR_CREATE_mov_st(dc, OPND_CREATE_MEMPTR(DR_REG_XSP, 0), xax));
codegen_epilogue(dc, ilist);
return ilist;
}
/* Zero the aflags. Inliner must ensure they are restored.
aflags_clobber:
push REG_XBP
mov REG_XBP, REG_XSP
mov REG_XAX, 0
add al, HEX(7F)
sahf
leave
ret
*/
static instrlist_t *
codegen_aflags_clobber(void *dc)
{
instrlist_t *ilist = instrlist_create(dc);
codegen_prologue(dc, ilist);
APP(ilist, INSTR_CREATE_mov_imm
(dc, opnd_create_reg(DR_REG_XAX), OPND_CREATE_INTPTR(0)));
APP(ilist, INSTR_CREATE_add
(dc, opnd_create_reg(DR_REG_AL), OPND_CREATE_INT8(0x7F)));
APP(ilist, INSTR_CREATE_sahf(dc));
codegen_epilogue(dc, ilist);
return ilist;
}
/*
bbcount:
push REG_XBP
mov REG_XBP, REG_XSP
inc [global_count]
leave
ret
*/
static instrlist_t *
codegen_bbcount(void *dc)
{
instrlist_t *ilist = instrlist_create(dc);
codegen_prologue(dc, ilist);
APP(ilist, INSTR_CREATE_inc(dc, OPND_CREATE_ABSMEM(&global_count, OPSZ_PTR)));
codegen_epilogue(dc, ilist);
return ilist;
}
/* Reduced code from inscount generated by gcc47 -O0.
gcc47_inscount:
#ifdef X64
push %rbp
mov %rsp,%rbp
mov %rdi,-0x8(%rbp)
mov global_count(%rip),%rdx
mov -0x8(%rbp),%rax
add %rdx,%rax
mov %rax,global_count(%rip)
pop %rbp
retq
#else
push %ebp
mov %esp,%ebp
call pic_thunk
add $0x1c86,%ecx
mov global_count(%ecx),%edx
mov 0x8(%ebp),%eax
add %edx,%eax
mov %eax,global_count(%ecx)
pop %ebp
ret
pic_thunk:
mov (%esp),%ecx
ret
#endif
*/
static instrlist_t *
codegen_gcc47_inscount(void *dc)
{
instrlist_t *ilist = instrlist_create(dc);
opnd_t global;
opnd_t xax = opnd_create_reg(DR_REG_XAX);
opnd_t xdx = opnd_create_reg(DR_REG_XDX);
#ifdef X64
/* This local is past TOS. That's OK by the sysv x64 ABI. */
opnd_t local = OPND_CREATE_MEMPTR(DR_REG_XBP, -(int)sizeof(reg_t));
codegen_prologue(dc, ilist);
global = opnd_create_rel_addr(&global_count, OPSZ_PTR);
APP(ilist, INSTR_CREATE_mov_st(dc, local, codegen_opnd_arg1()));
APP(ilist, INSTR_CREATE_mov_ld(dc, xdx, global));
APP(ilist, INSTR_CREATE_mov_ld(dc, xax, local));
APP(ilist, INSTR_CREATE_add(dc, xax, xdx));
APP(ilist, INSTR_CREATE_mov_st(dc, global, xax));
codegen_epilogue(dc, ilist);
#else
instr_t *pic_thunk = INSTR_CREATE_mov_ld
(dc, opnd_create_reg(DR_REG_XCX), OPND_CREATE_MEMPTR(DR_REG_XSP, 0));
codegen_prologue(dc, ilist);
/* XXX: Do a real 32-bit PIC-style access. For now we just use an absolute
* reference since we're 32-bit and everything is reachable.
*/
global = opnd_create_abs_addr(&global_count, OPSZ_PTR);
APP(ilist, INSTR_CREATE_call(dc, opnd_create_instr(pic_thunk)));
APP(ilist, INSTR_CREATE_add(dc, opnd_create_reg(DR_REG_XCX),
OPND_CREATE_INT32(0x0)));
APP(ilist, INSTR_CREATE_mov_ld(dc, xdx, global));
APP(ilist, INSTR_CREATE_mov_ld(dc, xax, codegen_opnd_arg1()));
APP(ilist, INSTR_CREATE_add(dc, xax, xdx));
APP(ilist, INSTR_CREATE_mov_st(dc, global, xax));
codegen_epilogue(dc, ilist);
APP(ilist, pic_thunk);
APP(ilist, INSTR_CREATE_ret(dc));
#endif
return ilist;
}
/* We want to test that we can auto-inline whatever the compiler generates for
* inscount.
*/
static void
compiler_inscount(ptr_uint_t count) {
global_count += count;
}
/* We generate an empty ilist for compiler_inscount and don't use it.
* Originally I tried to decode compiler_inscount and re-encode it in the RWX
* memory along with our other callees, but that breaks 32-bit PIC code. Even
* if we set the translation for each instruction in this ilist, that will be
* lost when we encode and decode in the inliner.
*/
static instrlist_t *
codegen_compiler_inscount(void *dc)
{
instrlist_t *ilist = instrlist_create(dc);
APP(ilist, INSTR_CREATE_ret(dc));
return ilist;
}