| /* ****************************************************************************** |
| * Copyright (c) 2011-2019 Google, Inc. All rights reserved. |
| * Copyright (c) 2010 Massachusetts Institute of Technology All rights reserved. |
| * ******************************************************************************/ |
| |
| /* |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * * Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimer. |
| * |
| * * Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * * Neither the name of VMware, Inc. nor the names of its contributors may be |
| * used to endorse or promote products derived from this software without |
| * specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE |
| * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH |
| * DAMAGE. |
| */ |
| |
| /* Code Manipulation API Sample: |
| * instrace_x86.c |
| * |
| * Collects a dynamic instruction trace and dumps it to a file. |
| * This is an x86-specific implementation of an instruction tracing client. |
| * For a simpler (and slower) arch-independent version, please see instrace_simple.c. |
| * |
| * Illustrates how to create generated code in a local code cache and |
| * perform a lean procedure call to that generated code. |
| * |
| * (1) Fills a buffer and dumps the buffer when it is full. |
| * (2) Inlines the buffer filling code to avoid a full context switch. |
| * (3) Uses a lean procedure call for clean calls to reduce code cache size. |
| * |
| * The OUTPUT_TEXT define controls the format of the trace: text or binary. |
| * Creating a text trace file makes the tool an order of magnitude (!) slower |
| * than creating a binary file; thus, the default is binary. |
| */ |
| |
| #include <stdio.h> |
| #include <string.h> /* for memset */ |
| #include <stddef.h> /* for offsetof */ |
| #include "dr_api.h" |
| #include "drmgr.h" |
| #include "drreg.h" |
| #include "utils.h" |
| |
| /* Each ins_ref_t describes an executed instruction. */ |
| typedef struct _ins_ref_t { |
| app_pc pc; |
| int opcode; |
| } ins_ref_t; |
| |
| /* Max number of ins_ref a buffer can have */ |
| #define MAX_NUM_INS_REFS 8192 |
| /* The size of the memory buffer for holding ins_refs. When it fills up, |
| * we dump data from the buffer to the file. |
| */ |
| #define MEM_BUF_SIZE (sizeof(ins_ref_t) * MAX_NUM_INS_REFS) |
| |
| /* Thread-private data */ |
| typedef struct { |
| char *buf_ptr; |
| char *buf_base; |
| /* buf_end holds the negative value of real address of buffer end. */ |
| ptr_int_t buf_end; |
| void *cache; |
| file_t log; |
| #ifdef OUTPUT_TEXT |
| FILE *logf; |
| #endif |
| uint64 num_refs; |
| } per_thread_t; |
| |
| static size_t page_size; |
| static client_id_t client_id; |
| static app_pc code_cache; |
| static void *mutex; /* for multithread support */ |
| static uint64 global_num_refs; /* keep a global memory reference count */ |
| static int tls_index; |
| |
| static void |
| event_exit(void); |
| static void |
| event_thread_init(void *drcontext); |
| static void |
| event_thread_exit(void *drcontext); |
| |
| static dr_emit_flags_t |
| event_bb_insert(void *drcontext, void *tag, instrlist_t *bb, instr_t *instr, |
| bool for_trace, bool translating, void *user_data); |
| |
| static void |
| clean_call(void); |
| static void |
| instrace(void *drcontext); |
| static void |
| code_cache_init(void); |
| static void |
| code_cache_exit(void); |
| static void |
| instrument_instr(void *drcontext, instrlist_t *ilist, instr_t *where); |
| |
| DR_EXPORT void |
| dr_client_main(client_id_t id, int argc, const char *argv[]) |
| { |
| /* We need 2 reg slots beyond drreg's eflags slots => 3 slots */ |
| drreg_options_t ops = { sizeof(ops), 3, false }; |
| /* Specify priority relative to other instrumentation operations: */ |
| drmgr_priority_t priority = { sizeof(priority), /* size of struct */ |
| "instrace", /* name of our operation */ |
| NULL, /* optional name of operation we should precede */ |
| NULL, /* optional name of operation we should follow */ |
| 0 }; /* numeric priority */ |
| dr_set_client_name("DynamoRIO Sample Client 'instrace'", |
| "http://dynamorio.org/issues"); |
| page_size = dr_page_size(); |
| if (!drmgr_init() || drreg_init(&ops) != DRREG_SUCCESS) |
| DR_ASSERT(false); |
| client_id = id; |
| mutex = dr_mutex_create(); |
| dr_register_exit_event(event_exit); |
| if (!drmgr_register_thread_init_event(event_thread_init) || |
| !drmgr_register_thread_exit_event(event_thread_exit) || |
| !drmgr_register_bb_instrumentation_event(NULL /*analysis func*/, event_bb_insert, |
| &priority)) { |
| /* something is wrong: can't continue */ |
| DR_ASSERT(false); |
| return; |
| } |
| tls_index = drmgr_register_tls_field(); |
| DR_ASSERT(tls_index != -1); |
| |
| code_cache_init(); |
| dr_log(NULL, DR_LOG_ALL, 1, "Client 'instrace' initializing\n"); |
| #ifdef SHOW_RESULTS |
| if (dr_is_notify_on()) { |
| # ifdef WINDOWS |
| /* Ask for best-effort printing to cmd window. Must be called at init. */ |
| dr_enable_console_printing(); |
| # endif |
| dr_fprintf(STDERR, "Client instrace is running\n"); |
| } |
| #endif |
| } |
| |
| static void |
| event_exit() |
| { |
| #ifdef SHOW_RESULTS |
| char msg[512]; |
| int len; |
| len = dr_snprintf(msg, sizeof(msg) / sizeof(msg[0]), |
| "Instrumentation results:\n" |
| " saw %llu memory references\n", |
| global_num_refs); |
| DR_ASSERT(len > 0); |
| NULL_TERMINATE_BUFFER(msg); |
| DISPLAY_STRING(msg); |
| #endif /* SHOW_RESULTS */ |
| code_cache_exit(); |
| |
| if (!drmgr_unregister_tls_field(tls_index) || |
| !drmgr_unregister_thread_init_event(event_thread_init) || |
| !drmgr_unregister_thread_exit_event(event_thread_exit) || |
| !drmgr_unregister_bb_insertion_event(event_bb_insert) || |
| drreg_exit() != DRREG_SUCCESS) |
| DR_ASSERT(false); |
| |
| dr_mutex_destroy(mutex); |
| drmgr_exit(); |
| } |
| |
| #ifdef WINDOWS |
| # define IF_WINDOWS(x) x |
| #else |
| # define IF_WINDOWS(x) /* nothing */ |
| #endif |
| |
| static void |
| event_thread_init(void *drcontext) |
| { |
| per_thread_t *data; |
| |
| /* allocate thread private data */ |
| data = dr_thread_alloc(drcontext, sizeof(per_thread_t)); |
| drmgr_set_tls_field(drcontext, tls_index, data); |
| data->buf_base = dr_thread_alloc(drcontext, MEM_BUF_SIZE); |
| data->buf_ptr = data->buf_base; |
| /* set buf_end to be negative of address of buffer end for the lea later */ |
| data->buf_end = -(ptr_int_t)(data->buf_base + MEM_BUF_SIZE); |
| data->num_refs = 0; |
| |
| /* We're going to dump our data to a per-thread file. |
| * On Windows we need an absolute path so we place it in |
| * the same directory as our library. We could also pass |
| * in a path as a client argument. |
| */ |
| data->log = |
| log_file_open(client_id, drcontext, NULL /* using client lib path */, "instrace", |
| #ifndef WINDOWS |
| DR_FILE_CLOSE_ON_FORK | |
| #endif |
| DR_FILE_ALLOW_LARGE); |
| #ifdef OUTPUT_TEXT |
| data->logf = log_stream_from_file(data->log); |
| fprintf(data->logf, "Format: <instr address>,<opcode>\n"); |
| #endif |
| } |
| |
| static void |
| event_thread_exit(void *drcontext) |
| { |
| per_thread_t *data; |
| |
| instrace(drcontext); |
| data = drmgr_get_tls_field(drcontext, tls_index); |
| dr_mutex_lock(mutex); |
| global_num_refs += data->num_refs; |
| dr_mutex_unlock(mutex); |
| #ifdef OUTPUT_TEXT |
| log_stream_close(data->logf); /* closes fd too */ |
| #else |
| log_file_close(data->log); |
| #endif |
| dr_thread_free(drcontext, data->buf_base, MEM_BUF_SIZE); |
| dr_thread_free(drcontext, data, sizeof(per_thread_t)); |
| } |
| |
| /* event_bb_insert calls instrument_instr to instrument every |
| * application memory reference. |
| */ |
| static dr_emit_flags_t |
| event_bb_insert(void *drcontext, void *tag, instrlist_t *bb, instr_t *instr, |
| bool for_trace, bool translating, void *user_data) |
| { |
| if (instr_get_app_pc(instr) == NULL || !instr_is_app(instr)) |
| return DR_EMIT_DEFAULT; |
| instrument_instr(drcontext, bb, instr); |
| return DR_EMIT_DEFAULT; |
| } |
| |
| static void |
| instrace(void *drcontext) |
| { |
| per_thread_t *data; |
| int num_refs; |
| ins_ref_t *ins_ref; |
| #ifdef OUTPUT_TEXT |
| int i; |
| #endif |
| |
| data = drmgr_get_tls_field(drcontext, tls_index); |
| ins_ref = (ins_ref_t *)data->buf_base; |
| num_refs = (int)((ins_ref_t *)data->buf_ptr - ins_ref); |
| |
| #ifdef OUTPUT_TEXT |
| /* We use libc's fprintf as it is buffered and much faster than dr_fprintf |
| * for repeated printing that dominates performance, as the printing does here. |
| */ |
| for (i = 0; i < num_refs; i++) { |
| /* We use PIFX to avoid leading zeroes and shrink the resulting file. */ |
| fprintf(data->logf, PIFX ",%s\n", (ptr_uint_t)ins_ref->pc, |
| decode_opcode_name(ins_ref->opcode)); |
| ++ins_ref; |
| } |
| #else |
| dr_write_file(data->log, data->buf_base, (size_t)(data->buf_ptr - data->buf_base)); |
| #endif |
| |
| memset(data->buf_base, 0, MEM_BUF_SIZE); |
| data->num_refs += num_refs; |
| data->buf_ptr = data->buf_base; |
| } |
| |
| /* clean_call dumps the memory reference info to the log file */ |
| static void |
| clean_call(void) |
| { |
| void *drcontext = dr_get_current_drcontext(); |
| instrace(drcontext); |
| } |
| |
| static void |
| code_cache_init(void) |
| { |
| void *drcontext; |
| instrlist_t *ilist; |
| instr_t *where; |
| byte *end; |
| |
| drcontext = dr_get_current_drcontext(); |
| code_cache = |
| dr_nonheap_alloc(page_size, DR_MEMPROT_READ | DR_MEMPROT_WRITE | DR_MEMPROT_EXEC); |
| ilist = instrlist_create(drcontext); |
| /* The lean procecure simply performs a clean call, and then jumps back. */ |
| /* Jump back to DR's code cache. */ |
| where = INSTR_CREATE_jmp_ind(drcontext, opnd_create_reg(DR_REG_XCX)); |
| instrlist_meta_append(ilist, where); |
| /* Clean call */ |
| dr_insert_clean_call(drcontext, ilist, where, (void *)clean_call, false, 0); |
| /* Encode the instructions into memory and clean up. */ |
| end = instrlist_encode(drcontext, ilist, code_cache, false); |
| DR_ASSERT((size_t)(end - code_cache) < page_size); |
| instrlist_clear_and_destroy(drcontext, ilist); |
| /* Set the memory as just +rx now. */ |
| dr_memory_protect(code_cache, page_size, DR_MEMPROT_READ | DR_MEMPROT_EXEC); |
| } |
| |
| static void |
| code_cache_exit(void) |
| { |
| dr_nonheap_free(code_cache, page_size); |
| } |
| |
| /* instrument_instr is called whenever a memory reference is identified. |
| * It inserts code before the memory reference to to fill the memory buffer |
| * and jump to our own code cache to call the clean_call when the buffer is full. |
| */ |
| static void |
| instrument_instr(void *drcontext, instrlist_t *ilist, instr_t *where) |
| { |
| instr_t *instr, *call, *restore; |
| opnd_t opnd1, opnd2; |
| reg_id_t reg1, reg2; |
| drvector_t allowed; |
| per_thread_t *data; |
| app_pc pc; |
| |
| data = drmgr_get_tls_field(drcontext, tls_index); |
| |
| /* Steal two scratch registers. |
| * reg2 must be ECX or RCX for jecxz. |
| */ |
| drreg_init_and_fill_vector(&allowed, false); |
| drreg_set_vector_entry(&allowed, DR_REG_XCX, true); |
| if (drreg_reserve_register(drcontext, ilist, where, &allowed, ®2) != |
| DRREG_SUCCESS || |
| drreg_reserve_register(drcontext, ilist, where, NULL, ®1) != DRREG_SUCCESS) { |
| DR_ASSERT(false); /* cannot recover */ |
| drvector_delete(&allowed); |
| return; |
| } |
| drvector_delete(&allowed); |
| |
| /* The following assembly performs the following instructions |
| * buf_ptr->pc = pc; |
| * buf_ptr->opcode = opcode; |
| * buf_ptr++; |
| * if (buf_ptr >= buf_end_ptr) |
| * clean_call(); |
| */ |
| drmgr_insert_read_tls_field(drcontext, tls_index, ilist, where, reg2); |
| /* Load data->buf_ptr into reg2 */ |
| opnd1 = opnd_create_reg(reg2); |
| opnd2 = OPND_CREATE_MEMPTR(reg2, offsetof(per_thread_t, buf_ptr)); |
| instr = INSTR_CREATE_mov_ld(drcontext, opnd1, opnd2); |
| instrlist_meta_preinsert(ilist, where, instr); |
| |
| /* Store pc */ |
| pc = instr_get_app_pc(where); |
| /* For 64-bit, we can't use a 64-bit immediate so we split pc into two halves. |
| * We could alternatively load it into reg1 and then store reg1. |
| * We use a convenience routine that does the two-step store for us. |
| */ |
| opnd1 = OPND_CREATE_MEMPTR(reg2, offsetof(ins_ref_t, pc)); |
| instrlist_insert_mov_immed_ptrsz(drcontext, (ptr_int_t)pc, opnd1, ilist, where, NULL, |
| NULL); |
| |
| /* Store opcode */ |
| opnd1 = OPND_CREATE_MEMPTR(reg2, offsetof(ins_ref_t, opcode)); |
| opnd2 = OPND_CREATE_INT32(instr_get_opcode(where)); |
| instr = INSTR_CREATE_mov_st(drcontext, opnd1, opnd2); |
| instrlist_meta_preinsert(ilist, where, instr); |
| |
| /* Increment reg value by pointer size using lea instr */ |
| opnd1 = opnd_create_reg(reg2); |
| opnd2 = opnd_create_base_disp(reg2, DR_REG_NULL, 0, sizeof(ins_ref_t), OPSZ_lea); |
| instr = INSTR_CREATE_lea(drcontext, opnd1, opnd2); |
| instrlist_meta_preinsert(ilist, where, instr); |
| |
| /* Update the data->buf_ptr */ |
| drmgr_insert_read_tls_field(drcontext, tls_index, ilist, where, reg1); |
| opnd1 = OPND_CREATE_MEMPTR(reg1, offsetof(per_thread_t, buf_ptr)); |
| opnd2 = opnd_create_reg(reg2); |
| instr = INSTR_CREATE_mov_st(drcontext, opnd1, opnd2); |
| instrlist_meta_preinsert(ilist, where, instr); |
| |
| /* We use the lea + jecxz trick for better performance. |
| * lea and jecxz won't disturb the eflags, so we won't need |
| * code to save and restore the application's eflags. |
| */ |
| /* lea [reg2 - buf_end] => reg2 */ |
| opnd1 = opnd_create_reg(reg1); |
| opnd2 = OPND_CREATE_MEMPTR(reg1, offsetof(per_thread_t, buf_end)); |
| instr = INSTR_CREATE_mov_ld(drcontext, opnd1, opnd2); |
| instrlist_meta_preinsert(ilist, where, instr); |
| opnd1 = opnd_create_reg(reg2); |
| opnd2 = opnd_create_base_disp(reg1, reg2, 1, 0, OPSZ_lea); |
| instr = INSTR_CREATE_lea(drcontext, opnd1, opnd2); |
| instrlist_meta_preinsert(ilist, where, instr); |
| |
| /* jecxz call */ |
| call = INSTR_CREATE_label(drcontext); |
| opnd1 = opnd_create_instr(call); |
| instr = INSTR_CREATE_jecxz(drcontext, opnd1); |
| instrlist_meta_preinsert(ilist, where, instr); |
| |
| /* jump restore to skip clean call */ |
| restore = INSTR_CREATE_label(drcontext); |
| opnd1 = opnd_create_instr(restore); |
| instr = INSTR_CREATE_jmp(drcontext, opnd1); |
| instrlist_meta_preinsert(ilist, where, instr); |
| |
| /* clean call */ |
| /* We jump to our generated lean procedure which performs a full context |
| * switch and clean call invocation. This is to reduce the code cache size. |
| */ |
| instrlist_meta_preinsert(ilist, where, call); |
| /* mov restore DR_REG_XCX */ |
| opnd1 = opnd_create_reg(reg2); |
| /* This is the return address for jumping back from the lean procedure. */ |
| opnd2 = opnd_create_instr(restore); |
| /* We could use instrlist_insert_mov_instr_addr(), but with a register |
| * destination we know we can use a 64-bit immediate. |
| */ |
| instr = INSTR_CREATE_mov_imm(drcontext, opnd1, opnd2); |
| instrlist_meta_preinsert(ilist, where, instr); |
| /* jmp code_cache */ |
| opnd1 = opnd_create_pc(code_cache); |
| instr = INSTR_CREATE_jmp(drcontext, opnd1); |
| instrlist_meta_preinsert(ilist, where, instr); |
| |
| /* Restore scratch registers */ |
| instrlist_meta_preinsert(ilist, where, restore); |
| if (drreg_unreserve_register(drcontext, ilist, where, reg1) != DRREG_SUCCESS || |
| drreg_unreserve_register(drcontext, ilist, where, reg2) != DRREG_SUCCESS) |
| DR_ASSERT(false); |
| } |