| /* ********************************************************** |
| * Copyright (c) 2012-2015 Google, Inc. All rights reserved. |
| * Copyright (c) 2008-2009 VMware, Inc. All rights reserved. |
| * **********************************************************/ |
| |
| /* |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * * Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimer. |
| * |
| * * Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * * Neither the name of VMware, Inc. nor the names of its contributors may be |
| * used to endorse or promote products derived from this software without |
| * specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE |
| * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH |
| * DAMAGE. |
| */ |
| |
| /** |
| \page API_tutorial API Usage Tutorial |
| |
| Below we provide a few short tutorials on using the DynamoRIO API to write |
| several clients. |
| |
| - A client that tracks the average dynamic basic block size |
| demonstrating some simple instrumentation : \subpage API_tutorial_bbdynsize1 "average_bb_size". |
| |
| - A client that steals a register from the application demonstrating |
| some simple state translation : \subpage API_tutorial_steal_reg1 "steal_reg". |
| |
| - A client that adjusts prefetch instructions demonstrating some simple |
| app modifications : \subpage API_tutorial_prefetch1 "prefetch". |
| |
| The following tutorial additionally shows how to create a new annotation to support a |
| client-specific feature. |
| |
| - A new annotation for a hypothetical memory usage client. The annotation marks any |
| specified memory region as defined, even when the client analysis shows it is undefined : |
| \subpage API_tutorial_annotation1 "create_annotation" |
| |
| **/ |
| |
| |
| /** |
| \page API_tutorial_bbdynsize1 average_bb_size |
| |
| In this tutorial we'll create a simple client that will compute the |
| average size of basic blocks executed (in instructions) weighted by |
| their execution frequency. We start with an empty DynamoRIO client. |
| |
| \code |
| #include "dr_api.h" |
| |
| DR_EXPORT void |
| dr_client_main(client_id_t id, int argc, const char *argv[]) |
| { |
| /* empty */ |
| } |
| \endcode |
| |
| [ \ref API_tutorial "prev" | \subpage API_tutorial_bbdynsize2 "next" ] |
| |
| \page API_tutorial_bbdynsize2 average_bb_size.p2 |
| Now we register for events we'll need. For our first pass we only |
| need to register for the bb event (to add our instrumentation) and the |
| exit event (to display the results). We don't need to register for |
| trace events (as we will add our instrumentation to the constituent |
| bbs) and we can ignore the \p for_trace and \p translating arguments |
| to the basic block callback as our changes are deterministic and |
| idempotent, so DynamoRIO's default translation will work fine. |
| |
| \code |
| #include "dr_api.h" |
| |
| + static void |
| + event_exit(void); |
| + static dr_emit_flags_t |
| + event_basic_block(void *drcontext, void *tag, instrlist_t *bb, |
| + bool for_trace, bool translating); |
| |
| DR_EXPORT void |
| dr_client_main(client_id_t id, int argc, const char *argv[]) |
| { |
| + /* register events */ |
| + dr_register_exit_event(event_exit); |
| + dr_register_bb_event(event_basic_block); |
| } |
| |
| + static void |
| + event_exit(void) |
| + { |
| + /* empty */ |
| + } |
| + |
| + static dr_emit_flags_t |
| + event_basic_block(void *drcontext, void *tag, instrlist_t *bb, |
| + bool for_trace, bool translating) |
| + { |
| + /* empty */ |
| + return DR_EMIT_DEFAULT; |
| + } |
| \endcode |
| |
| [ \ref API_tutorial_bbdynsize1 "prev" | \subpage API_tutorial_bbdynsize3 "next" ] |
| |
| \page API_tutorial_bbdynsize3 average_bb_size.p3 |
| Next we add code to track the size of blocks as we build them for our |
| as-built count. This will give us the number of blocks built and |
| their average size. Next we will add the dynamic execution counts. |
| |
| \code |
| #include "dr_api.h" |
| |
| + #ifdef WINDOWS |
| + # define DISPLAY_STRING(msg) dr_messagebox(msg) |
| + #else |
| + # define DISPLAY_STRING(msg) dr_printf("%s\n", msg) |
| + #endif |
| |
| + typedef struct bb_counts { |
| + uint64 blocks; |
| + uint64 total_size; |
| + } bb_counts; |
| |
| + static bb_counts counts_as_built; |
| + void *as_built_lock; |
| |
| static void |
| event_exit(void); |
| static dr_emit_flags_t |
| event_basic_block(void *drcontext, void *tag, instrlist_t *bb, |
| bool for_trace, bool translating); |
| |
| DR_EXPORT void |
| dr_client_main(client_id_t id, int argc, const char *argv[]) |
| { |
| /* register events */ |
| dr_register_exit_event(event_exit); |
| dr_register_bb_event(event_basic_block); |
| + |
| + /* initialize lock */ |
| + as_built_lock = dr_mutex_create(); |
| } |
| |
| static void |
| event_exit(void) |
| { |
| + /* Display results - we must first snprintf the string as on Windows |
| + * dr_printf(), dr_messagebox() and dr_fprintf() can't print floats. */ |
| + char msg[512]; |
| + int len; |
| + len = snprintf(msg, sizeof(msg)/sizeof(msg[0]), |
| + "Number of basic blocks built : %"UINT64_FORMAT_CODE"\n" |
| + " Average size : %5.2lf instructions\n", |
| + counts_as_built.blocks, |
| + counts_as_built.total_size / (double)counts_as_built.blocks); |
| + DR_ASSERT(len > 0); |
| + msg[sizeof(msg)/sizeof(msg[0])-1] = '\0'; /* null-terminate */ |
| + DISPLAY_STRING(msg); |
| + |
| + /* free mutex */ |
| + dr_mutex_destroy(as_built_lock); |
| } |
| |
| static dr_emit_flags_t |
| event_basic_block(void *drcontext, void *tag, instrlist_t *bb, |
| bool for_trace, bool translating) |
| { |
| + uint num_instructions = 0; |
| + instr_t *instr; |
| + |
| + /* count the number of instructions in this block */ |
| + for (instr = instrlist_first(bb); instr != NULL; instr = instr_get_next(instr)) { |
| + num_instructions++; |
| + } |
| + |
| + /* update the as-built counts */ |
| + dr_mutex_lock(as_built_lock); |
| + counts_as_built.blocks++; |
| + counts_as_built.total_size += num_instructions; |
| + dr_mutex_unlock(as_built_lock); |
| return DR_EMIT_DEFAULT; |
| } |
| \endcode |
| |
| [ \ref API_tutorial_bbdynsize2 "prev" | \subpage API_tutorial_bbdynsize4 "next" ] |
| |
| \page API_tutorial_bbdynsize4 average_bb_size.p4 |
| Now we add instrumentation to gather the dynamic counts. For the |
| first pass we'll use a clean call to increment the counters from each |
| block. However, this approach is very slow since a context switch out of |
| the cache will occur for each basic block execution. We'll look at |
| other approaches next. |
| |
| \code |
| #include "dr_api.h" |
| |
| #ifdef WINDOWS |
| # define DISPLAY_STRING(msg) dr_messagebox(msg) |
| #else |
| # define DISPLAY_STRING(msg) dr_printf("%s\n", msg) |
| #endif |
| |
| typedef struct bb_counts { |
| uint64 blocks; |
| uint64 total_size; |
| } bb_counts; |
| |
| static bb_counts counts_as_built; |
| void *as_built_lock; |
| |
| + static bb_counts counts_dynamic; |
| + void *count_lock; |
| |
| static void |
| event_exit(void); |
| static dr_emit_flags_t |
| event_basic_block(void *drcontext, void *tag, instrlist_t *bb, |
| bool for_trace, bool translating); |
| + static void |
| + clean_call(uint instruction_count); |
| |
| DR_EXPORT void |
| dr_client_main(client_id_t id, int argc, const char *argv[]) |
| { |
| /* register events */ |
| dr_register_exit_event(event_exit); |
| dr_register_bb_event(event_basic_block); |
| |
| /* initialize lock */ |
| as_built_lock = dr_mutex_create(); |
| + count_lock = dr_mutex_create(); |
| } |
| |
| static void |
| event_exit(void) |
| { |
| /* Display results - we must first snprintf the string as on Windows |
| * dr_printf(), dr_messagebox() and dr_fprintf() can't print floats. */ |
| char msg[512]; |
| int len; |
| len = snprintf(msg, sizeof(msg)/sizeof(msg[0]), |
| "Number of blocks built : %"UINT64_FORMAT_CODE"\n" |
| " Average size : %5.2lf instructions\n" |
| + "Number of blocks executed : %"UINT64_FORMAT_CODE"\n" |
| + " Average weighted size : %5.2lf instructions\n", |
| counts_as_built.blocks, |
| counts_as_built.total_size / (double)counts_as_built.blocks, |
| + counts_dynamic.blocks, |
| + counts_dynamic.total_size / (double)counts_dynamic.blocks); |
| DR_ASSERT(len > 0); |
| msg[sizeof(msg)/sizeof(msg[0])-1] = '\0'; |
| DISPLAY_STRING(msg); |
| |
| /* free mutex */ |
| dr_mutex_destroy(as_built_lock); |
| + dr_mutex_destroy(count_lock); |
| } |
| |
| + static void |
| + clean_call(uint instruction_count) |
| + { |
| + dr_mutex_lock(count_lock); |
| + counts_dynamic.blocks++; |
| + counts_dynamic.total_size += instruction_count; |
| + dr_mutex_unlock(count_lock); |
| + } |
| |
| static dr_emit_flags_t |
| event_basic_block(void *drcontext, void *tag, instrlist_t *bb, |
| bool for_trace, bool translating) |
| { |
| uint num_instructions = 0; |
| instr_t *instr; |
| |
| /* count the number of instructions in this block */ |
| for (instr = instrlist_first(bb); instr != NULL; instr = instr_get_next(instr)) { |
| num_instructions++; |
| } |
| |
| /* update the as-built counts */ |
| dr_mutex_lock(as_built_lock); |
| counts_as_built.blocks++; |
| counts_as_built.total_size += num_instructions; |
| dr_mutex_unlock(as_built_lock); |
| |
| + /* insert clean call */ |
| + dr_insert_clean_call(drcontext, bb, instrlist_first(bb), (void *)clean_call, |
| + false, 1, OPND_CREATE_INT32(num_instructions)); |
| + |
| return DR_EMIT_DEFAULT; |
| } |
| \endcode |
| |
| [ \ref API_tutorial_bbdynsize3 "prev" | \subpage API_tutorial_bbdynsize5 "next" ] |
| |
| \page API_tutorial_bbdynsize5 average_bb_size.p5 |
| In this variation we use inlined atomic instructions to adjust the counters. On |
| a single core box this form should be quite fast, but on a multi-core box the |
| cache line locking for atomic operations will slow things dramatically. We'll |
| look at forms more suitable for multi-core further on. |
| |
| \code |
| #include "dr_api.h" |
| |
| #ifdef WINDOWS |
| # define DISPLAY_STRING(msg) dr_messagebox(msg) |
| #else |
| # define DISPLAY_STRING(msg) dr_printf("%s\n", msg) |
| #endif |
| |
| typedef struct bb_counts { |
| uint64 blocks; |
| uint64 total_size; |
| } bb_counts; |
| |
| static bb_counts counts_as_built; |
| void *as_built_lock; |
| |
| static bb_counts counts_dynamic; |
| |
| static void |
| event_exit(void); |
| static dr_emit_flags_t |
| event_basic_block(void *drcontext, void *tag, instrlist_t *bb, |
| bool for_trace, bool translating); |
| |
| DR_EXPORT void |
| dr_client_main(client_id_t id, int argc, const char *argv[]) |
| { |
| /* register events */ |
| dr_register_exit_event(event_exit); |
| dr_register_bb_event(event_basic_block); |
| |
| /* initialize lock */ |
| as_built_lock = dr_mutex_create(); |
| } |
| |
| static void |
| event_exit(void) |
| { |
| /* Display results - we must first snprintf the string as on Windows |
| * dr_printf(), dr_messagebox() and dr_fprintf() can't print floats. */ |
| char msg[512]; |
| int len; |
| len = snprintf(msg, sizeof(msg)/sizeof(msg[0]), |
| "Number of blocks built : %"UINT64_FORMAT_CODE"\n" |
| " Average size : %5.2lf instructions\n" |
| "Number of blocks executed : %"UINT64_FORMAT_CODE"\n" |
| " Average weighted size : %5.2lf instructions\n", |
| counts_as_built.blocks, |
| counts_as_built.total_size / (double)counts_as_built.blocks, |
| counts_dynamic.blocks, |
| counts_dynamic.total_size / (double)counts_dynamic.blocks); |
| DR_ASSERT(len > 0); |
| msg[sizeof(msg)/sizeof(msg[0])-1] = '\0'; |
| DISPLAY_STRING(msg); |
| |
| /* free mutex */ |
| dr_mutex_destroy(as_built_lock); |
| } |
| |
| static dr_emit_flags_t |
| event_basic_block(void *drcontext, void *tag, instrlist_t *bb, |
| bool for_trace, bool translating) |
| { |
| uint num_instructions = 0; |
| instr_t *instr, *where = NULL; |
| |
| /* count the number of instructions in this block */ |
| for (instr = instrlist_first(bb); instr != NULL; instr = instr_get_next(instr)) { |
| num_instructions++; |
| } |
| |
| /* update the as-built counts */ |
| dr_mutex_lock(as_built_lock); |
| counts_as_built.blocks++; |
| counts_as_built.total_size += num_instructions; |
| dr_mutex_unlock(as_built_lock); |
| |
| + /* increment counters */ |
| + where = instrlist_first(bb); |
| + dr_save_arith_flags(drcontext, bb, where, SPILL_SLOT_1); |
| + #ifdef X86_32 |
| + /* Since the counters are 64-bit we must use an add + an adc to increment. |
| + * The operation is still effectively atomic since we're only increasing |
| + * the count. */ |
| + instrlist_meta_preinsert(bb, where, |
| + LOCK(INSTR_CREATE_add(drcontext, |
| + OPND_CREATE_ABSMEM((byte *)&counts_dynamic.blocks, OPSZ_4), |
| + OPND_CREATE_INT8(1)))); |
| + instrlist_meta_preinsert(bb, where, |
| + LOCK(INSTR_CREATE_adc(drcontext, |
| + OPND_CREATE_ABSMEM((byte *)&counts_dynamic.blocks + 4, OPSZ_4), |
| + OPND_CREATE_INT8(0)))); |
| + |
| + instrlist_meta_preinsert(bb, where, |
| + LOCK(INSTR_CREATE_add(drcontext, |
| + OPND_CREATE_ABSMEM((byte *)&counts_dynamic.total_size, OPSZ_4), |
| + OPND_CREATE_INT_32OR8(num_instructions)))); |
| + instrlist_meta_preinsert(bb, where, |
| + LOCK(INSTR_CREATE_adc(drcontext, |
| + OPND_CREATE_ABSMEM((byte *)&counts_dynamic.total_size + 4, OPSZ_4), |
| + OPND_CREATE_INT8(0)))); |
| + #else /* X86_64 */ |
| + instrlist_meta_preinsert(bb, where, |
| + LOCK(INSTR_CREATE_inc(drcontext, |
| + OPND_CREATE_ABSMEM((byte *)&counts_dynamic.blocks, OPSZ_8)))); |
| + instrlist_meta_preinsert(bb, where, |
| + LOCK(INSTR_CREATE_add(drcontext, |
| + OPND_CREATE_ABSMEM((byte *)&counts_dynamic.total_size, OPSZ_8), |
| + OPND_CREATE_INT_32OR8(num_instructions)))); |
| + #endif |
| + dr_restore_arith_flags(drcontext, bb, where, SPILL_SLOT_1); |
| + |
| return DR_EMIT_DEFAULT; |
| } |
| \endcode |
| |
| [ \ref API_tutorial_bbdynsize4 "prev" | \subpage API_tutorial_bbdynsize6 "next" ] |
| |
| \page API_tutorial_bbdynsize6 average_bb_size.p6 |
| Next up we add an optimization to only save and restore the flags if we can't find a spot |
| within the block where the flags are dead. |
| |
| \code |
| #include "dr_api.h" |
| |
| #ifdef WINDOWS |
| # define DISPLAY_STRING(msg) dr_messagebox(msg) |
| #else |
| # define DISPLAY_STRING(msg) dr_printf("%s\n", msg) |
| #endif |
| |
| + #define TESTALL(mask, var) (((mask) & (var)) == (mask)) |
| + #define TESTANY(mask, var) (((mask) & (var)) != 0) |
| |
| typedef struct bb_counts { |
| uint64 blocks; |
| uint64 total_size; |
| } bb_counts; |
| |
| |
| static bb_counts counts_as_built; |
| void *as_built_lock; |
| |
| static bb_counts counts_dynamic; |
| |
| + /* Protected by the as_built_lock */ |
| + static uint64 bbs_eflags_saved; |
| |
| |
| static void |
| event_exit(void); |
| static dr_emit_flags_t |
| event_basic_block(void *drcontext, void *tag, instrlist_t *bb, |
| bool for_trace, bool translating); |
| |
| DR_EXPORT void |
| dr_client_main(client_id_t id, int argc, const char *argv[]) |
| { |
| /* register events */ |
| dr_register_exit_event(event_exit); |
| dr_register_bb_event(event_basic_block); |
| |
| /* initialize lock */ |
| as_built_lock = dr_mutex_create(); |
| } |
| |
| static void |
| event_exit(void) |
| { |
| /* Display results - we must first snprintf the string as on Windows |
| * dr_printf(), dr_messagebox() and dr_fprintf() can't print floats. */ |
| char msg[512]; |
| int len; |
| len = snprintf(msg, sizeof(msg)/sizeof(msg[0]), |
| "Number of blocks built : %"UINT64_FORMAT_CODE"\n" |
| " Average size : %5.2lf instructions\n" |
| + " Num saved eflags : %"UINT64_FORMAT_CODE"\n" |
| "Number of blocks executed : %"UINT64_FORMAT_CODE"\n" |
| " Average weighted size : %5.2lf instructions\n", |
| counts_as_built.blocks, |
| counts_as_built.total_size / (double)counts_as_built.blocks, |
| + bbs_eflags_saved, |
| counts_dynamic.blocks, |
| counts_dynamic.total_size / (double)counts_dynamic.blocks); |
| DR_ASSERT(len > 0); |
| msg[sizeof(msg)/sizeof(msg[0])-1] = '\0'; |
| DISPLAY_STRING(msg); |
| |
| /* free mutex */ |
| dr_mutex_destroy(as_built_lock); |
| } |
| |
| static dr_emit_flags_t |
| event_basic_block(void *drcontext, void *tag, instrlist_t *bb, |
| bool for_trace, bool translating) |
| { |
| uint num_instructions = 0; |
| instr_t *instr, *where = NULL; |
| + bool eflags_saved = true; |
| |
| /* count the number of instructions in this block */ |
| for (instr = instrlist_first(bb); instr != NULL; instr = instr_get_next(instr)) { |
| + /* Since it doesn't matter where we insert, look for a place |
| + * where the eflags are dead. */ |
| + uint flags = instr_get_arith_flags(instr, DR_QUERY_DEFAULT); |
| + if (TESTALL(EFLAGS_WRITE_6, flags) && !TESTANY(EFLAGS_READ_6, flags)) { |
| + where = instr; |
| + eflags_saved = false; |
| + } |
| num_instructions++; |
| } |
| |
| /* update the as-built counts */ |
| dr_mutex_lock(as_built_lock); |
| counts_as_built.blocks++; |
| counts_as_built.total_size += num_instructions; |
| + if (eflags_saved) |
| + bbs_eflags_saved++; |
| dr_mutex_unlock(as_built_lock); |
| |
| /* increment counters */ |
| + if (eflags_saved) { |
| + where = instrlist_first(bb); |
| dr_save_arith_flags(drcontext, bb, where, SPILL_SLOT_1); |
| + } |
| #ifdef X86_32 |
| /* Since the counters are 64-bit we must use an add and an adc to increment. |
| * The operation is still effectively atomic since we're only increasing |
| * the count. */ |
| instrlist_meta_preinsert(bb, where, |
| LOCK(INSTR_CREATE_add(drcontext, |
| OPND_CREATE_ABSMEM((byte *)&counts_dynamic.blocks, OPSZ_4), |
| OPND_CREATE_INT8(1)))); |
| instrlist_meta_preinsert(bb, where, |
| LOCK(INSTR_CREATE_adc(drcontext, |
| OPND_CREATE_ABSMEM((byte *)&counts_dynamic.blocks + 4, OPSZ_4), |
| OPND_CREATE_INT8(0)))); |
| |
| instrlist_meta_preinsert(bb, where, |
| LOCK(INSTR_CREATE_add(drcontext, |
| OPND_CREATE_ABSMEM((byte *)&counts_dynamic.total_size, OPSZ_4), |
| OPND_CREATE_INT_32OR8(num_instructions)))); |
| instrlist_meta_preinsert(bb, where, |
| LOCK(INSTR_CREATE_adc(drcontext, |
| OPND_CREATE_ABSMEM((byte *)&counts_dynamic.total_size + 4, OPSZ_4), |
| OPND_CREATE_INT8(0)))); |
| #else /* X86_64 */ |
| instrlist_meta_preinsert(bb, where, |
| LOCK(INSTR_CREATE_inc(drcontext, |
| OPND_CREATE_ABSMEM((byte *)&counts_dynamic.blocks, OPSZ_8)))); |
| instrlist_meta_preinsert(bb, where, |
| LOCK(INSTR_CREATE_add(drcontext, |
| OPND_CREATE_ABSMEM((byte *)&counts_dynamic.total_size, OPSZ_8), |
| OPND_CREATE_INT_32OR8(num_instructions)))); |
| #endif |
| + if (eflags_saved) { |
| dr_restore_arith_flags(drcontext, bb, where, SPILL_SLOT_1); |
| + } |
| |
| return DR_EMIT_DEFAULT; |
| } |
| \endcode |
| |
| [ \ref API_tutorial_bbdynsize5 "prev" | \subpage API_tutorial_bbdynsize7 "next" ] |
| |
| \page API_tutorial_bbdynsize7 average_bb_size.p7 |
| We now have a pretty fast implementation, probably about as fast as we can get |
| on a single core machine. However, on multi-core systems the cache |
| line locking for the atomic operations becomes a bottleneck. To get around that |
| we can use thread-private counts and aggregate them. With thread-private counts |
| no locked instructions are needed; however, the extra indirection does have its |
| own cost of extra added instructions to spill a register and get the TLS pointer, |
| so on a single core this will be slower. |
| |
| \code |
| #include "dr_api.h" |
| + #include <stddef.h> |
| |
| #ifdef WINDOWS |
| # define DISPLAY_STRING(msg) dr_messagebox(msg) |
| #else |
| # define DISPLAY_STRING(msg) dr_printf("%s\n", msg) |
| #endif |
| |
| #define TESTALL(mask, var) (((mask) & (var)) == (mask)) |
| #define TESTANY(mask, var) (((mask) & (var)) != 0) |
| |
| typedef struct bb_counts { |
| uint64 blocks; |
| uint64 total_size; |
| } bb_counts; |
| |
| static bb_counts counts_as_built; |
| void *as_built_lock; |
| |
| static bb_counts counts_dynamic; |
| + void *count_lock; |
| |
| /* Protected by the as_built_lock */ |
| static uint64 bbs_eflags_saved; |
| |
| |
| static void |
| event_exit(void); |
| static dr_emit_flags_t |
| event_basic_block(void *drcontext, void *tag, instrlist_t *bb, |
| bool for_trace, bool translating); |
| + static void |
| + event_thread_init(void *drcontext); |
| + static void |
| + event_thread_exit(void *drcontext); |
| |
| DR_EXPORT void |
| dr_client_main(client_id_t id, int argc, const char *argv[]) |
| { |
| /* register events */ |
| dr_register_exit_event(event_exit); |
| dr_register_bb_event(event_basic_block); |
| + dr_register_thread_init_event(event_thread_init); |
| + dr_register_thread_exit_event(event_thread_exit); |
| |
| /* initialize lock */ |
| as_built_lock = dr_mutex_create(); |
| + count_lock = dr_mutex_create(); |
| } |
| |
| static void |
| event_exit(void) |
| { |
| /* Display results - we must first snprintf the string as on Windows |
| * dr_printf(), dr_messagebox() and dr_fprintf() can't print floats. */ |
| char msg[512]; |
| int len; |
| len = snprintf(msg, sizeof(msg)/sizeof(msg[0]), |
| "Number of blocks built : %"UINT64_FORMAT_CODE"\n" |
| " Average size : %5.2lf instructions\n" |
| " Num saved eflags : %"UINT64_FORMAT_CODE"\n" |
| "Number of blocks executed : %"UINT64_FORMAT_CODE"\n" |
| " Average weighted size : %5.2lf instructions\n", |
| counts_as_built.blocks, |
| counts_as_built.total_size / (double)counts_as_built.blocks, |
| bbs_eflags_saved, |
| counts_dynamic.blocks, |
| counts_dynamic.total_size / (double)counts_dynamic.blocks); |
| DR_ASSERT(len > 0); |
| msg[sizeof(msg)/sizeof(msg[0])-1] = '\0'; |
| DISPLAY_STRING(msg); |
| |
| /* free mutex */ |
| dr_mutex_destroy(as_built_lock); |
| + dr_mutex_destroy(count_lock); |
| } |
| |
| + static void |
| + event_thread_init(void *drcontext) |
| + { |
| + /* create an instance of our data structure for this thread */ |
| + bb_counts *counts = (bb_counts *)dr_thread_alloc(drcontext, sizeof(bb_counts)); |
| + /* store it in the slot provided in the drcontext */ |
| + dr_set_tls_field(drcontext, counts); |
| + memset(counts, 0, sizeof(bb_counts)); |
| + } |
| + |
| + static void |
| + event_thread_exit(void *drcontext) |
| + { |
| + bb_counts *counts = (bb_counts *) dr_get_tls_field(drcontext); |
| + /* NOTE - if we so choose we could report per-thread sizes here. */ |
| + dr_mutex_lock(count_lock); |
| + counts_dynamic.blocks += counts->blocks; |
| + counts_dynamic.total_size += counts->total_size; |
| + dr_mutex_unlock(count_lock); |
| + dr_thread_free(drcontext, counts, sizeof(bb_counts)); |
| + } |
| |
| static dr_emit_flags_t |
| event_basic_block(void *drcontext, void *tag, instrlist_t *bb, |
| bool for_trace, bool translating) |
| { |
| uint num_instructions = 0; |
| instr_t *instr, *where = NULL; |
| bool eflags_saved = true; |
| + bb_counts *counts = (bb_counts *) dr_get_tls_field(drcontext); |
| |
| /* count the number of instructions in this block */ |
| for (instr = instrlist_first(bb); instr != NULL; instr = instr_get_next(instr)) { |
| /* Since it doesn't matter where we insert, look for a place |
| * where the eflags are dead. */ |
| uint flags = instr_get_arith_flags(instr, DR_QUERY_DEFAULT); |
| if (TESTALL(EFLAGS_WRITE_6, flags) && !TESTANY(EFLAGS_READ_6, flags)) { |
| where = instr; |
| eflags_saved = false; |
| } |
| num_instructions++; |
| } |
| |
| /* update the as-built counts */ |
| dr_mutex_lock(as_built_lock); |
| counts_as_built.blocks++; |
| counts_as_built.total_size += num_instructions; |
| if (eflags_saved) |
| bbs_eflags_saved++; |
| dr_mutex_unlock(as_built_lock); |
| |
| /* increment counters */ |
| if (eflags_saved) { |
| where = instrlist_first(bb); |
| dr_save_arith_flags(drcontext, bb, where, SPILL_SLOT_1); |
| } |
| + /* Spill a register to get a pointer to our TLS structure. */ |
| + dr_save_reg(drcontext, bb, where, DR_REG_XDI, SPILL_SLOT_2); |
| + dr_insert_read_tls_field(drcontext, bb, where, DR_REG_XDI); |
| #ifdef X86_32 |
| /* Since the counters are 64-bit we must use an add and an adc to increment. |
| * The operation is still effectively atomic since we're only increasing |
| * the count. */ |
| instrlist_meta_preinsert(bb, where, |
| ! INSTR_CREATE_add(drcontext, |
| ! OPND_CREATE_MEM32(DR_REG_XDI, offsetof(bb_counts, blocks)), |
| OPND_CREATE_INT8(1))); |
| instrlist_meta_preinsert(bb, where, |
| ! INSTR_CREATE_adc(drcontext, |
| ! OPND_CREATE_MEM32(DR_REG_XDI, offsetof(bb_counts, blocks)+4), |
| OPND_CREATE_INT8(0))); |
| |
| instrlist_meta_preinsert(bb, where, |
| ! INSTR_CREATE_add(drcontext, |
| ! OPND_CREATE_MEM32(DR_REG_XDI, offsetof(bb_counts, total_size)), |
| OPND_CREATE_INT_32OR8(num_instructions))); |
| instrlist_meta_preinsert(bb, where, |
| ! INSTR_CREATE_adc(drcontext, |
| ! OPND_CREATE_MEM32(DR_REG_XDI, offsetof(bb_counts, total_size)+4), |
| OPND_CREATE_INT8(0))); |
| #else /* X86_64 */ |
| instrlist_meta_preinsert(bb, where, |
| ! INSTR_CREATE_inc(drcontext, |
| ! OPND_CREATE_MEM64(DR_REG_XDI, offsetof(bb_counts, blocks)))); |
| instrlist_meta_preinsert(bb, where, |
| ! INSTR_CREATE_add(drcontext, |
| ! OPND_CREATE_MEM64(DR_REG_XDI, offsetof(bb_counts, total_size)), |
| OPND_CREATE_INT_32OR8(num_instructions))); |
| #endif |
| + /* Restore spilled register. */ |
| + dr_restore_reg(drcontext, bb, where, DR_REG_XDI, SPILL_SLOT_2); |
| if (eflags_saved) { |
| dr_restore_arith_flags(drcontext, bb, where, SPILL_SLOT_1); |
| } |
| |
| return DR_EMIT_DEFAULT; |
| } |
| \endcode |
| |
| [ \ref API_tutorial_bbdynsize6 "prev" | \subpage API_tutorial_bbdynsize8 "next" ] |
| |
| \page API_tutorial_bbdynsize8 average_bb_size.p8 |
| Our final optimization is to note that if we're running with thread-private caches |
| (i.e. with the -thread_private option to DynamoRIO) we can use absolute addressing |
| on the per-thread counters instead of indirecting via TLS which should greatly speed |
| things up. The downside to -thread_private is that on applications with many threads |
| memory use can get quite high, potentially enough to impact performance of the system. |
| |
| \code |
| #include "dr_api.h" |
| #include <stddef.h> |
| |
| #ifdef WINDOWS |
| # define DISPLAY_STRING(msg) dr_messagebox(msg) |
| #else |
| # define DISPLAY_STRING(msg) dr_printf("%s\n", msg) |
| #endif |
| |
| #define TESTALL(mask, var) (((mask) & (var)) == (mask)) |
| #define TESTANY(mask, var) (((mask) & (var)) != 0) |
| |
| typedef struct bb_counts { |
| uint64 blocks; |
| uint64 total_size; |
| } bb_counts; |
| |
| static bb_counts counts_as_built; |
| void *as_built_lock; |
| |
| static bb_counts counts_dynamic; |
| void *count_lock; |
| |
| /* Protected by the as_built_lock */ |
| static uint64 bbs_eflags_saved; |
| |
| |
| static void |
| event_exit(void); |
| static dr_emit_flags_t |
| event_basic_block(void *drcontext, void *tag, instrlist_t *bb, |
| bool for_trace, bool translating); |
| static void |
| event_thread_init(void *drcontext); |
| static void |
| event_thread_exit(void *drcontext); |
| |
| DR_EXPORT void |
| dr_client_main(client_id_t id, int argc, const char *argv[]) |
| { |
| /* register events */ |
| dr_register_exit_event(event_exit); |
| dr_register_bb_event(event_basic_block); |
| dr_register_thread_init_event(event_thread_init); |
| dr_register_thread_exit_event(event_thread_exit); |
| |
| /* initialize lock */ |
| as_built_lock = dr_mutex_create(); |
| count_lock = dr_mutex_create(); |
| } |
| |
| static void |
| event_exit(void) |
| { |
| /* Display results - we must first snprintf the string as on Windows |
| * dr_printf(), dr_messagebox() and dr_fprintf() can't print floats. */ |
| char msg[512]; |
| int len; |
| len = snprintf(msg, sizeof(msg)/sizeof(msg[0]), |
| "Number of blocks built : %"UINT64_FORMAT_CODE"\n" |
| " Average size : %5.2lf instructions\n" |
| " Num saved eflags : %"UINT64_FORMAT_CODE"\n" |
| "Number of blocks executed : %"UINT64_FORMAT_CODE"\n" |
| " Average weighted size : %5.2lf instructions\n", |
| counts_as_built.blocks, |
| counts_as_built.total_size / (double)counts_as_built.blocks, |
| bbs_eflags_saved, |
| counts_dynamic.blocks, |
| counts_dynamic.total_size / (double)counts_dynamic.blocks); |
| DR_ASSERT(len > 0); |
| msg[sizeof(msg)/sizeof(msg[0])-1] = '\0'; |
| DISPLAY_STRING(msg); |
| |
| /* free mutex */ |
| dr_mutex_destroy(as_built_lock); |
| dr_mutex_destroy(count_lock); |
| } |
| |
| static void |
| event_thread_init(void *drcontext) |
| { |
| /* create an instance of our data structure for this thread */ |
| bb_counts *counts = (bb_counts *)dr_thread_alloc(drcontext, sizeof(bb_counts)); |
| /* store it in the slot provided in the drcontext */ |
| dr_set_tls_field(drcontext, counts); |
| memset(counts, 0, sizeof(bb_counts)); |
| } |
| |
| static void |
| event_thread_exit(void *drcontext) |
| { |
| bb_counts *counts = (bb_counts *) dr_get_tls_field(drcontext); |
| /* NOTE - if we so choose we could report per-thread sizes here. */ |
| dr_mutex_lock(count_lock); |
| counts_dynamic.blocks += counts->blocks; |
| counts_dynamic.total_size += counts->total_size; |
| dr_mutex_unlock(count_lock); |
| dr_thread_free(drcontext, counts, sizeof(bb_counts)); |
| } |
| |
| static dr_emit_flags_t |
| event_basic_block(void *drcontext, void *tag, instrlist_t *bb, |
| bool for_trace, bool translating) |
| { |
| uint num_instructions = 0; |
| instr_t *instr, *where = NULL; |
| bool eflags_saved = true; |
| bb_counts *counts = (bb_counts *) dr_get_tls_field(drcontext); |
| |
| /* count the number of instructions in this block */ |
| for (instr = instrlist_first(bb); instr != NULL; instr = instr_get_next(instr)) { |
| /* Since it doesn't matter where we insert, look for a place |
| * where the eflags are dead. */ |
| uint flags = instr_get_arith_flags(instr, DR_QUERY_DEFAULT); |
| if (TESTALL(EFLAGS_WRITE_6, flags) && !TESTANY(EFLAGS_READ_6, flags)) { |
| where = instr; |
| eflags_saved = false; |
| } |
| num_instructions++; |
| } |
| |
| /* update the as-built counts */ |
| dr_mutex_lock(as_built_lock); |
| counts_as_built.blocks++; |
| counts_as_built.total_size += num_instructions; |
| if (eflags_saved) |
| bbs_eflags_saved++; |
| dr_mutex_unlock(as_built_lock); |
| |
| /* increment counters */ |
| if (eflags_saved) { |
| where = instrlist_first(bb); |
| dr_save_arith_flags(drcontext, bb, where, SPILL_SLOT_1); |
| } |
| + /* If all caches are thread-private we can use direct addressing. */ |
| + if (!dr_using_all_private_caches()) { |
| /* Spill a register to get a pointer to our TLS structure. */ |
| dr_save_reg(drcontext, bb, where, DR_REG_XDI, SPILL_SLOT_2); |
| dr_insert_read_tls_field(drcontext, bb, where, DR_REG_XDI); |
| + } |
| #ifdef X86_32 |
| /* Since the counters are 64-bit we must use an add and an adc to increment. |
| * The operation is still effectively atomic since we're only increasing |
| * the count. */ |
| instrlist_meta_preinsert(bb, where, |
| INSTR_CREATE_add(drcontext, |
| ! dr_using_all_private_caches() ? |
| ! OPND_CREATE_ABSMEM((byte *)&counts->blocks, OPSZ_4) : |
| ! OPND_CREATE_MEM32(DR_REG_XDI, offsetof(bb_counts, blocks)), |
| OPND_CREATE_INT8(1))); |
| instrlist_meta_preinsert(bb, where, |
| INSTR_CREATE_adc(drcontext, |
| ! dr_using_all_private_caches() ? |
| ! OPND_CREATE_ABSMEM((byte *)&counts->blocks +4, OPSZ_4) : |
| ! OPND_CREATE_MEM32(DR_REG_XDI, offsetof(bb_counts, blocks)+4), |
| OPND_CREATE_INT8(0))); |
| |
| instrlist_meta_preinsert(bb, where, |
| INSTR_CREATE_add(drcontext, |
| ! dr_using_all_private_caches() ? |
| ! OPND_CREATE_ABSMEM((byte *)&counts->total_size, OPSZ_4) : |
| ! OPND_CREATE_MEM32(DR_REG_XDI, offsetof(bb_counts, total_size)), |
| OPND_CREATE_INT_32OR8(num_instructions))); |
| instrlist_meta_preinsert(bb, where, |
| INSTR_CREATE_adc(drcontext, |
| ! dr_using_all_private_caches() ? |
| ! OPND_CREATE_ABSMEM((byte *)&counts->total_size +4, OPSZ_4) : |
| ! OPND_CREATE_MEM32(DR_REG_XDI, offsetof(bb_counts, total_size)+4), |
| OPND_CREATE_INT8(0))); |
| #else /* X86_64 */ |
| instrlist_meta_preinsert(bb, where, |
| INSTR_CREATE_inc(drcontext, |
| ! dr_using_all_private_caches() ? |
| ! OPND_CREATE_ABSMEM((byte *)&counts->blocks, OPSZ_8) : |
| ! OPND_CREATE_MEM64(DR_REG_XDI, offsetof(bb_counts, blocks)))); |
| instrlist_meta_preinsert(bb, where, |
| INSTR_CREATE_add(drcontext, |
| ! dr_using_all_private_caches() ? |
| ! OPND_CREATE_ABSMEM((byte *)&counts->total_size, OPSZ_8) : |
| ! OPND_CREATE_MEM64(DR_REG_XDI, offsetof(bb_counts, total_size)), |
| OPND_CREATE_INT_32OR8(num_instructions))); |
| #endif |
| + if (!dr_using_all_private_caches()) { |
| /* Restore spilled register. */ |
| dr_restore_reg(drcontext, bb, where, DR_REG_XDI, SPILL_SLOT_2); |
| + } |
| if (eflags_saved) { |
| dr_restore_arith_flags(drcontext, bb, where, SPILL_SLOT_1); |
| } |
| |
| return DR_EMIT_DEFAULT; |
| } |
| \endcode |
| |
| |
| [ \ref API_tutorial_bbdynsize7 "prev" | \subpage API_tutorial_bbdynsize9 "next" ] |
| |
| \page API_tutorial_bbdynsize9 average_bb_size.p9 |
| The final version, optimized for multi-core machines. On a single core the LOCK version |
| is probably faster, unless we are running with -thread_private, in which case the two |
| are probably close to equivalent. |
| |
| \code |
| #include "dr_api.h" |
| #include <stddef.h> |
| |
| #ifdef WINDOWS |
| # define DISPLAY_STRING(msg) dr_messagebox(msg) |
| #else |
| # define DISPLAY_STRING(msg) dr_printf("%s\n", msg) |
| #endif |
| |
| #define TESTALL(mask, var) (((mask) & (var)) == (mask)) |
| #define TESTANY(mask, var) (((mask) & (var)) != 0) |
| |
| typedef struct bb_counts { |
| uint64 blocks; |
| uint64 total_size; |
| } bb_counts; |
| |
| static bb_counts counts_as_built; |
| void *as_built_lock; |
| |
| static bb_counts counts_dynamic; |
| void *count_lock; |
| |
| /* Protected by the as_built_lock */ |
| static uint64 bbs_eflags_saved; |
| |
| |
| static void |
| event_exit(void); |
| static dr_emit_flags_t |
| event_basic_block(void *drcontext, void *tag, instrlist_t *bb, |
| bool for_trace, bool translating); |
| static void |
| event_thread_init(void *drcontext); |
| static void |
| event_thread_exit(void *drcontext); |
| |
| DR_EXPORT void |
| dr_client_main(client_id_t id, int argc, const char *argv[]) |
| { |
| /* register events */ |
| dr_register_exit_event(event_exit); |
| dr_register_bb_event(event_basic_block); |
| dr_register_thread_init_event(event_thread_init); |
| dr_register_thread_exit_event(event_thread_exit); |
| |
| /* initialize lock */ |
| as_built_lock = dr_mutex_create(); |
| count_lock = dr_mutex_create(); |
| } |
| |
| static void |
| event_exit(void) |
| { |
| /* Display results - we must first snprintf the string as on Windows |
| * dr_printf(), dr_messagebox() and dr_fprintf() can't print floats. */ |
| char msg[512]; |
| int len; |
| len = snprintf(msg, sizeof(msg)/sizeof(msg[0]), |
| "Number of blocks built : %"UINT64_FORMAT_CODE"\n" |
| " Average size : %5.2lf instructions\n" |
| " Num saved eflags : %"UINT64_FORMAT_CODE"\n" |
| "Number of blocks executed : %"UINT64_FORMAT_CODE"\n" |
| " Average weighted size : %5.2lf instructions\n", |
| counts_as_built.blocks, |
| counts_as_built.total_size / (double)counts_as_built.blocks, |
| bbs_eflags_saved, |
| counts_dynamic.blocks, |
| counts_dynamic.total_size / (double)counts_dynamic.blocks); |
| DR_ASSERT(len > 0); |
| msg[sizeof(msg)/sizeof(msg[0])-1] = '\0'; |
| DISPLAY_STRING(msg); |
| |
| /* free mutex */ |
| dr_mutex_destroy(as_built_lock); |
| dr_mutex_destroy(count_lock); |
| } |
| |
| static void |
| event_thread_init(void *drcontext) |
| { |
| /* create an instance of our data structure for this thread */ |
| bb_counts *counts = (bb_counts *)dr_thread_alloc(drcontext, sizeof(bb_counts)); |
| /* store it in the slot provided in the drcontext */ |
| dr_set_tls_field(drcontext, counts); |
| memset(counts, 0, sizeof(bb_counts)); |
| } |
| |
| static void |
| event_thread_exit(void *drcontext) |
| { |
| bb_counts *counts = (bb_counts *) dr_get_tls_field(drcontext); |
| /* NOTE - if we so choose we could report per-thread sizes here. */ |
| dr_mutex_lock(count_lock); |
| counts_dynamic.blocks += counts->blocks; |
| counts_dynamic.total_size += counts->total_size; |
| dr_mutex_unlock(count_lock); |
| dr_thread_free(drcontext, counts, sizeof(bb_counts)); |
| } |
| |
| static dr_emit_flags_t |
| event_basic_block(void *drcontext, void *tag, instrlist_t *bb, |
| bool for_trace, bool translating) |
| { |
| uint num_instructions = 0; |
| instr_t *instr, *where = NULL; |
| bool eflags_saved = true; |
| bb_counts *counts = (bb_counts *) dr_get_tls_field(drcontext); |
| |
| /* count the number of instructions in this block */ |
| for (instr = instrlist_first(bb); instr != NULL; instr = instr_get_next(instr)) { |
| /* Since it doesn't matter where we insert, look for a place |
| * where the eflags are dead. */ |
| uint flags = instr_get_arith_flags(instr, DR_QUERY_DEFAULT); |
| if (TESTALL(EFLAGS_WRITE_6, flags) && !TESTANY(EFLAGS_READ_6, flags)) { |
| where = instr; |
| eflags_saved = false; |
| } |
| num_instructions++; |
| } |
| |
| /* update the as-built counts */ |
| dr_mutex_lock(as_built_lock); |
| counts_as_built.blocks++; |
| counts_as_built.total_size += num_instructions; |
| if (eflags_saved) |
| bbs_eflags_saved++; |
| dr_mutex_unlock(as_built_lock); |
| |
| /* increment counters */ |
| if (eflags_saved) { |
| where = instrlist_first(bb); |
| dr_save_arith_flags(drcontext, bb, where, SPILL_SLOT_1); |
| } |
| /* If all caches are thread-private we can use direct addressing. */ |
| if (!dr_using_all_private_caches()) { |
| /* Spill a register to get a pointer to our TLS structure. */ |
| dr_save_reg(drcontext, bb, where, DR_REG_XDI, SPILL_SLOT_2); |
| dr_insert_read_tls_field(drcontext, bb, where, DR_REG_XDI); |
| } |
| #ifdef X86_32 |
| /* Since the counters are 64-bit we must use an add and an adc to increment. |
| * The operation is still effectively atomic since we're only increasing |
| * the count. */ |
| instrlist_meta_preinsert(bb, where, |
| INSTR_CREATE_add(drcontext, |
| dr_using_all_private_caches() ? |
| OPND_CREATE_ABSMEM((byte *)&counts->blocks, OPSZ_4) : |
| OPND_CREATE_MEM32(DR_REG_XDI, offsetof(bb_counts, blocks)), |
| OPND_CREATE_INT8(1))); |
| instrlist_meta_preinsert(bb, where, |
| INSTR_CREATE_adc(drcontext, |
| dr_using_all_private_caches() ? |
| OPND_CREATE_ABSMEM((byte *)&counts->blocks +4, OPSZ_4) : |
| OPND_CREATE_MEM32(DR_REG_XDI, offsetof(bb_counts, blocks)+4), |
| OPND_CREATE_INT8(0))); |
| |
| instrlist_meta_preinsert(bb, where, |
| INSTR_CREATE_add(drcontext, |
| dr_using_all_private_caches() ? |
| OPND_CREATE_ABSMEM((byte *)&counts->total_size, OPSZ_4) : |
| OPND_CREATE_MEM32(DR_REG_XDI, offsetof(bb_counts, total_size)), |
| OPND_CREATE_INT_32OR8(num_instructions))); |
| instrlist_meta_preinsert(bb, where, |
| INSTR_CREATE_adc(drcontext, |
| dr_using_all_private_caches() ? |
| OPND_CREATE_ABSMEM((byte *)&counts->total_size +4, OPSZ_4) : |
| OPND_CREATE_MEM32(DR_REG_XDI, offsetof(bb_counts, total_size)+4), |
| OPND_CREATE_INT8(0))); |
| #else /* X86_64 */ |
| instrlist_meta_preinsert(bb, where, |
| INSTR_CREATE_inc(drcontext, |
| dr_using_all_private_caches() ? |
| OPND_CREATE_ABSMEM((byte *)&counts->blocks, OPSZ_8) : |
| OPND_CREATE_MEM64(DR_REG_XDI, offsetof(bb_counts, blocks)))); |
| instrlist_meta_preinsert(bb, where, |
| INSTR_CREATE_add(drcontext, |
| dr_using_all_private_caches() ? |
| OPND_CREATE_ABSMEM((byte *)&counts->total_size, OPSZ_8) : |
| OPND_CREATE_MEM64(DR_REG_XDI, offsetof(bb_counts, total_size)), |
| OPND_CREATE_INT_32OR8(num_instructions))); |
| #endif |
| if (!dr_using_all_private_caches()) { |
| /* Restore spilled register. */ |
| dr_restore_reg(drcontext, bb, where, DR_REG_XDI, SPILL_SLOT_2); |
| } |
| if (eflags_saved) { |
| dr_restore_arith_flags(drcontext, bb, where, SPILL_SLOT_1); |
| } |
| |
| return DR_EMIT_DEFAULT; |
| } |
| \endcode |
| |
| [ \ref API_tutorial_bbdynsize8 "prev" | \ref API_tutorial "back" ] |
| |
| **/ |
| |
| /** |
| \page API_tutorial_annotation1 create_annotation |
| |
| In this tutorial we'll create a new annotation MEMCLIENT_MARK_MEM_DEFINED_IF_ADDRESSABLE() |
| for use in a hypothetical client named MemClient that detects use of uninitialized |
| variables. Since an annotation definition has several detailed requirements, it will be |
| easiest to start with a copy of a DynamoRIO annotation and transform it into the new |
| annotation. The first step is to copy the DynamoRIO source file |
| <b>core/lib/dr_annotations.h</b>: |
| |
| \code |
| #ifndef _DYNAMORIO_ANNOTATIONS_H_ |
| #define _DYNAMORIO_ANNOTATIONS_H_ 1 |
| |
| #include "dr_annotations_asm.h" |
| |
| /* To simplify project configuration, this pragma excludes the file from GCC warnings. */ |
| #ifdef __GNUC__ |
| # pragma GCC system_header |
| #endif |
| |
| #define DYNAMORIO_ANNOTATE_RUNNING_ON_DYNAMORIO() \ |
| dynamorio_annotate_running_on_dynamorio() |
| |
| #define DYNAMORIO_ANNOTATE_LOG(format, ...) \ |
| DR_ANNOTATION(dynamorio_annotate_log, format, ##__VA_ARGS__) |
| |
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
| |
| DR_DECLARE_ANNOTATION(char, dynamorio_annotate_running_on_dynamorio, (void)); |
| |
| DR_DECLARE_ANNOTATION(unsigned int, dynamorio_annotate_log, (const char *format, ...)); |
| |
| #ifdef __cplusplus |
| } |
| #endif |
| |
| #endif |
| \endcode |
| |
| [ \ref API_tutorial "prev" | \subpage API_tutorial_annotation2 "next" ] |
| |
| \page API_tutorial_annotation2 create_annotation.p2 |
| |
| Next, modify elements of the annotation macro and declaration according to its new |
| purpose: |
| |
| 1. Change the name; |
| 2. Change the return type to void; |
| 3. Replace the formal parameter list. |
| |
| \code |
| +#ifndef _MEMCLIENT_ANNOTATIONS_H_ |
| +#define _MEMCLIENT_ANNOTATIONS_H_ 1 |
| |
| #include "dr_annotations_asm.h" |
| |
| /* To simplify project configuration, this pragma excludes the file from GCC warnings. */ |
| #ifdef __GNUC__ |
| # pragma GCC system_header |
| #endif |
| |
| +#define MEMCLIENT_MARK_MEM_DEFINED_IF_ADDRESSABLE(start, size) \ |
| + DR_ANNOTATION(memclient_mark_mem_defined_if_addressable, start, size) |
| |
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
| |
| +DR_DECLARE_ANNOTATION(void, memclient_mark_mem_defined_if_addressable, |
| + (void *p, size_t size)); |
| |
| #ifdef __cplusplus |
| } |
| #endif |
| |
| #endif |
| \endcode |
| |
| Note that one of the original annotations has been deleted. This completes the header for |
| the new annotation, and now the source must be similarly copied and modified. |
| |
| [ \ref API_tutorial_annotation1 "prev" | \subpage API_tutorial_annotation3 "next" ] |
| |
| \page API_tutorial_annotation3 create_annotation.p3 |
| |
| To create the annotation source file, first copy <b>core/lib/dr_annotations.c</b>: |
| |
| \code |
| #include "dr_annotations.h" |
| |
| DR_DEFINE_ANNOTATION(char, dynamorio_annotate_running_on_dynamorio, (void), return 0) |
| |
| DR_DEFINE_ANNOTATION(unsigned int, dynamorio_annotate_log, (const char *format, ...), |
| return 0) |
| \endcode |
| |
| Note that the last argument to the macro specifies the body of the annotation function, |
| which in the case of these annotations must contain a return statement because the |
| annotations are non-void. |
| |
| [ \ref API_tutorial_annotation2 "prev" | \subpage API_tutorial_annotation4 "next" ] |
| |
| \page API_tutorial_annotation4 create_annotation.p4 |
| |
| Next, modify elements of the annotation definition to match the header: |
| |
| \code |
| #include "dr_annotations.h" |
| |
| +DR_DEFINE_ANNOTATION(void, memclient_mark_mem_defined_if_addressable, |
| + (void *p, size_t size), ) |
| \endcode |
| |
| The last argument to the macro is now empty, since the annotation is now void and no |
| longer requires (or allows) a return statement. |
| |
| [ \ref API_tutorial_annotation3 "prev" | \subpage API_tutorial_annotation5 "next" ] |
| |
| \page API_tutorial_annotation5 create_annotation.p5 |
| |
| Target applications will now be able to mark any region of memory as defined by simply |
| invoking the macro MEMCLIENT_MARK_MEM_DEFINED_IF_ADDRESSABLE(). To implement the |
| corresponding functionality in MemClient, the client developer creates a handler |
| function having the same signature as the annotation function and registers it for the |
| annotation using dr_annotation_register_call(). For example: |
| |
| \code |
| #include "dr_api.h" |
| |
| void handle_mark_mem_defined_if_addressable(void *p, size_t size) |
| { |
| size_t *skip_size = dr_global_alloc(sizeof(size_t)); |
| *skip_size = size; |
| hashtable_add_replace(skip_init_check_table, p, skip_size); |
| } |
| |
| DR_EXPORT void |
| dr_client_main(client_id_t id, int argc, const char *argv[]) |
| { |
| dr_annotation_register_call("memclient_mark_mem_defined_if_addressable", |
| handle_mark_mem_defined_if_addressable, false, 2); |
| } |
| \endcode |
| |
| After registering the handler, all instances of the new annotation will be transformed |
| into a clean call to handle_mark_mem_defined_if_addressable(), where the arguments to |
| the annotation macro become arguments to the clean call. |
| |
| [ \ref API_tutorial_annotation4 "prev" | \subpage API_tutorial_annotation6 "next" ] |
| |
| \page API_tutorial_annotation6 create_annotation.p6 |
| |
| Since the annotation declarations and definitions are only referenced by target programs |
| (not by MemClient itself), no additions to the MemClient build are required. However, to |
| simplify integration of the new annotation into target programs, the MemClient |
| developer may wish to provide a cmake package that defines a function for configuring |
| annotations. DynamoRIO provides a similar function use_DynamoRIO_annotations() for its |
| default annotations, so it will be easiest to start with a copy of that function from |
| DynamoRIOConfig.cmake (which is located in the DynamoRIO source tree in |
| make/DynamoRIOConfig.cmake.in): |
| |
| \verbatim |
| # For configuring target applications that use default DynamoRIO annotations |
| function (use_DynamoRIO_annotations target target_srcs) |
| set(dr_annotation_dir "${DynamoRIO_cwd}/../include/annotations") |
| set(dr_annotation_srcs "${dr_annotation_dir}/dr_annotations.c") |
| configure_DynamoRIO_annotation_sources("${dr_annotation_srcs}") |
| set(${target_srcs} ${${target_srcs}} ${dr_annotation_srcs} PARENT_SCOPE) |
| endfunction (use_DynamoRIO_annotations target target_srcs) |
| \endverbatim |
| |
| [ \ref API_tutorial_annotation5 "prev" | \subpage API_tutorial_annotation7 "next" ] |
| |
| \page API_tutorial_annotation7 create_annotation.p7 |
| |
| Next modify elements of the cmake function for usage with the MemClient annotations: |
| |
| 1. Replace DynamoRIO with the client name, e.g. MemClient; |
| 2. Change the variable prefix from dr_ to a suitable prefix, e.g. memclient_; |
| 3. Specify the client's annotation source file(s). |
| |
| \verbatim |
| +# For configuring target applications that use MemClient annotations |
| +function (use_MemClient_annotations target target_srcs) |
| + set(memclient_annotation_dir "${MemClient_cwd}/../include/annotations") |
| + set(memclient_annotation_srcs "${memclient_annotation_dir}/memclient_annotations.c") |
| + configure_DynamoRIO_annotation_sources("${memclient_annotation_srcs}") |
| + set(${target_srcs} ${${target_srcs}} ${memclient_annotation_srcs} PARENT_SCOPE) |
| +endfunction (use_MemClient_annotations target target_srcs) |
| \endverbatim |
| |
| Note that the cmake variable memclient_annotation_dir has been set to the same directory |
| as the default DynamoRIO annotation directory, but this is not required--the client |
| developer may choose to put the new annotations in any directory within the build output |
| area. |
| |
| In more general terms, this cmake function takes the following steps: |
| |
| 1. Copies the MemClient annotation header and source file to the build output area; |
| 2. Assigns the required compiler flags; |
| 3. Adds the source file to the project source list. |
| |
| Target applications that are not built with cmake will need to take these same 3 steps in |
| a comparable way. |
| |
| [ \ref API_tutorial_annotation6 "prev" | \ref API_tutorial "back" ] |
| **/ |
| |
| |
| /** |
| \page API_tutorial_steal_reg1 steal_reg |
| <b>coming soon</b> |
| |
| [ \ref API_tutorial "back" ] |
| **/ |
| |
| |
| /** |
| \page API_tutorial_prefetch1 prefetch |
| <b>coming soon</b> |
| |
| [ \ref API_tutorial "back" ] |
| **/ |