blob: 53d6609b2e906eeaa9a1f31abf545c263d7a8ab4 [file] [log] [blame]
/* **********************************************************
* Copyright (c) 2012-2015 Google, Inc. All rights reserved.
* Copyright (c) 2008-2009 VMware, Inc. All rights reserved.
* **********************************************************/
/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/**
\page API_tutorial API Usage Tutorial
Below we provide a few short tutorials on using the DynamoRIO API to write
several clients.
- A client that tracks the average dynamic basic block size
demonstrating some simple instrumentation : \subpage API_tutorial_bbdynsize1 "average_bb_size".
- A client that steals a register from the application demonstrating
some simple state translation : \subpage API_tutorial_steal_reg1 "steal_reg".
- A client that adjusts prefetch instructions demonstrating some simple
app modifications : \subpage API_tutorial_prefetch1 "prefetch".
The following tutorial additionally shows how to create a new annotation to support a
client-specific feature.
- A new annotation for a hypothetical memory usage client. The annotation marks any
specified memory region as defined, even when the client analysis shows it is undefined :
\subpage API_tutorial_annotation1 "create_annotation"
**/
/**
\page API_tutorial_bbdynsize1 average_bb_size
In this tutorial we'll create a simple client that will compute the
average size of basic blocks executed (in instructions) weighted by
their execution frequency. We start with an empty DynamoRIO client.
\code
#include "dr_api.h"
DR_EXPORT void
dr_client_main(client_id_t id, int argc, const char *argv[])
{
/* empty */
}
\endcode
[ \ref API_tutorial "prev" | \subpage API_tutorial_bbdynsize2 "next" ]
\page API_tutorial_bbdynsize2 average_bb_size.p2
Now we register for events we'll need. For our first pass we only
need to register for the bb event (to add our instrumentation) and the
exit event (to display the results). We don't need to register for
trace events (as we will add our instrumentation to the constituent
bbs), and we can ignore the \p for_trace and \p translating arguments
to the basic block callback: our changes are deterministic and
idempotent, so DynamoRIO's default translation will work fine.
\code
#include "dr_api.h"
+ static void
+ event_exit(void);
+ static dr_emit_flags_t
+ event_basic_block(void *drcontext, void *tag, instrlist_t *bb,
+ bool for_trace, bool translating);
DR_EXPORT void
dr_client_main(client_id_t id, int argc, const char *argv[])
{
+ /* register events */
+ dr_register_exit_event(event_exit);
+ dr_register_bb_event(event_basic_block);
}
+ static void
+ event_exit(void)
+ {
+ /* empty */
+ }
+
+ static dr_emit_flags_t
+ event_basic_block(void *drcontext, void *tag, instrlist_t *bb,
+ bool for_trace, bool translating)
+ {
+ /* empty */
+ return DR_EMIT_DEFAULT;
+ }
\endcode
[ \ref API_tutorial_bbdynsize1 "prev" | \subpage API_tutorial_bbdynsize3 "next" ]
\page API_tutorial_bbdynsize3 average_bb_size.p3
Next we add code to track the size of blocks as we build them for our
as-built count. This will give us the number of blocks built and
their average size. Next we will add the dynamic execution counts.
\code
#include "dr_api.h"
+ #ifdef WINDOWS
+ # define DISPLAY_STRING(msg) dr_messagebox(msg)
+ #else
+ # define DISPLAY_STRING(msg) dr_printf("%s\n", msg)
+ #endif
+ typedef struct bb_counts {
+ uint64 blocks;
+ uint64 total_size;
+ } bb_counts;
+ static bb_counts counts_as_built;
+ void *as_built_lock;
static void
event_exit(void);
static dr_emit_flags_t
event_basic_block(void *drcontext, void *tag, instrlist_t *bb,
bool for_trace, bool translating);
DR_EXPORT void
dr_client_main(client_id_t id, int argc, const char *argv[])
{
/* register events */
dr_register_exit_event(event_exit);
dr_register_bb_event(event_basic_block);
+
+ /* initialize lock */
+ as_built_lock = dr_mutex_create();
}
static void
event_exit(void)
{
+ /* Display results - we must first snprintf the string as on Windows
+ * dr_printf(), dr_messagebox() and dr_fprintf() can't print floats. */
+ char msg[512];
+ int len;
+ len = snprintf(msg, sizeof(msg)/sizeof(msg[0]),
+ "Number of basic blocks built : %"UINT64_FORMAT_CODE"\n"
+ " Average size : %5.2lf instructions\n",
+ counts_as_built.blocks,
+ counts_as_built.total_size / (double)counts_as_built.blocks);
+ DR_ASSERT(len > 0);
+ msg[sizeof(msg)/sizeof(msg[0])-1] = '\0'; /* NUL-terminate */
+ DISPLAY_STRING(msg);
+
+ /* free mutex */
+ dr_mutex_destroy(as_built_lock);
}
static dr_emit_flags_t
event_basic_block(void *drcontext, void *tag, instrlist_t *bb,
bool for_trace, bool translating)
{
+ uint num_instructions = 0;
+ instr_t *instr;
+
+ /* count the number of instructions in this block */
+ for (instr = instrlist_first(bb); instr != NULL; instr = instr_get_next(instr)) {
+ num_instructions++;
+ }
+
+ /* update the as-built counts */
+ dr_mutex_lock(as_built_lock);
+ counts_as_built.blocks++;
+ counts_as_built.total_size += num_instructions;
+ dr_mutex_unlock(as_built_lock);
return DR_EMIT_DEFAULT;
}
\endcode
[ \ref API_tutorial_bbdynsize2 "prev" | \subpage API_tutorial_bbdynsize4 "next" ]
\page API_tutorial_bbdynsize4 average_bb_size.p4
Now we add instrumentation to gather the dynamic counts. For the
first pass we'll use a clean call to increment the counters from each
block. However, this approach is very slow, since each basic block
execution triggers a context switch out of the code cache. We'll look at
other approaches next.
\code
#include "dr_api.h"
#ifdef WINDOWS
# define DISPLAY_STRING(msg) dr_messagebox(msg)
#else
# define DISPLAY_STRING(msg) dr_printf("%s\n", msg)
#endif
typedef struct bb_counts {
uint64 blocks;
uint64 total_size;
} bb_counts;
static bb_counts counts_as_built;
void *as_built_lock;
+ static bb_counts counts_dynamic;
+ void *count_lock;
static void
event_exit(void);
static dr_emit_flags_t
event_basic_block(void *drcontext, void *tag, instrlist_t *bb,
bool for_trace, bool translating);
+ static void
+ clean_call(uint instruction_count);
DR_EXPORT void
dr_client_main(client_id_t id, int argc, const char *argv[])
{
/* register events */
dr_register_exit_event(event_exit);
dr_register_bb_event(event_basic_block);
/* initialize lock */
as_built_lock = dr_mutex_create();
+ count_lock = dr_mutex_create();
}
static void
event_exit(void)
{
/* Display results - we must first snprintf the string as on Windows
* dr_printf(), dr_messagebox() and dr_fprintf() can't print floats. */
char msg[512];
int len;
len = snprintf(msg, sizeof(msg)/sizeof(msg[0]),
"Number of blocks built : %"UINT64_FORMAT_CODE"\n"
" Average size : %5.2lf instructions\n"
+ "Number of blocks executed : %"UINT64_FORMAT_CODE"\n"
+ " Average weighted size : %5.2lf instructions\n",
counts_as_built.blocks,
counts_as_built.total_size / (double)counts_as_built.blocks,
+ counts_dynamic.blocks,
+ counts_dynamic.total_size / (double)counts_dynamic.blocks);
DR_ASSERT(len > 0);
msg[sizeof(msg)/sizeof(msg[0])-1] = '\0';
DISPLAY_STRING(msg);
/* free mutex */
dr_mutex_destroy(as_built_lock);
+ dr_mutex_destroy(count_lock);
}
+ static void
+ clean_call(uint instruction_count)
+ {
+ dr_mutex_lock(count_lock);
+ counts_dynamic.blocks++;
+ counts_dynamic.total_size += instruction_count;
+ dr_mutex_unlock(count_lock);
+ }
static dr_emit_flags_t
event_basic_block(void *drcontext, void *tag, instrlist_t *bb,
bool for_trace, bool translating)
{
uint num_instructions = 0;
instr_t *instr;
/* count the number of instructions in this block */
for (instr = instrlist_first(bb); instr != NULL; instr = instr_get_next(instr)) {
num_instructions++;
}
/* update the as-built counts */
dr_mutex_lock(as_built_lock);
counts_as_built.blocks++;
counts_as_built.total_size += num_instructions;
dr_mutex_unlock(as_built_lock);
+ /* insert clean call */
+ dr_insert_clean_call(drcontext, bb, instrlist_first(bb), (void *)clean_call,
+ false, 1, OPND_CREATE_INT32(num_instructions));
+
return DR_EMIT_DEFAULT;
}
\endcode
[ \ref API_tutorial_bbdynsize3 "prev" | \subpage API_tutorial_bbdynsize5 "next" ]
\page API_tutorial_bbdynsize5 average_bb_size.p5
In this variation we use inlined atomic instructions to adjust the counters. On
a single core box this form should be quite fast, but on a multi-core box the
cache line locking for atomic operations will slow things dramatically. We'll
look at forms more suitable for multi-core further on.
\code
#include "dr_api.h"
#ifdef WINDOWS
# define DISPLAY_STRING(msg) dr_messagebox(msg)
#else
# define DISPLAY_STRING(msg) dr_printf("%s\n", msg)
#endif
typedef struct bb_counts {
uint64 blocks;
uint64 total_size;
} bb_counts;
static bb_counts counts_as_built;
void *as_built_lock;
static bb_counts counts_dynamic;
static void
event_exit(void);
static dr_emit_flags_t
event_basic_block(void *drcontext, void *tag, instrlist_t *bb,
bool for_trace, bool translating);
DR_EXPORT void
dr_client_main(client_id_t id, int argc, const char *argv[])
{
/* register events */
dr_register_exit_event(event_exit);
dr_register_bb_event(event_basic_block);
/* initialize lock */
as_built_lock = dr_mutex_create();
}
static void
event_exit(void)
{
/* Display results - we must first snprintf the string as on Windows
* dr_printf(), dr_messagebox() and dr_fprintf() can't print floats. */
char msg[512];
int len;
len = snprintf(msg, sizeof(msg)/sizeof(msg[0]),
"Number of blocks built : %"UINT64_FORMAT_CODE"\n"
" Average size : %5.2lf instructions\n"
"Number of blocks executed : %"UINT64_FORMAT_CODE"\n"
" Average weighted size : %5.2lf instructions\n",
counts_as_built.blocks,
counts_as_built.total_size / (double)counts_as_built.blocks,
counts_dynamic.blocks,
counts_dynamic.total_size / (double)counts_dynamic.blocks);
DR_ASSERT(len > 0);
msg[sizeof(msg)/sizeof(msg[0])-1] = '\0';
DISPLAY_STRING(msg);
/* free mutex */
dr_mutex_destroy(as_built_lock);
}
static dr_emit_flags_t
event_basic_block(void *drcontext, void *tag, instrlist_t *bb,
bool for_trace, bool translating)
{
uint num_instructions = 0;
instr_t *instr, *where = NULL;
/* count the number of instructions in this block */
for (instr = instrlist_first(bb); instr != NULL; instr = instr_get_next(instr)) {
num_instructions++;
}
/* update the as-built counts */
dr_mutex_lock(as_built_lock);
counts_as_built.blocks++;
counts_as_built.total_size += num_instructions;
dr_mutex_unlock(as_built_lock);
+ /* increment counters */
+ where = instrlist_first(bb);
+ dr_save_arith_flags(drcontext, bb, where, SPILL_SLOT_1);
+ #ifdef X86_32
+ /* Since the counters are 64-bit we must use an add + an adc to increment.
+ * The operation is still effectively atomic since we're only increasing
+ * the count. */
+ instrlist_meta_preinsert(bb, where,
+ LOCK(INSTR_CREATE_add(drcontext,
+ OPND_CREATE_ABSMEM((byte *)&counts_dynamic.blocks, OPSZ_4),
+ OPND_CREATE_INT8(1))));
+ instrlist_meta_preinsert(bb, where,
+ LOCK(INSTR_CREATE_adc(drcontext,
+ OPND_CREATE_ABSMEM((byte *)&counts_dynamic.blocks + 4, OPSZ_4),
+ OPND_CREATE_INT8(0))));
+
+ instrlist_meta_preinsert(bb, where,
+ LOCK(INSTR_CREATE_add(drcontext,
+ OPND_CREATE_ABSMEM((byte *)&counts_dynamic.total_size, OPSZ_4),
+ OPND_CREATE_INT_32OR8(num_instructions))));
+ instrlist_meta_preinsert(bb, where,
+ LOCK(INSTR_CREATE_adc(drcontext,
+ OPND_CREATE_ABSMEM((byte *)&counts_dynamic.total_size + 4, OPSZ_4),
+ OPND_CREATE_INT8(0))));
+ #else /* X86_64 */
+ instrlist_meta_preinsert(bb, where,
+ LOCK(INSTR_CREATE_inc(drcontext,
+ OPND_CREATE_ABSMEM((byte *)&counts_dynamic.blocks, OPSZ_8))));
+ instrlist_meta_preinsert(bb, where,
+ LOCK(INSTR_CREATE_add(drcontext,
+ OPND_CREATE_ABSMEM((byte *)&counts_dynamic.total_size, OPSZ_8),
+ OPND_CREATE_INT_32OR8(num_instructions))));
+ #endif
+ dr_restore_arith_flags(drcontext, bb, where, SPILL_SLOT_1);
+
return DR_EMIT_DEFAULT;
}
\endcode
[ \ref API_tutorial_bbdynsize4 "prev" | \subpage API_tutorial_bbdynsize6 "next" ]
\page API_tutorial_bbdynsize6 average_bb_size.p6
Next up we add an optimization to only save and restore the flags if we can't find a spot
within the block where the flags are dead.
\code
#include "dr_api.h"
#ifdef WINDOWS
# define DISPLAY_STRING(msg) dr_messagebox(msg)
#else
# define DISPLAY_STRING(msg) dr_printf("%s\n", msg)
#endif
+ #define TESTALL(mask, var) (((mask) & (var)) == (mask))
+ #define TESTANY(mask, var) (((mask) & (var)) != 0)
typedef struct bb_counts {
uint64 blocks;
uint64 total_size;
} bb_counts;
static bb_counts counts_as_built;
void *as_built_lock;
static bb_counts counts_dynamic;
+ /* Protected by the as_built_lock */
+ static uint64 bbs_eflags_saved;
static void
event_exit(void);
static dr_emit_flags_t
event_basic_block(void *drcontext, void *tag, instrlist_t *bb,
bool for_trace, bool translating);
DR_EXPORT void
dr_client_main(client_id_t id, int argc, const char *argv[])
{
/* register events */
dr_register_exit_event(event_exit);
dr_register_bb_event(event_basic_block);
/* initialize lock */
as_built_lock = dr_mutex_create();
}
static void
event_exit(void)
{
/* Display results - we must first snprintf the string as on Windows
* dr_printf(), dr_messagebox() and dr_fprintf() can't print floats. */
char msg[512];
int len;
len = snprintf(msg, sizeof(msg)/sizeof(msg[0]),
"Number of blocks built : %"UINT64_FORMAT_CODE"\n"
" Average size : %5.2lf instructions\n"
+ " Num saved eflags : %"UINT64_FORMAT_CODE"\n"
"Number of blocks executed : %"UINT64_FORMAT_CODE"\n"
" Average weighted size : %5.2lf instructions\n",
counts_as_built.blocks,
counts_as_built.total_size / (double)counts_as_built.blocks,
+ bbs_eflags_saved,
counts_dynamic.blocks,
counts_dynamic.total_size / (double)counts_dynamic.blocks);
DR_ASSERT(len > 0);
msg[sizeof(msg)/sizeof(msg[0])-1] = '\0';
DISPLAY_STRING(msg);
/* free mutex */
dr_mutex_destroy(as_built_lock);
}
static dr_emit_flags_t
event_basic_block(void *drcontext, void *tag, instrlist_t *bb,
bool for_trace, bool translating)
{
uint num_instructions = 0;
instr_t *instr, *where = NULL;
+ bool eflags_saved = true;
/* count the number of instructions in this block */
for (instr = instrlist_first(bb); instr != NULL; instr = instr_get_next(instr)) {
+ /* Since it doesn't matter where we insert, look for a place
+ * where the eflags are dead. */
+ uint flags = instr_get_arith_flags(instr, DR_QUERY_DEFAULT);
+ if (TESTALL(EFLAGS_WRITE_6, flags) && !TESTANY(EFLAGS_READ_6, flags)) {
+ where = instr;
+ eflags_saved = false;
+ }
num_instructions++;
}
/* update the as-built counts */
dr_mutex_lock(as_built_lock);
counts_as_built.blocks++;
counts_as_built.total_size += num_instructions;
+ if (eflags_saved)
+ bbs_eflags_saved++;
dr_mutex_unlock(as_built_lock);
/* increment counters */
+ if (eflags_saved) {
+ where = instrlist_first(bb);
dr_save_arith_flags(drcontext, bb, where, SPILL_SLOT_1);
+ }
#ifdef X86_32
/* Since the counters are 64-bit we must use an add + an adc to increment.
* The operation is still effectively atomic since we're only increasing
* the count. */
instrlist_meta_preinsert(bb, where,
LOCK(INSTR_CREATE_add(drcontext,
OPND_CREATE_ABSMEM((byte *)&counts_dynamic.blocks, OPSZ_4),
OPND_CREATE_INT8(1))));
instrlist_meta_preinsert(bb, where,
LOCK(INSTR_CREATE_adc(drcontext,
OPND_CREATE_ABSMEM((byte *)&counts_dynamic.blocks + 4, OPSZ_4),
OPND_CREATE_INT8(0))));
instrlist_meta_preinsert(bb, where,
LOCK(INSTR_CREATE_add(drcontext,
OPND_CREATE_ABSMEM((byte *)&counts_dynamic.total_size, OPSZ_4),
OPND_CREATE_INT_32OR8(num_instructions))));
instrlist_meta_preinsert(bb, where,
LOCK(INSTR_CREATE_adc(drcontext,
OPND_CREATE_ABSMEM((byte *)&counts_dynamic.total_size + 4, OPSZ_4),
OPND_CREATE_INT8(0))));
#else /* X86_64 */
instrlist_meta_preinsert(bb, where,
LOCK(INSTR_CREATE_inc(drcontext,
OPND_CREATE_ABSMEM((byte *)&counts_dynamic.blocks, OPSZ_8))));
instrlist_meta_preinsert(bb, where,
LOCK(INSTR_CREATE_add(drcontext,
OPND_CREATE_ABSMEM((byte *)&counts_dynamic.total_size, OPSZ_8),
OPND_CREATE_INT_32OR8(num_instructions))));
#endif
+ if (eflags_saved) {
dr_restore_arith_flags(drcontext, bb, where, SPILL_SLOT_1);
+ }
return DR_EMIT_DEFAULT;
}
\endcode
[ \ref API_tutorial_bbdynsize5 "prev" | \subpage API_tutorial_bbdynsize7 "next" ]
\page API_tutorial_bbdynsize7 average_bb_size.p7
We now have a pretty fast implementation, probably about as fast as we can get
on a single-core machine. However, on multi-core systems the cache
line locking for the atomic operations becomes a bottleneck. To get around that
we can use thread-private counts and aggregate them. With thread-private counts
no locked instructions are needed; however, the extra indirection does have its
own cost of extra added instructions to spill a register and get the TLS pointer,
so on a single core this will be slower.
\code
#include "dr_api.h"
+ #include <stddef.h>
#ifdef WINDOWS
# define DISPLAY_STRING(msg) dr_messagebox(msg)
#else
# define DISPLAY_STRING(msg) dr_printf("%s\n", msg)
#endif
#define TESTALL(mask, var) (((mask) & (var)) == (mask))
#define TESTANY(mask, var) (((mask) & (var)) != 0)
typedef struct bb_counts {
uint64 blocks;
uint64 total_size;
} bb_counts;
static bb_counts counts_as_built;
void *as_built_lock;
static bb_counts counts_dynamic;
+ void *count_lock;
/* Protected by the as_built_lock */
static uint64 bbs_eflags_saved;
static void
event_exit(void);
static dr_emit_flags_t
event_basic_block(void *drcontext, void *tag, instrlist_t *bb,
bool for_trace, bool translating);
+ static void
+ event_thread_init(void *drcontext);
+ static void
+ event_thread_exit(void *drcontext);
DR_EXPORT void
dr_client_main(client_id_t id, int argc, const char *argv[])
{
/* register events */
dr_register_exit_event(event_exit);
dr_register_bb_event(event_basic_block);
+ dr_register_thread_init_event(event_thread_init);
+ dr_register_thread_exit_event(event_thread_exit);
/* initialize lock */
as_built_lock = dr_mutex_create();
+ count_lock = dr_mutex_create();
}
static void
event_exit(void)
{
/* Display results - we must first snprintf the string as on Windows
* dr_printf(), dr_messagebox() and dr_fprintf() can't print floats. */
char msg[512];
int len;
len = snprintf(msg, sizeof(msg)/sizeof(msg[0]),
"Number of blocks built : %"UINT64_FORMAT_CODE"\n"
" Average size : %5.2lf instructions\n"
" Num saved eflags : %"UINT64_FORMAT_CODE"\n"
"Number of blocks executed : %"UINT64_FORMAT_CODE"\n"
" Average weighted size : %5.2lf instructions\n",
counts_as_built.blocks,
counts_as_built.total_size / (double)counts_as_built.blocks,
bbs_eflags_saved,
counts_dynamic.blocks,
counts_dynamic.total_size / (double)counts_dynamic.blocks);
DR_ASSERT(len > 0);
msg[sizeof(msg)/sizeof(msg[0])-1] = '\0';
DISPLAY_STRING(msg);
/* free mutex */
dr_mutex_destroy(as_built_lock);
+ dr_mutex_destroy(count_lock);
}
+ static void
+ event_thread_init(void *drcontext)
+ {
+ /* create an instance of our data structure for this thread */
+ bb_counts *counts = (bb_counts *)dr_thread_alloc(drcontext, sizeof(bb_counts));
+ /* store it in the slot provided in the drcontext */
+ dr_set_tls_field(drcontext, counts);
+ memset(counts, 0, sizeof(bb_counts));
+ }
+
+ static void
+ event_thread_exit(void *drcontext)
+ {
+ bb_counts *counts = (bb_counts *) dr_get_tls_field(drcontext);
+ /* NOTE - if we so choose we could report per-thread sizes here. */
+ dr_mutex_lock(count_lock);
+ counts_dynamic.blocks += counts->blocks;
+ counts_dynamic.total_size += counts->total_size;
+ dr_mutex_unlock(count_lock);
+ dr_thread_free(drcontext, counts, sizeof(bb_counts));
+ }
static dr_emit_flags_t
event_basic_block(void *drcontext, void *tag, instrlist_t *bb,
bool for_trace, bool translating)
{
uint num_instructions = 0;
instr_t *instr, *where = NULL;
bool eflags_saved = true;
+ bb_counts *counts = (bb_counts *) dr_get_tls_field(drcontext);
/* count the number of instructions in this block */
for (instr = instrlist_first(bb); instr != NULL; instr = instr_get_next(instr)) {
/* Since it doesn't matter where we insert, look for a place
* where the eflags are dead. */
uint flags = instr_get_arith_flags(instr, DR_QUERY_DEFAULT);
if (TESTALL(EFLAGS_WRITE_6, flags) && !TESTANY(EFLAGS_READ_6, flags)) {
where = instr;
eflags_saved = false;
}
num_instructions++;
}
/* update the as-built counts */
dr_mutex_lock(as_built_lock);
counts_as_built.blocks++;
counts_as_built.total_size += num_instructions;
if (eflags_saved)
bbs_eflags_saved++;
dr_mutex_unlock(as_built_lock);
/* increment counters */
if (eflags_saved) {
where = instrlist_first(bb);
dr_save_arith_flags(drcontext, bb, where, SPILL_SLOT_1);
}
+ /* Spill a register to get a pointer to our TLS structure. */
+ dr_save_reg(drcontext, bb, where, DR_REG_XDI, SPILL_SLOT_2);
+ dr_insert_read_tls_field(drcontext, bb, where, DR_REG_XDI);
#ifdef X86_32
/* Since the counters are 64-bit we must use an add + an adc to increment.
* The operation is still effectively atomic since we're only increasing
* the count. */
instrlist_meta_preinsert(bb, where,
! INSTR_CREATE_add(drcontext,
! OPND_CREATE_MEM32(DR_REG_XDI, offsetof(bb_counts, blocks)),
OPND_CREATE_INT8(1)));
instrlist_meta_preinsert(bb, where,
! INSTR_CREATE_adc(drcontext,
! OPND_CREATE_MEM32(DR_REG_XDI, offsetof(bb_counts, blocks)+4),
OPND_CREATE_INT8(0)));
instrlist_meta_preinsert(bb, where,
! INSTR_CREATE_add(drcontext,
! OPND_CREATE_MEM32(DR_REG_XDI, offsetof(bb_counts, total_size)),
OPND_CREATE_INT_32OR8(num_instructions)));
instrlist_meta_preinsert(bb, where,
! INSTR_CREATE_adc(drcontext,
! OPND_CREATE_MEM32(DR_REG_XDI, offsetof(bb_counts, total_size)+4),
OPND_CREATE_INT8(0)));
#else /* X86_64 */
instrlist_meta_preinsert(bb, where,
! INSTR_CREATE_inc(drcontext,
! OPND_CREATE_MEM64(DR_REG_XDI, offsetof(bb_counts, blocks))));
instrlist_meta_preinsert(bb, where,
! INSTR_CREATE_add(drcontext,
! OPND_CREATE_MEM64(DR_REG_XDI, offsetof(bb_counts, total_size)),
OPND_CREATE_INT_32OR8(num_instructions)));
#endif
+ /* Restore spilled register. */
+ dr_restore_reg(drcontext, bb, where, DR_REG_XDI, SPILL_SLOT_2);
if (eflags_saved) {
dr_restore_arith_flags(drcontext, bb, where, SPILL_SLOT_1);
}
return DR_EMIT_DEFAULT;
}
\endcode
[ \ref API_tutorial_bbdynsize6 "prev" | \subpage API_tutorial_bbdynsize8 "next" ]
\page API_tutorial_bbdynsize8 average_bb_size.p8
Our final optimization is to note that if we're running with thread-private caches
(i.e., with the -thread_private option to DynamoRIO), we can use absolute addressing
on the per-thread counters instead of indirecting via TLS, which should greatly speed
things up. The downside to -thread_private is that on applications with many threads
memory use can get quite high, potentially enough to impact performance of the system.
\code
#include "dr_api.h"
#include <stddef.h>
#ifdef WINDOWS
# define DISPLAY_STRING(msg) dr_messagebox(msg)
#else
# define DISPLAY_STRING(msg) dr_printf("%s\n", msg)
#endif
#define TESTALL(mask, var) (((mask) & (var)) == (mask))
#define TESTANY(mask, var) (((mask) & (var)) != 0)
typedef struct bb_counts {
uint64 blocks;
uint64 total_size;
} bb_counts;
static bb_counts counts_as_built;
void *as_built_lock;
static bb_counts counts_dynamic;
void *count_lock;
/* Protected by the as_built_lock */
static uint64 bbs_eflags_saved;
static void
event_exit(void);
static dr_emit_flags_t
event_basic_block(void *drcontext, void *tag, instrlist_t *bb,
bool for_trace, bool translating);
static void
event_thread_init(void *drcontext);
static void
event_thread_exit(void *drcontext);
DR_EXPORT void
dr_client_main(client_id_t id, int argc, const char *argv[])
{
/* register events */
dr_register_exit_event(event_exit);
dr_register_bb_event(event_basic_block);
dr_register_thread_init_event(event_thread_init);
dr_register_thread_exit_event(event_thread_exit);
/* initialize lock */
as_built_lock = dr_mutex_create();
count_lock = dr_mutex_create();
}
static void
event_exit(void)
{
/* Display results - we must first snprintf the string as on Windows
* dr_printf(), dr_messagebox() and dr_fprintf() can't print floats. */
char msg[512];
int len;
len = snprintf(msg, sizeof(msg)/sizeof(msg[0]),
"Number of blocks built : %"UINT64_FORMAT_CODE"\n"
" Average size : %5.2lf instructions\n"
" Num saved eflags : %"UINT64_FORMAT_CODE"\n"
"Number of blocks executed : %"UINT64_FORMAT_CODE"\n"
" Average weighted size : %5.2lf instructions\n",
counts_as_built.blocks,
counts_as_built.total_size / (double)counts_as_built.blocks,
bbs_eflags_saved,
counts_dynamic.blocks,
counts_dynamic.total_size / (double)counts_dynamic.blocks);
DR_ASSERT(len > 0);
msg[sizeof(msg)/sizeof(msg[0])-1] = '\0';
DISPLAY_STRING(msg);
/* free mutex */
dr_mutex_destroy(as_built_lock);
dr_mutex_destroy(count_lock);
}
static void
event_thread_init(void *drcontext)
{
/* create an instance of our data structure for this thread */
bb_counts *counts = (bb_counts *)dr_thread_alloc(drcontext, sizeof(bb_counts));
/* store it in the slot provided in the drcontext */
dr_set_tls_field(drcontext, counts);
memset(counts, 0, sizeof(bb_counts));
}
static void
event_thread_exit(void *drcontext)
{
bb_counts *counts = (bb_counts *) dr_get_tls_field(drcontext);
/* NOTE - if we so choose we could report per-thread sizes here. */
dr_mutex_lock(count_lock);
counts_dynamic.blocks += counts->blocks;
counts_dynamic.total_size += counts->total_size;
dr_mutex_unlock(count_lock);
dr_thread_free(drcontext, counts, sizeof(bb_counts));
}
static dr_emit_flags_t
event_basic_block(void *drcontext, void *tag, instrlist_t *bb,
bool for_trace, bool translating)
{
uint num_instructions = 0;
instr_t *instr, *where = NULL;
bool eflags_saved = true;
bb_counts *counts = (bb_counts *) dr_get_tls_field(drcontext);
/* count the number of instructions in this block */
for (instr = instrlist_first(bb); instr != NULL; instr = instr_get_next(instr)) {
/* Since it doesn't matter where we insert, look for a place
* where the eflags are dead. */
uint flags = instr_get_arith_flags(instr, DR_QUERY_DEFAULT);
if (TESTALL(EFLAGS_WRITE_6, flags) && !TESTANY(EFLAGS_READ_6, flags)) {
where = instr;
eflags_saved = false;
}
num_instructions++;
}
/* update the as-built counts */
dr_mutex_lock(as_built_lock);
counts_as_built.blocks++;
counts_as_built.total_size += num_instructions;
if (eflags_saved)
bbs_eflags_saved++;
dr_mutex_unlock(as_built_lock);
/* increment counters */
if (eflags_saved) {
where = instrlist_first(bb);
dr_save_arith_flags(drcontext, bb, where, SPILL_SLOT_1);
}
+ /* If all caches are thread-private we can use direct addressing. */
+ if (!dr_using_all_private_caches()) {
/* Spill a register to get a pointer to our TLS structure. */
dr_save_reg(drcontext, bb, where, DR_REG_XDI, SPILL_SLOT_2);
dr_insert_read_tls_field(drcontext, bb, where, DR_REG_XDI);
+ }
#ifdef X86_32
/* Since the counters are 64-bit we must use an add + an adc to increment.
* The operation is still effectively atomic since we're only increasing
* the count. */
instrlist_meta_preinsert(bb, where,
INSTR_CREATE_add(drcontext,
! dr_using_all_private_caches() ?
! OPND_CREATE_ABSMEM((byte *)&counts->blocks, OPSZ_4) :
! OPND_CREATE_MEM32(DR_REG_XDI, offsetof(bb_counts, blocks)),
OPND_CREATE_INT8(1)));
instrlist_meta_preinsert(bb, where,
INSTR_CREATE_adc(drcontext,
! dr_using_all_private_caches() ?
! OPND_CREATE_ABSMEM((byte *)&counts->blocks +4, OPSZ_4) :
! OPND_CREATE_MEM32(DR_REG_XDI, offsetof(bb_counts, blocks)+4),
OPND_CREATE_INT8(0)));
instrlist_meta_preinsert(bb, where,
INSTR_CREATE_add(drcontext,
! dr_using_all_private_caches() ?
! OPND_CREATE_ABSMEM((byte *)&counts->total_size, OPSZ_4) :
! OPND_CREATE_MEM32(DR_REG_XDI, offsetof(bb_counts, total_size)),
OPND_CREATE_INT_32OR8(num_instructions)));
instrlist_meta_preinsert(bb, where,
INSTR_CREATE_adc(drcontext,
! dr_using_all_private_caches() ?
! OPND_CREATE_ABSMEM((byte *)&counts->total_size +4, OPSZ_4) :
! OPND_CREATE_MEM32(DR_REG_XDI, offsetof(bb_counts, total_size)+4),
OPND_CREATE_INT8(0)));
#else /* X86_64 */
instrlist_meta_preinsert(bb, where,
INSTR_CREATE_inc(drcontext,
! dr_using_all_private_caches() ?
! OPND_CREATE_ABSMEM((byte *)&counts->blocks, OPSZ_8) :
! OPND_CREATE_MEM64(DR_REG_XDI, offsetof(bb_counts, blocks))));
instrlist_meta_preinsert(bb, where,
INSTR_CREATE_add(drcontext,
! dr_using_all_private_caches() ?
! OPND_CREATE_ABSMEM((byte *)&counts->total_size, OPSZ_8) :
! OPND_CREATE_MEM64(DR_REG_XDI, offsetof(bb_counts, total_size)),
OPND_CREATE_INT_32OR8(num_instructions)));
#endif
+ if (!dr_using_all_private_caches()) {
/* Restore spilled register. */
dr_restore_reg(drcontext, bb, where, DR_REG_XDI, SPILL_SLOT_2);
+ }
if (eflags_saved) {
dr_restore_arith_flags(drcontext, bb, where, SPILL_SLOT_1);
}
return DR_EMIT_DEFAULT;
}
\endcode
[ \ref API_tutorial_bbdynsize7 "prev" | \subpage API_tutorial_bbdynsize9 "next" ]
\page API_tutorial_bbdynsize9 average_bb_size.p9
The final multi-core optimized version. If we are single core then the LOCK version
is probably faster unless we are running with -thread_private in which case it's probably
close to equivalent.
\code
#include "dr_api.h"
#include <stddef.h>
#ifdef WINDOWS
# define DISPLAY_STRING(msg) dr_messagebox(msg)
#else
# define DISPLAY_STRING(msg) dr_printf("%s\n", msg)
#endif
#define TESTALL(mask, var) (((mask) & (var)) == (mask))
#define TESTANY(mask, var) (((mask) & (var)) != 0)
typedef struct bb_counts {
uint64 blocks;
uint64 total_size;
} bb_counts;
static bb_counts counts_as_built;
void *as_built_lock;
static bb_counts counts_dynamic;
void *count_lock;
/* Protected by the as_built_lock */
static uint64 bbs_eflags_saved;
static void
event_exit(void);
static dr_emit_flags_t
event_basic_block(void *drcontext, void *tag, instrlist_t *bb,
bool for_trace, bool translating);
static void
event_thread_init(void *drcontext);
static void
event_thread_exit(void *drcontext);
DR_EXPORT void
dr_client_main(client_id_t id, int argc, const char *argv[])
{
/* register events */
dr_register_exit_event(event_exit);
dr_register_bb_event(event_basic_block);
dr_register_thread_init_event(event_thread_init);
dr_register_thread_exit_event(event_thread_exit);
/* initialize lock */
as_built_lock = dr_mutex_create();
count_lock = dr_mutex_create();
}
static void
event_exit(void)
{
/* Display results - we must first snprintf the string as on Windows
* dr_printf(), dr_messagebox() and dr_fprintf() can't print floats. */
char msg[512];
int len;
len = snprintf(msg, sizeof(msg)/sizeof(msg[0]),
"Number of blocks built : %"UINT64_FORMAT_CODE"\n"
" Average size : %5.2lf instructions\n"
" Num saved eflags : %"UINT64_FORMAT_CODE"\n"
"Number of blocks executed : %"UINT64_FORMAT_CODE"\n"
" Average weighted size : %5.2lf instructions\n",
counts_as_built.blocks,
counts_as_built.total_size / (double)counts_as_built.blocks,
bbs_eflags_saved,
counts_dynamic.blocks,
counts_dynamic.total_size / (double)counts_dynamic.blocks);
DR_ASSERT(len > 0);
msg[sizeof(msg)/sizeof(msg[0])-1] = '\0';
DISPLAY_STRING(msg);
/* free mutex */
dr_mutex_destroy(as_built_lock);
dr_mutex_destroy(count_lock);
}
static void
event_thread_init(void *drcontext)
{
/* create an instance of our data structure for this thread */
bb_counts *counts = (bb_counts *)dr_thread_alloc(drcontext, sizeof(bb_counts));
/* store it in the slot provided in the drcontext */
dr_set_tls_field(drcontext, counts);
memset(counts, 0, sizeof(bb_counts));
}
static void
event_thread_exit(void *drcontext)
{
bb_counts *counts = (bb_counts *) dr_get_tls_field(drcontext);
/* NOTE - if we so choose we could report per-thread sizes here. */
dr_mutex_lock(count_lock);
counts_dynamic.blocks += counts->blocks;
counts_dynamic.total_size += counts->total_size;
dr_mutex_unlock(count_lock);
dr_thread_free(drcontext, counts, sizeof(bb_counts));
}
/* Basic block event: counts the instructions in the block, records the block
 * in the as-built totals, and inserts meta instructions into the block that
 * add 1 to the per-thread block count and the instruction count to the
 * per-thread total size. Always returns DR_EMIT_DEFAULT. */
static dr_emit_flags_t
event_basic_block(void *drcontext, void *tag, instrlist_t *bb,
bool for_trace, bool translating)
{
uint num_instructions = 0;
instr_t *instr, *where = NULL;
/* Assume the flags must be preserved until a dead-flags spot is found. */
bool eflags_saved = true;
bb_counts *counts = (bb_counts *) dr_get_tls_field(drcontext);
/* count the number of instructions in this block */
for (instr = instrlist_first(bb); instr != NULL; instr = instr_get_next(instr)) {
/* Since it doesn't matter where we insert, look for a place
 * where the eflags are dead: an instruction that writes all of
 * EFLAGS_WRITE_6 and reads none of EFLAGS_READ_6. The last such
 * instruction found becomes the insertion point. */
uint flags = instr_get_arith_flags(instr, DR_QUERY_DEFAULT);
if (TESTALL(EFLAGS_WRITE_6, flags) && !TESTANY(EFLAGS_READ_6, flags)) {
where = instr;
eflags_saved = false;
}
num_instructions++;
}
/* update the as-built counts */
dr_mutex_lock(as_built_lock);
counts_as_built.blocks++;
counts_as_built.total_size += num_instructions;
if (eflags_saved)
bbs_eflags_saved++;
dr_mutex_unlock(as_built_lock);
/* No dead-flags spot was found: insert at the top of the block and
 * preserve the arithmetic flags around the counter updates. */
if (eflags_saved) {
where = instrlist_first(bb);
dr_save_arith_flags(drcontext, bb, where, SPILL_SLOT_1);
}
/* If all caches are thread-private we can address the counters directly
 * through the counts pointer; otherwise the inserted code must fetch the
 * TLS field itself. */
if (!dr_using_all_private_caches()) {
/* Spill a register to get a pointer to our TLS structure. */
dr_save_reg(drcontext, bb, where, DR_REG_XDI, SPILL_SLOT_2);
dr_insert_read_tls_field(drcontext, bb, where, DR_REG_XDI);
}
#ifdef X86_32
/* Since the counters are 64-bit we must use an add and an adc to increment:
 * the add updates the low 32 bits and the adc carries into the high 32 bits.
 * The operation is still effectively atomic since we're only increasing
 * the count. */
instrlist_meta_preinsert(bb, where,
INSTR_CREATE_add(drcontext,
dr_using_all_private_caches() ?
OPND_CREATE_ABSMEM((byte *)&counts->blocks, OPSZ_4) :
OPND_CREATE_MEM32(DR_REG_XDI, offsetof(bb_counts, blocks)),
OPND_CREATE_INT8(1)));
instrlist_meta_preinsert(bb, where,
INSTR_CREATE_adc(drcontext,
dr_using_all_private_caches() ?
OPND_CREATE_ABSMEM((byte *)&counts->blocks +4, OPSZ_4) :
OPND_CREATE_MEM32(DR_REG_XDI, offsetof(bb_counts, blocks)+4),
OPND_CREATE_INT8(0)));
instrlist_meta_preinsert(bb, where,
INSTR_CREATE_add(drcontext,
dr_using_all_private_caches() ?
OPND_CREATE_ABSMEM((byte *)&counts->total_size, OPSZ_4) :
OPND_CREATE_MEM32(DR_REG_XDI, offsetof(bb_counts, total_size)),
OPND_CREATE_INT_32OR8(num_instructions)));
instrlist_meta_preinsert(bb, where,
INSTR_CREATE_adc(drcontext,
dr_using_all_private_caches() ?
OPND_CREATE_ABSMEM((byte *)&counts->total_size +4, OPSZ_4) :
OPND_CREATE_MEM32(DR_REG_XDI, offsetof(bb_counts, total_size)+4),
OPND_CREATE_INT8(0)));
#else /* X86_64 */
/* A single 64-bit inc/add updates each counter. */
instrlist_meta_preinsert(bb, where,
INSTR_CREATE_inc(drcontext,
dr_using_all_private_caches() ?
OPND_CREATE_ABSMEM((byte *)&counts->blocks, OPSZ_8) :
OPND_CREATE_MEM64(DR_REG_XDI, offsetof(bb_counts, blocks))));
instrlist_meta_preinsert(bb, where,
INSTR_CREATE_add(drcontext,
dr_using_all_private_caches() ?
OPND_CREATE_ABSMEM((byte *)&counts->total_size, OPSZ_8) :
OPND_CREATE_MEM64(DR_REG_XDI, offsetof(bb_counts, total_size)),
OPND_CREATE_INT_32OR8(num_instructions)));
#endif
if (!dr_using_all_private_caches()) {
/* Restore spilled register. */
dr_restore_reg(drcontext, bb, where, DR_REG_XDI, SPILL_SLOT_2);
}
/* Undo the flag save made above, if there was one. */
if (eflags_saved) {
dr_restore_arith_flags(drcontext, bb, where, SPILL_SLOT_1);
}
return DR_EMIT_DEFAULT;
}
\endcode
[ \ref API_tutorial_bbdynsize8 "prev" | \ref API_tutorial "back" ]
**/
/**
\page API_tutorial_annotation1 create_annotation
In this tutorial we'll create a new annotation MEMCLIENT_MARK_MEM_DEFINED_IF_ADDRESSABLE()
for use in a hypothetical client named MemClient that detects use of uninitialized
variables. Since an annotation definition has several detailed requirements, it will be
easiest to start with a copy of a DynamoRIO annotation and transform it into the new
annotation. The first step is to copy the DynamoRIO source file
<b>core/lib/dr_annotations.h</b>:
\code
#ifndef _DYNAMORIO_ANNOTATIONS_H_
#define _DYNAMORIO_ANNOTATIONS_H_ 1
#include "dr_annotations_asm.h"
/* To simplify project configuration, this pragma excludes the file from GCC warnings. */
#ifdef __GNUC__
# pragma GCC system_header
#endif
#define DYNAMORIO_ANNOTATE_RUNNING_ON_DYNAMORIO() \
dynamorio_annotate_running_on_dynamorio()
#define DYNAMORIO_ANNOTATE_LOG(format, ...) \
DR_ANNOTATION(dynamorio_annotate_log, format, ##__VA_ARGS__)
#ifdef __cplusplus
extern "C" {
#endif
DR_DECLARE_ANNOTATION(char, dynamorio_annotate_running_on_dynamorio, (void));
DR_DECLARE_ANNOTATION(unsigned int, dynamorio_annotate_log, (const char *format, ...));
#ifdef __cplusplus
}
#endif
#endif
\endcode
[ \ref API_tutorial "prev" | \subpage API_tutorial_annotation2 "next" ]
\page API_tutorial_annotation2 create_annotation.p2
Next, modify elements of the annotation macro and declaration according to its new
purpose:
1. Change the name;
2. Change the return type to void;
3. Replace the formal parameter list.
\code
+#ifndef _MEMCLIENT_ANNOTATIONS_H_
+#define _MEMCLIENT_ANNOTATIONS_H_ 1
#include "dr_annotations_asm.h"
/* To simplify project configuration, this pragma excludes the file from GCC warnings. */
#ifdef __GNUC__
# pragma GCC system_header
#endif
+#define MEMCLIENT_MARK_MEM_DEFINED_IF_ADDRESSABLE(start, size) \
+ DR_ANNOTATION(memclient_mark_mem_defined_if_addressable, start, size)
#ifdef __cplusplus
extern "C" {
#endif
+DR_DECLARE_ANNOTATION(void, memclient_mark_mem_defined_if_addressable,
+ (void *p, size_t size));
#ifdef __cplusplus
}
#endif
#endif
\endcode
Note that one of the original annotations has been deleted. This completes the header for
the new annotation, and now the source must be similarly copied and modified.
[ \ref API_tutorial_annotation1 "prev" | \subpage API_tutorial_annotation3 "next" ]
\page API_tutorial_annotation3 create_annotation.p3
To create the annotation source file, first copy <b>core/lib/dr_annotations.c</b>:
\code
#include "dr_annotations.h"
DR_DEFINE_ANNOTATION(char, dynamorio_annotate_running_on_dynamorio, (void), return 0)
DR_DEFINE_ANNOTATION(unsigned int, dynamorio_annotate_log, (const char *format, ...),
return 0)
\endcode
Note that the last argument to the macro specifies the body of the annotation function,
which in the case of these annotations must contain a return statement because the
annotations are non-void.
[ \ref API_tutorial_annotation2 "prev" | \subpage API_tutorial_annotation4 "next" ]
\page API_tutorial_annotation4 create_annotation.p4
Next, modify elements of the annotation definition to match the header:
\code
#include "dr_annotations.h"
+DR_DEFINE_ANNOTATION(void, memclient_mark_mem_defined_if_addressable,
+ (void *p, size_t size), )
\endcode
The last argument to the macro is now empty, since the annotation is now void and no
longer requires (or allows) a return statement.
[ \ref API_tutorial_annotation3 "prev" | \subpage API_tutorial_annotation5 "next" ]
\page API_tutorial_annotation5 create_annotation.p5
Target applications will now be able to mark any region of memory as defined by simply
invoking the macro MEMCLIENT_MARK_MEM_DEFINED_IF_ADDRESSABLE(). To implement the
corresponding functionality in memclient, the client developer creates a handler
function having the same signature as the annotation function and registers it for the
annotation using dr_annotation_register_call(). For example:
\code
#include "dr_api.h"
/* Annotation handler: records the region starting at p, of length size, in
 * skip_init_check_table, keyed by its start address. The payload is a
 * heap-allocated copy of the region size. */
void handle_mark_mem_defined_if_addressable(void *p, size_t size)
{
    size_t *skip_size = dr_global_alloc(sizeof(size_t));
    /* Store the region length in the payload; otherwise the table entry
     * would point at uninitialized memory and size would be ignored. */
    *skip_size = size;
    hashtable_add_replace(skip_init_check_table, p, skip_size);
}
/* Client entry point: registers the handler for the MemClient annotation. */
DR_EXPORT void
dr_client_main(client_id_t id, int argc, const char *argv[])
{
    /* name of the annotation function that target applications invoke */
    const char *annotation_name = "memclient_mark_mem_defined_if_addressable";

    dr_annotation_register_call(annotation_name,
                                handle_mark_mem_defined_if_addressable, false, 2);
}
\endcode
After registering the handler, all instances of the new annotation will be transformed
into a clean call to handle_mark_mem_defined_if_addressable(), where the arguments to
the annotation macro become arguments to the clean call.
[ \ref API_tutorial_annotation4 "prev" | \subpage API_tutorial_annotation6 "next" ]
\page API_tutorial_annotation6 create_annotation.p6
Since the annotation declarations and definitions are only referenced by target programs
(not by MemClient itself), no additions to the MemClient build are required. However, to
simplify integration of the new annotation into target programs, the MemClient
developer may wish to provide a cmake package that defines a function for configuring
annotations. DynamoRIO provides a similar function use_DynamoRIO_annotations() for its
default annotations, so it will be easiest to start with a copy of that function from
DynamoRIOConfig.cmake (which is located in the DynamoRIO source tree in
make/DynamoRIOConfig.cmake.in):
\verbatim
# For configuring target applications that use default DynamoRIO annotations
function (use_DynamoRIO_annotations target target_srcs)
set(dr_annotation_dir "${DynamoRIO_cwd}/../include/annotations")
set(dr_annotation_srcs "${dr_annotation_dir}/dr_annotations.c")
configure_DynamoRIO_annotation_sources("${dr_annotation_srcs}")
set(${target_srcs} ${${target_srcs}} ${dr_annotation_srcs} PARENT_SCOPE)
endfunction (use_DynamoRIO_annotations target target_srcs)
\endverbatim
[ \ref API_tutorial_annotation5 "prev" | \subpage API_tutorial_annotation7 "next" ]
\page API_tutorial_annotation7 create_annotation.p7
Next, modify elements of the cmake function for usage with the MemClient annotations:
1. Replace DynamoRIO with the client name, e.g. MemClient;
2. Change the variable prefix from dr_ to a suitable prefix, e.g. memclient_;
3. Specify the client's annotation source file(s).
\verbatim
+# For configuring target applications that use MemClient annotations
+function (use_MemClient_annotations target target_srcs)
+ set(memclient_annotation_dir "${MemClient_cwd}/../include/annotations")
+ set(memclient_annotation_srcs "${memclient_annotation_dir}/memclient_annotations.c")
+ configure_DynamoRIO_annotation_sources("${memclient_annotation_srcs}")
+ set(${target_srcs} ${${target_srcs}} ${memclient_annotation_srcs} PARENT_SCOPE)
+endfunction (use_MemClient_annotations target target_srcs)
\endverbatim
Note that the cmake variable memclient_annotation_dir has been set to the same directory
as the default DynamoRIO annotation directory, but this is not required--the client
developer may choose to put the new annotations in any directory within the build output
area.
In more general terms, this cmake function takes the following steps:
1. Copies the MemClient annotation header and source file to the build output area;
2. Assigns the required compiler flags;
3. Adds the source file to the project source list.
Target applications that are not built with cmake will need to take these same 3 steps in
a comparable way.
[ \ref API_tutorial_annotation6 "prev" | \ref API_tutorial "back" ]
**/
/**
\page API_tutorial_steal_reg1 steal_reg
<b>coming soon</b>
[ \ref API_tutorial "back" ]
**/
/**
\page API_tutorial_prefetch1 prefetch
<b>coming soon</b>
[ \ref API_tutorial "back" ]
**/