/* **********************************************************
 * Copyright (c) 2004-2009 VMware, Inc. All rights reserved.
 * **********************************************************/

/*
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice,
 *   this list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 *
 * * Neither the name of VMware, Inc. nor the names of its contributors may be
 *   used to endorse or promote products derived from this software without
 *   specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

/* Copyright (c) 2004-2007 Determina Corp. */

/*
 * stats.h - statistics related exports and internal macros
 */

#ifndef STATS_H
#define STATS_H

#ifdef KSTATS
void
kstat_init(void);
void
kstat_exit(void);
void
kstat_thread_init(dcontext_t *dcontext);
void
kstat_thread_exit(dcontext_t *dcontext);
void
dump_thread_kstats(dcontext_t *dcontext);

/* for debugging only */
#ifdef DEBUG
void
kstats_dump_stack(dcontext_t *dcontext);
#endif

/* Kstat variable */
typedef struct {
    /* number of executions */
    uint num_self;
    /* Currently only time data - more performance counters could be
     * added in the future if necessary.
     */

    /* We could always measure total elapsed time in subroutines by
     * keeping their start times on the stack.  However, it is better
     * to use essentially a single running timer that gets reset every
     * time we enter a new level, and then to add up all outstanding
     * times.  This allows us to be more selective, e.g. to exclude
     * time spent waiting or time lost to context switches.
     *
     * In effect this lets a sampling-based counter serve as a drop-in
     * replacement for time in self: if we could attach an action to
     * each sample, we would simply increment the TOS entry.
     * Currently we measure time at start/stop to compute the exact
     * time, at the cost of some overhead.
     */

    timestamp_t total_self;     /* measured time in this block */
    timestamp_t total_sub;      /* nested subpaths */
    /* cumulative = self time + subpaths */
    /* total time for all calls */
    timestamp_t min_cum;        /* most stable number for the common case */
    timestamp_t max_cum;        /* a high number shows need for subpaths */
    timestamp_t total_outliers; /* attempt to account for context switches */
    /* VTune also keeps self_wait time, but we do not propagate that time up */
} kstat_variable_t;

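/* An illustrative walk-through of the running-timer scheme (cycle
 * numbers are made up): suppose block A calls block B.
 *   t=0    start A: push a node for A, last_start_time = 0
 *   t=100  start B: A's self_time += 100 (timer reset on entering a
 *                   new level), push a node for B
 *   t=400  stop B:  B's self_time += 300; B's cumulative time (300)
 *                   is added to A's subpath_time as B is popped
 *   t=450  stop A:  A's self_time += 50, giving self = 150,
 *                   sub = 300, cumulative = self + sub = 450
 */
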
/* All kstat variables - expanded as structure fields instead of an
 * array referenced by index, which allows easy-to-read debugger pretty
 * printing.
 */
typedef struct {
#define KSTAT_DEF(desc, name) kstat_variable_t name;
#define KSTAT_SUM(description, name, var1, var2) kstat_variable_t name;
#include "kstatsx.h"
#undef KSTAT_SUM
#undef KSTAT_DEF
} kstat_variables_t;

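/* For illustration: a kstatsx.h entry such as (hypothetical name)
 *   KSTAT_DEF("time in dispatch", dispatch_time)
 * expands inside this struct to
 *   kstat_variable_t dispatch_time;
 * so every entry in kstatsx.h becomes a named field, addressable as
 * vars_kstats.dispatch_time.
 */
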
/* Stack entry for an active execution node */
typedef struct {
    kstat_variable_t *var;
    timestamp_t self_time;
    timestamp_t subpath_time;
    timestamp_t outlier_time;
} kstat_node_t;

enum {KSTAT_MAX_DEPTH = 16};

/* Information about a current execution path */
typedef struct {
    volatile uint depth; /* Volatile for signal safety. */
    timestamp_t last_start_time;
    timestamp_t last_end_time;
    kstat_node_t node[KSTAT_MAX_DEPTH];
} kstat_stack_t;

/* Thread local context and collected data */
typedef struct {
    thread_id_t thread_id;
    kstat_variables_t vars_kstats;
    kstat_stack_t stack_kstats;
    file_t outfile_kstats;
} thread_kstats_t;

extern timestamp_t kstat_ignore_context_switch;

#define KSTAT_THREAD_NO_PV_START(dc) do { \
    dcontext_t *cur_dcontext = (dc); \
    if (cur_dcontext != NULL && cur_dcontext->thread_kstats != NULL) { \
        kstat_stack_t *ks = &cur_dcontext->thread_kstats->stack_kstats;

#define KSTAT_THREAD_NO_PV_END() \
    } \
} while (0)

/* Ensures that statement can safely use ks (= &stack_kstats) and
 * pv (= &vars_kstats.name).
 */
#define KSTAT_THREAD(name, statement) \
    KSTAT_THREAD_NO_PV_START(get_thread_private_dcontext()) \
    kstat_variable_t *pv = &cur_dcontext->thread_kstats-> \
        vars_kstats.name; \
    UNUSED_VARIABLE(pv); \
    statement; \
    KSTAT_THREAD_NO_PV_END()

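/* For illustration, for a kstat named logging (declared via KSTAT_DEF
 * in kstatsx.h), the invocation
 *   KSTAT_THREAD(logging, kstat_start_var(ks, pv));
 * expands roughly to
 *   do {
 *       dcontext_t *cur_dcontext = get_thread_private_dcontext();
 *       if (cur_dcontext != NULL && cur_dcontext->thread_kstats != NULL) {
 *           kstat_stack_t *ks = &cur_dcontext->thread_kstats->stack_kstats;
 *           kstat_variable_t *pv =
 *               &cur_dcontext->thread_kstats->vars_kstats.logging;
 *           kstat_start_var(ks, pv);
 *       }
 *   } while (0);
 */
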
#define KSTAT_OTHER_THREAD(dc, name, statement) \
    KSTAT_THREAD_NO_PV_START(dc) \
    kstat_variable_t *pv = &cur_dcontext->thread_kstats-> \
        vars_kstats.name; \
    UNUSED_VARIABLE(pv); \
    statement; \
    KSTAT_THREAD_NO_PV_END()

/* makes sure we're matching start/stop */
#define KSTAT_TOS_MATCHING(name) KSTAT_THREAD(name, kstat_tos_matching_var(ks, pv))

/* Optional - serialize the instruction stream before measurement.  On my
 * laptop the overhead of an empty inner block is 95 cycles without
 * serialization vs 222 cycles with serialization - too much overhead for
 * little extra stability.
 */
#define KSTAT_SERIALIZE_INSTRUCTIONS() /* no SERIALIZE_INSTRUCTIONS() */

/* Optional - accumulate outliers separately for comparable analysis.
 * This raises the cost of the inner block from 88 to 98 cycles -
 * definitely worth it: e.g. two simultaneously run loops get the same
 * user CPU time as a single run, although wall clock time is twice as
 * long.
 */
#ifdef KSTAT_NO_OUTLIERS
# define CORRECT_OUTLIER(stmt) /* ignore extra logic */
#else
/* We do not update self_time on an outlier so that higher level stats
 * are also able to discount the context switches from self.
 */
# define CORRECT_OUTLIER(stmt) stmt
#endif /* KSTAT_NO_OUTLIERS */

/* update current timer before starting a new one */
#define UPDATE_CURRENT_COUNTER(kstack) do { \
    KSTAT_SERIALIZE_INSTRUCTIONS(); \
    RDTSC_LL((kstack)->last_end_time); \
    /* a dummy entry at 0 is always present, callers ASSERT */ \
    CORRECT_OUTLIER( \
        if ((kstack)->last_end_time - (kstack)->last_start_time > \
            kstat_ignore_context_switch) \
            (kstack)->node[(kstack)->depth - 1].outlier_time += \
                (kstack)->last_end_time - (kstack)->last_start_time; \
        else ) \
    (kstack)->node[(kstack)->depth - 1].self_time += \
        (kstack)->last_end_time - (kstack)->last_start_time; \
    (kstack)->last_start_time = (kstack)->last_end_time; \
} while (0)

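/* A worked example of the outlier correction (the threshold value here
 * is hypothetical): with kstat_ignore_context_switch = 50000 cycles, a
 * measured gap of 2000 cycles is charged to self_time, while a gap of
 * 2000000 cycles - almost certainly a descheduling rather than our own
 * work - is charged to outlier_time and kept out of self_time.
 */
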
/* Since we want to add kstats to the logging functions themselves, we
 * can't normally use LOG in kstat code; this macro is for internal
 * debugging of the kstats machinery only.
 */
#define KSTAT_INTERNAL_DEBUG(kstack) \
    LOG(THREAD, LOG_STATS, 3, "ks start="FIXED_TIMESTAMP_FORMAT \
        " end="FIXED_TIMESTAMP_FORMAT \
        " cur diff="FIXED_TIMESTAMP_FORMAT \
        " depth=%d\n", \
        (kstack)->last_start_time, (kstack)->last_end_time, \
        (kstack)->node[(kstack)->depth - 1].self_time, (kstack)->depth);

#ifdef DEBUG
/* Avoid recursion when logging KSTAT - do not check in uses of this. */
# define KSTAT_DUMP_STACK(kstack) \
    if ((kstack)->node[(kstack)->depth - 1].var != \
        &cur_dcontext->thread_kstats->vars_kstats.logging) { \
        LOG(THREAD_GET, LOG_STATS, 1, "ks %s:%d\n", __FILE__, __LINE__); \
        kstats_dump_stack(cur_dcontext); \
    }
#endif

/* Most of these could be static inlines, but DEBUG builds aren't
 * inlining them and we want to minimize the measurement overhead.
 * FIXME: consider replacing them if we end up with too many copies; that
 * would also clean up the argument evaluation in KSTAT_THREAD_NO_PV.
 *
 * KSTAT_THREAD encloses these statements in a do {} while (0) block.
 * FIXME: if DEBUG stats are at all interesting we may want to remove the
 * do {} while (0) wrappers from the macros here if a compiler in fact
 * emits real loops for them - e.g. cl does.
 */

/* starts a timer */
#define kstat_start_var(kstack, pvar) do { \
    uint depth = (kstack)->depth; \
    DODEBUG({if (depth >= KSTAT_MAX_DEPTH) \
        kstats_dump_stack(cur_dcontext);}); \
    ASSERT(depth < KSTAT_MAX_DEPTH \
           && "probably missing a STOP on return"); \
    /* Increment depth first for re-entrancy. */ \
    (kstack)->depth++; \
    (kstack)->node[depth].var = (pvar); \
    UPDATE_CURRENT_COUNTER((kstack)); \
    /* reset new counter */ \
    (kstack)->node[depth].subpath_time = 0; \
    (kstack)->node[depth].self_time = 0; \
    (kstack)->node[depth].outlier_time = 0; \
} while (0)

/* updates which variable will be counted */
#define kstat_switch_var(kstack, pvar) \
    ((kstack)->node[(kstack)->depth - 1].var = (pvar))

/* allow mismatched start/stop - for use with KSWITCH */
#define kstat_stop_not_matching_var(kstack, pvar) do { \
    timestamp_t last_cum; \
    kstat_stop_not_propagated_var(kstack, pvar, &last_cum); \
    if ((kstack)->depth > 0) /* update subpath in parent */ \
        (kstack)->node[(kstack)->depth - 1].subpath_time += last_cum; \
} while (0)

#define kstat_stop_matching_var(kstack, pvar) do { \
    DODEBUG({if ((kstack)->node[(kstack)->depth - 1].var != (pvar)) \
        kstats_dump_stack(cur_dcontext);}); \
    ASSERT((kstack)->node[(kstack)->depth - 1].var == (pvar) \
           && "stop not matching TOS"); \
    kstat_stop_not_matching_var(kstack, pvar); \
} while (0)

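/* A minimal usage sketch (inside KSTAT_THREAD, which provides ks and pv):
 *   KSTAT_THREAD(logging, kstat_start_var(ks, pv));
 *   ... code being measured ...
 *   KSTAT_THREAD(logging, kstat_stop_matching_var(ks, pv));
 * The stop asserts that the entry being popped is the one that was
 * started, i.e. that start/stop pairs are properly nested.
 */
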
#define kstat_tos_matching_var(kstack, pvar) \
    ((kstack)->node[(kstack)->depth - 1].var == (pvar))

#define kstat_stop_not_propagated_var(kstack, pvar, pcum) do { \
    uint depth = (kstack)->depth - 1; \
    ASSERT((kstack)->depth > 1); \
    UPDATE_CURRENT_COUNTER((kstack)); \
    (kstack)->node[depth].var->num_self++; \
    (kstack)->node[depth].var->total_self += \
        (kstack)->node[depth].self_time; \
    (kstack)->node[depth].var->total_sub += \
        (kstack)->node[depth].subpath_time; \
    (kstack)->node[depth].var->total_outliers += \
        (kstack)->node[depth].outlier_time; \
    *pcum = (kstack)->node[depth].self_time + \
        (kstack)->node[depth].subpath_time; \
    /* FIXME: an outlier should be counted as a NaN for outliers on subpaths */ \
    if (*pcum > 0 && (kstack)->node[depth].var->min_cum > *pcum) \
        (kstack)->node[depth].var->min_cum = *pcum; \
    if ((kstack)->node[depth].var->max_cum < *pcum) \
        (kstack)->node[depth].var->max_cum = *pcum; \
    /* Decrement after reads for re-entrancy. */ \
    (kstack)->depth--; \
} while (0)

/* FIXME: we may have to add a type argument to KSTAT_DEF saying whether
 * a variable should be propagated or not - here we assume all are propagated.
 */
#define kstat_stop_rewind_var(kstack, pvar) do { \
    kstat_stop_not_matching_var(kstack, ignored); \
} while ((kstack)->node[(kstack)->depth /* the removed */].var != (pvar))

/* This is essentially rewind-until, stopping BEFORE deleting pvar.
 * FIXME: we may have to add a type argument to KSTAT_DEF saying whether
 * a variable should be propagated or not - here we assume all are propagated.
 */
#define kstat_stop_longjmp_var(kstack, pvar) \
    while ((kstack)->node[(kstack)->depth - 1 /* to be removed */].var \
           != (pvar)) { \
        kstat_stop_not_matching_var(kstack, ignored); \
    }
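
/* For illustration, suppose the stack holds A, B, C with C on top and
 * pvar points at B's kstat variable: kstat_stop_rewind_var pops C and
 * then B (pvar's own entry is removed), while kstat_stop_longjmp_var
 * pops only C and leaves B on top, since it tests the TOS before each
 * pop.
 */
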
#endif /* KSTATS */

#endif /* STATS_H */