blob: 10841c37f17125ff3a20bd19002430a7595c9156 [file] [log] [blame] [edit]
/* **********************************************************
* Copyright (c) 2004-2009 VMware, Inc. All rights reserved.
* **********************************************************/
/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/* Copyright (c) 2004-2007 Determina Corp. */
/*
* stats.h - statistics related exports and internal macros
*/
#ifndef STATS_H
#define STATS_H
#ifdef KSTATS
void
kstat_init(void);
void
kstat_exit(void);
void
kstat_thread_init(dcontext_t *dcontext);
void
kstat_thread_exit(dcontext_t *dcontext);
void
dump_thread_kstats(dcontext_t *dcontext);
/* for debugging only */
#ifdef DEBUG
void
kstats_dump_stack(dcontext_t *dcontext);
#endif
/* Kstat variable */
typedef struct {
/* number of executions */
uint num_self;
/* Currently only time data - more performance counters could be
* added in the future if necessary
*/
/* We could always measure total elapsed time in subroutines, by
* keeping their start times on the stack. However, it is better
* to use essentially a single running timer that gets reset every
* time we enter a new level, and then we add up all outstanding
* times. This allows us to be more selective not to time spent
* waiting or when context switched.
*
* It in effect lets use as a drop-in replacement a sampling based
* counter for time in self. If we could add an action, every
* time we get a sample we can increment the TOS entry.
* Currently, we are measuring time at start/stop to calculate the
* exact time with some overhead.
*/
timestamp_t total_self; /* measured time in this block */
timestamp_t total_sub; /* nested subpaths */
/* cumulative = self time + subpaths */
/* total time for all calls */
timestamp_t min_cum; /* most stable number for the common case */
timestamp_t max_cum; /* a high number shows need for subpaths */
timestamp_t total_outliers; /* attempt to account for context switches */
/* VTune also keeps self_wait time, but we do not propagate that time up */
} kstat_variable_t;
/* All kstat variables - expanded as a structure instead of an array
* referenced by index allows easy to read debugger pretty prints
*/
typedef struct {
#define KSTAT_DEF(desc, name) kstat_variable_t name;
#define KSTAT_SUM(description, name, var1, var2) kstat_variable_t name;
#include "kstatsx.h"
#undef KSTAT_SUM
#undef KSTAT_DEF
} kstat_variables_t;
/* Stack entry for an active execution node */
typedef struct {
kstat_variable_t *var;
timestamp_t self_time;
timestamp_t subpath_time;
timestamp_t outlier_time;
} kstat_node_t;
enum {KSTAT_MAX_DEPTH = 16};
/* Information about a current execution path */
typedef struct {
volatile uint depth; /* Volatile for signal safety. */
timestamp_t last_start_time;
timestamp_t last_end_time;
kstat_node_t node[KSTAT_MAX_DEPTH];
} kstat_stack_t;
/* Thread local context and collected data */
typedef struct {
thread_id_t thread_id;
kstat_variables_t vars_kstats;
kstat_stack_t stack_kstats;
file_t outfile_kstats;
} thread_kstats_t;
extern timestamp_t
kstat_ignore_context_switch;
#define KSTAT_THREAD_NO_PV_START(dc) do { \
dcontext_t *cur_dcontext = (dc); \
if (cur_dcontext != NULL && cur_dcontext->thread_kstats != NULL) { \
kstat_stack_t *ks = &cur_dcontext->thread_kstats->stack_kstats;
#define KSTAT_THREAD_NO_PV_END() \
} \
} while (0)
/* ensures statement can safely use ks=stack_kstats and pv=&vars_kstats.name,
*/
#define KSTAT_THREAD(name, statement) \
KSTAT_THREAD_NO_PV_START(get_thread_private_dcontext()) \
kstat_variable_t *pv = &cur_dcontext->thread_kstats-> \
vars_kstats.name; \
UNUSED_VARIABLE(pv); \
statement; \
KSTAT_THREAD_NO_PV_END()
#define KSTAT_OTHER_THREAD(dc, name, statement) \
KSTAT_THREAD_NO_PV_START(dc) \
kstat_variable_t *pv = &cur_dcontext->thread_kstats-> \
vars_kstats.name; \
UNUSED_VARIABLE(pv); \
statement; \
KSTAT_THREAD_NO_PV_END()
/* makes sure we're matching start/stop */
# define KSTAT_TOS_MATCHING(name) KSTAT_THREAD(name, kstat_tos_matching_var(ks, pv))
/* optional - serialize instruction stream before measurement on my
* laptop the overhead of an empty inner block is 95 cycles without
* serialization vs 222 cycles with serialization,
* too much overhead for little extra stability */
#define KSTAT_SERIALIZE_INSTRUCTIONS() /* no SERIALIZE_INSTRUCTIONS() */
/* optional - accumulate outliers separately for comparable analysis,
* for 88 vs 98 cycles in inner block - definitely worth it, e.g. two
* simultaneously run loops get same user CPU as single run, although
* wall clock time is twice longer.
*/
#ifdef KSTAT_NO_OUTLIERS
# define CORRECT_OUTLIER(stmt) /* ignore extra logic */
#else
/* We do not update self_time on outlier so that higher level stats
* are also able to discount the context switches from self.
*/
# define CORRECT_OUTLIER(stmt) stmt
#endif /* KSTAT_NO_OUTLIERS */
/* update current timer before starting a new one */
#define UPDATE_CURRENT_COUNTER(kstack) do { \
KSTAT_SERIALIZE_INSTRUCTIONS(); \
RDTSC_LL((kstack)->last_end_time); \
/* a dummy entry at 0 is always present, callers ASSERT */ \
CORRECT_OUTLIER( \
if ((kstack)->last_end_time - (kstack)->last_start_time > \
kstat_ignore_context_switch) \
(kstack)->node[(kstack)->depth - 1].outlier_time += \
(kstack)->last_end_time - (kstack)->last_start_time; \
else ) \
(kstack)->node[(kstack)->depth - 1].self_time += \
(kstack)->last_end_time - (kstack)->last_start_time; \
(kstack)->last_start_time = (kstack)->last_end_time; \
} while (0)
/* since we want to add kstats to logging functions we can't use LOG in it */
#define KSTAT_INTERNAL_DEBUG(kstack) \
LOG(THREAD, LOG_STATS, 3, "ks start="FIXED_TIMESTAMP_FORMAT \
" end="FIXED_TIMESTAMP_FORMAT \
" cur diff="FIXED_TIMESTAMP_FORMAT \
" depth=%d\n", \
(kstack)->last_start_time, (kstack)->last_end_time, \
(kstack)->node[(kstack)->depth - 1].self_time, (kstack)->depth);
#ifdef DEBUG
/* avoid recursion in logging KSTAT - do not checkin uses */
#define KSTAT_DUMP_STACK(kstack) \
if ((kstack)->node[(kstack)->depth - 1].var != \
&cur_dcontext->thread_kstats->vars_kstats.logging) { \
LOG(THREAD_GET, LOG_STATS, 1, "ks %s:%d\n"__FILE__, __LINE__); \
kstats_dump_stack(cur_dcontext); \
}
#endif
/* Most of these could be static inline's but DEBUG builds aren't
* inlining these and we want to minimize the measurement overhead
* FIXME: consider replacing them if we have too many copies, it will
* also clean up the argument evaluation in KSTAT_THREAD_NO_PV.
*
* KSTAT_THREAD encloses these statements in a do {} while (0) block,
* FIXME: if DEBUG stats are at all interesting may want to remove the
* one's from here if a compiler in fact adds real loops - e.g. cl does
*/
/* starts a timer */
#define kstat_start_var(kstack, pvar) do { \
uint depth = (kstack)->depth; \
DODEBUG({if (depth >= KSTAT_MAX_DEPTH) \
kstats_dump_stack(cur_dcontext);}); \
ASSERT(depth < KSTAT_MAX_DEPTH \
&& "probably missing a STOP on return"); \
/* Increment depth first for re-entrancy. */ \
(kstack)->depth++; \
(kstack)->node[depth].var = pvar; \
UPDATE_CURRENT_COUNTER((kstack)); \
/* reset new counter */ \
(kstack)->node[depth].subpath_time = 0; \
(kstack)->node[depth].self_time = 0; \
(kstack)->node[depth].outlier_time = 0; \
} while(0)
/* updates which variable will be counted */
#define kstat_switch_var(kstack, pvar) (kstack)->node[(kstack)->depth - 1].var = (pvar)
/* allow mismatched start/stop - for use with KSWITCH */
#define kstat_stop_not_matching_var(kstack, pvar) do { \
timestamp_t last_cum; \
kstat_stop_not_propagated_var(kstack, pvar, &last_cum); \
if ((kstack)->depth > 0) /* update subpath in parent */ \
(kstack)->node[(kstack)->depth - 1].subpath_time += last_cum; \
} while (0)
#define kstat_stop_matching_var(kstack, pvar) do { \
DODEBUG({if (ks->node[ks->depth - 1].var != pvar) \
kstats_dump_stack(cur_dcontext);}); \
ASSERT(ks->node[ks->depth - 1].var == pvar \
&& "stop not matching TOS"); \
kstat_stop_not_matching_var(ks, pvar); \
} while (0)
#define kstat_tos_matching_var(kstack, pvar) (ks->node[ks->depth - 1].var == pv)
#define kstat_stop_not_propagated_var(kstack, pvar, pcum) do { \
uint depth = (kstack)->depth - 1; \
ASSERT((kstack)->depth > 1); \
UPDATE_CURRENT_COUNTER((kstack)); \
(kstack)->node[depth].var->num_self++; \
(kstack)->node[depth].var->total_self += \
(kstack)->node[depth].self_time; \
(kstack)->node[depth].var->total_sub += \
(kstack)->node[depth].subpath_time; \
(kstack)->node[depth].var->total_outliers += \
(kstack)->node[depth].outlier_time; \
*pcum = (kstack)->node[depth].self_time + \
(kstack)->node[depth].subpath_time; \
/* FIXME: an outlier should be counted as a NaN for outliers on subpaths */ \
if (*pcum > 0 && (kstack)->node[depth].var->min_cum > *pcum) \
(kstack)->node[depth].var->min_cum = *pcum; \
if ((kstack)->node[depth].var->max_cum < *pcum) \
(kstack)->node[depth].var->max_cum = *pcum; \
/* Decrement after reads for re-entrancy. */ \
(kstack)->depth--; \
} while (0)
/* FIXME: we may have to add a type argument to KSTAT_DEF saying whether
* a variable should be propagated or not - here we assume all are propagated
*/
#define kstat_stop_rewind_var(kstack, pvar) do { \
kstat_stop_not_matching_var(kstack, ignored); \
} while (kstack->node[kstack->depth /* the removed */].var != pvar)
/* This is essentially rewind-until, stopping BEFORE deleting pvar
* FIXME: we may have to add a type argument to KSTAT_DEF saying whether
* a variable should be propagated or not - here we assume all are propagated
*/
#define kstat_stop_longjmp_var(kstack, pvar) \
while (kstack->node[kstack->depth - 1 /* to be removed */].var != pvar) { \
kstat_stop_not_matching_var(kstack, ignored); \
}
#endif /* KSTATS */
#endif /* STATS_H */