blob: 9ab3d314351bfc0ccf8fce6eb74f89fcbd0d6d59 [file] [log] [blame] [edit]
/* **********************************************************
* Copyright (c) 2011 Google, Inc. All rights reserved.
* Copyright (c) 2004-2009 VMware, Inc. All rights reserved.
* **********************************************************/
/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/* Copyright (c) 2004-2007 Determina Corp. */
/*
* stats.c - statistics related functionality
*/
/*
* Note that timer numbers from TSC cannot fully adjust for thread
* context switches. We truly want a virtual time stamp counter that
* is thread specific, yet without OS support that's not possible. If
* we could read the ETHREAD data that counts number of context
* switches, we should also regularly print the ThreadTimes data from
* ZwQueryInformationThread for sanity checking. See
* KSTAT_OUTLIER_THRESHOLD_MS for the current solution of not adding
* up at least the largest outliers, although it can't help with all.
*
* We could also add sampling collected statistics on platforms on which
* we can get custom action on profiling interrupts (Linux only).
*
* Numbers don't seem to be very reliable in VMware which isn't that
* surprising - considering RDTSC is not easy to virtualize. Even the
* QueryPerformanceFrequency call is not producing meaningful data,
* and minimums of 1 are often seen.
*/
#include "globals.h"
#include "dr_stats.h"
#include "stats.h"
#include <string.h> /* for memset */
#ifdef KSTATS
#ifdef KSTAT_UNIT_TEST
/* FIXME: add a Makefile rule for this */
kstat_variables_t tkv;
kstat_stack_t ks;
#endif
/* STATIC forward declarations */
static void
kstat_merge_var(kstat_variable_t *destination, kstat_variable_t *source);
static void
kstat_init_variable(kstat_variable_t *kv)
{
memset(kv, 0x0, sizeof(kstat_variable_t));
kv->min_cum = (timestamp_t)-1;
}
/* process all sum equations, any more complicated expressions should
* better be done out of the core
*/
static void
kstats_evaluate_expressions(kstat_variables_t *kvars)
{
/* sum can be recomputed at any time and target is reinitialized,
* all chained KSTAT_SUM equations should appear in evaluation order
*/
#define KSTAT_SUM(desc, name, var1, var2) \
kstat_init_variable(&kvars->name); \
kstat_merge_var(&kvars->name, &kvars->var1); \
kstat_merge_var(&kvars->name, &kvars->var2);
#define KSTAT_DEF(desc, name) /* nothing to do */
#include "kstatsx.h"
#undef KSTAT_SUM
#undef KSTAT_DEF
}
/* equivalent to KSTAT_DEF for the rest of the file */
#define KSTAT_SUM(desc, name, var1, var2) KSTAT_DEF(desc, name)
static void
kstat_init_variables(kstat_variables_t *ks)
{
#define KSTAT_DEF(desc, name) kstat_init_variable(&ks->name);
#include "kstatsx.h"
#undef KSTAT_DEF
}
/* There is no good minimum value here since a thread can get switched
* back in a shorter time slice, in case another thread is waiting or
* has yielded its share. If valid measurements are indeed taking
* longer than 1ms then checkpoints in between will be needed to count
* these properly. However, a millisecond is quite a lot of time and
* we shouldn't be doing anything like that.
*/
enum {KSTAT_OUTLIER_THRESHOLD_MS = 1}; /* 1 ms for now */
timestamp_t kstat_ignore_context_switch;
static timestamp_t
kstat_frequency_per_msec;
/* PR 312534: reduce stack usage (gcc does not combine locals) */
static void
kstat_print_individual(file_t outf, kstat_variable_t *kv,
const char *name, const char *desc)
{
print_file(outf, "%20s:"FIXED_TIMESTAMP_FORMAT" totc,"
"%8u num,"
FIXED_TIMESTAMP_FORMAT" minc,"
FIXED_TIMESTAMP_FORMAT" avg,"
FIXED_TIMESTAMP_FORMAT" maxc,"
FIXED_TIMESTAMP_FORMAT" self,"
FIXED_TIMESTAMP_FORMAT" sub,"
"\n"
" "
FIXED_TIMESTAMP_FORMAT" ms,"
FIXED_TIMESTAMP_FORMAT" ms out,"
"%s\n",
name,
kv->total_self + kv->total_sub,
kv->num_self,
(kv->min_cum == (timestamp_t)-1) ? 0 : kv->min_cum,
(kv->total_self + kv->total_sub) / kv->num_self,
kv->max_cum,
kv->total_self,
kv->total_sub,
(kv->total_self + kv->total_sub) / kstat_frequency_per_msec,
kv->total_outliers/ kstat_frequency_per_msec,
desc);
}
static void
kstat_report(file_t outf, kstat_variables_t *ks)
{
kstats_evaluate_expressions(ks);
/* FIXME: Outliers may make the minc number appear smaller than
* real, should at least mark with a '*' */
#define KSTAT_DEF(desc, name) \
if (ks->name.num_self) \
kstat_print_individual(outf, &ks->name, #name, desc);
#include "kstatsx.h"
#undef KSTAT_DEF
}
DECLARE_CXTSWPROT_VAR(mutex_t process_kstats_lock, INIT_LOCK_FREE(process_kstats_lock));
DECLARE_NEVERPROT_VAR(kstat_variables_t process_kstats, {{0,}});
DECLARE_NEVERPROT_VAR(file_t process_kstats_outfile, INVALID_FILE);
/* Log files are only needed for non-debug builds. */
#ifndef DEBUG
static const char *
kstats_main_logfile_name(void)
{
return "process-kstats";
}
static const char *
kstats_thread_logfile_name(void)
{
return "kstats";
}
#endif
void
kstat_init()
{
kstat_frequency_per_msec = get_timer_frequency();
kstat_ignore_context_switch = KSTAT_OUTLIER_THRESHOLD_MS * kstat_frequency_per_msec;
LOG(GLOBAL, LOG_STATS, 1, "Processor speed: "UINT64_FORMAT_STRING"MHz\n",
kstat_frequency_per_msec/1000);
/* FIXME: There is no check for TSC feature and whether CR4.TSD is set
* so we can read it at CPL 3
*/
if (!DYNAMO_OPTION(kstats))
return;
kstat_init_variables(&process_kstats);
#ifdef DEBUG
process_kstats_outfile = GLOBAL;
#else
/* Open a process-wide kstats file. open_thread_private_file() does the
* job when passed the appropriate basename (2nd arg).
*/
process_kstats_outfile =
open_log_file(kstats_main_logfile_name(), NULL, 0);
#endif
}
void
kstat_exit()
{
if (!DYNAMO_OPTION(kstats))
return;
/* report merged process statistics */
mutex_lock(&process_kstats_lock);
print_file(process_kstats_outfile, "Process KSTATS:\n");
kstat_report(process_kstats_outfile, &process_kstats);
mutex_unlock(&process_kstats_lock);
DELETE_LOCK(process_kstats_lock);
#ifndef DEBUG
os_close(process_kstats_outfile);
#endif
}
static void
kstat_calibrate()
{
uint i;
static bool kstats_calibrated = false;
if (kstats_calibrated)
return;
kstats_calibrated = true; /* slight innocent race */
/* FIXME: once we calculate the overhead of calibrate_empty we can
* subtract that from every self_time measurement.
* FIXME: The cost of
* overhead_nested-overhead_empty should be subtracted from each
* subpath_time.
*/
for (i=0; i<10000; i++) {
KSTART(overhead_nested);
KSTART(overhead_empty);
KSTOP_NOT_PROPAGATED(overhead_empty);
KSTOP(overhead_nested);
}
}
void
kstat_thread_init(dcontext_t *dcontext)
{
thread_kstats_t *new_thread_kstats;
if (!DYNAMO_OPTION(kstats))
return; /* dcontext->thread_kstats stays NULL */
/* allocated on thread heap - use global if timing initialization matters */
new_thread_kstats = HEAP_TYPE_ALLOC(dcontext, thread_kstats_t, ACCT_STATS, UNPROTECTED);
LOG(THREAD, LOG_STATS, 2, "thread_kstats="PFX" size=%d\n", new_thread_kstats,
sizeof(thread_kstats_t));
/* initialize any thread stats bookkeeping fields before assigning to dcontext */
kstat_init_variables(&new_thread_kstats->vars_kstats);
/* add a dummy node to save one branch in UPDATE_CURRENT_COUNTER */
new_thread_kstats->stack_kstats.depth = 1;
new_thread_kstats->thread_id = get_thread_id();
#ifdef DEBUG
new_thread_kstats->outfile_kstats = THREAD;
#else
new_thread_kstats->outfile_kstats =
open_log_file(kstats_thread_logfile_name(), NULL, 0);
#endif
dcontext->thread_kstats = new_thread_kstats;
/* need to do this in a thread after it's initialized */
kstat_calibrate();
KSTART_DC(dcontext, thread_measured);
LOG(THREAD, LOG_STATS, 2, "threads_started\n");
}
static void
kstat_merge_var(kstat_variable_t *destination, kstat_variable_t *source)
{
destination->num_self += source->num_self;
destination->total_self += source->total_self;
destination->total_sub += source->total_sub;
destination->total_outliers += source->total_outliers;
if (destination->min_cum > source->min_cum)
destination->min_cum = source->min_cum;
if (destination->max_cum < source->max_cum)
destination->max_cum = source->max_cum;
}
/* make sure sourcevars are merged in only once */
static void
kstat_merge(kstat_variables_t *destinationvars, kstat_variables_t *sourcevars)
{
#define KSTAT_DEF(desc, name) kstat_merge_var(&destinationvars->name, &sourcevars->name);
#include "kstatsx.h"
#undef KSTAT_DEF
}
void
dump_thread_kstats(dcontext_t *dcontext)
{
if (dcontext->thread_kstats == NULL)
return;
/* add thread id's in case outfile is rerouted to process_kstats_outfile */
print_file(dcontext->thread_kstats->outfile_kstats, "Thread %d KSTATS {\n",
dcontext->thread_kstats->thread_id);
kstat_report(dcontext->thread_kstats->outfile_kstats,
&dcontext->thread_kstats->vars_kstats);
print_file(dcontext->thread_kstats->outfile_kstats, "} KSTATS\n");
}
#ifdef DEBUG
/* We don't keep the variable name, but instead we lookup by addr when necessary */
/* Too convoluted solution but since not common case, we don't bother
* to initialize a name for each var in kstat_init_variables() */
static const char *
kstat_var_name(dcontext_t *dcontext, kstat_variable_t *kvar)
{
kstat_variables_t *kvs = &dcontext->thread_kstats->vars_kstats;
#define KSTAT_DEF(desc, name) \
if (kvar == &kvs->name) \
return #name;
#include "kstatsx.h"
#undef KSTAT_DEF
ASSERT_NOT_REACHED();
return "#ERROR#";
}
void
kstats_dump_stack(dcontext_t *dcontext)
{
uint i;
LOG(THREAD, 1, LOG_STATS, "Thread KSTAT stack:\n");
for (i = dcontext->thread_kstats->stack_kstats.depth - 1;
i > 0; i--) {
LOG(THREAD, 1, LOG_STATS, "[%d] "PIFX" %s\n",
i, &dcontext->thread_kstats->stack_kstats.node[i],
kstat_var_name(dcontext, dcontext->thread_kstats->stack_kstats.node[i].var));
}
}
#endif
void
kstat_thread_exit(dcontext_t *dcontext)
{
thread_kstats_t *old_thread_kstats = dcontext->thread_kstats;
if (old_thread_kstats == NULL)
return;
LOG(THREAD, LOG_ALL, 2, "kstat_thread_exit: kstats stack is:\n");
DOLOG(2, LOG_STATS, {kstats_dump_stack(dcontext);});
KSTOP_DC(dcontext, thread_measured);
ASSERT(old_thread_kstats->stack_kstats.depth == 1);
dump_thread_kstats(dcontext);
/* a good time to combine all of these with the global statistics */
mutex_lock(&process_kstats_lock);
kstat_merge(&process_kstats, &old_thread_kstats->vars_kstats);
mutex_unlock(&process_kstats_lock);
#ifdef DEBUG
/* for non-debug we do fast exit path and don't free local heap */
/* no clean up needed */
dcontext->thread_kstats = NULL; /* disable thread kstats before freeing memory */
HEAP_TYPE_FREE(dcontext, old_thread_kstats, thread_kstats_t, ACCT_STATS, UNPROTECTED);
#else
close_log_file(dcontext->thread_kstats->outfile_kstats);
#endif /* DEBUG */
}
#endif /* KSTATS */
#ifdef KSTAT_UNIT_TEST
uint
kstat_test()
{
KSTART(measured);
printf("test %d\n", __LINE__);
KSTART(empty);
KSTOP(empty);
KSTART(empty);
KSTOP(empty);
printf("test %d\n", __LINE__);
KSTART(dr_default);
KSWITCH(dr_existing_bb);
KSTOP_NOT_MATCHING(dr_default);
KSTART(dr_default);
KSTART(empty);
KSTOP(empty);
KSTOP_NOT_MATCHING(dr_default);
KSTART(dr_default);
KSTOP_NOT_MATCHING(dr_default);
printf("test %d\n", __LINE__);
KSTART(wait_event);
KSTOP_NOT_PROPAGATED(wait_event);
printf("test %d\n", __LINE__);
{
uint i;
for (i=0; i<100000; i++) {
KSTART(bb);
KSTOP(bb);
}
}
{
uint i,j,k;
for (i=0; i<100; i++) {
KSTART(iloop);
for (j=0; j<100; j++) {
KSTART(jloop);
for (k=0; k<100000; k++)
/* nothing */;
KSTOP(jloop);
}
KSTOP(iloop);
}
}
KSTART(syscalls);
KSTART(wait_event);
KSTOP_NOT_PROPAGATED(wait_event);
KSTART(wait_event);
KSTOP(wait_event);
KSTOP(syscalls);
printf("test %d\n", __LINE__);
KSTOP(measured);
}
int main()
{
kstat_init();
kstat_thread_init();
kstat_test();
kstat_thread_exit();
kstat_exit();
}
#endif /* KSTAT_UNIT_TEST */