// Source blob: 4238bef628c59df92a88b36482fd60667d970261
// Copyright 2012 Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
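// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.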
// See the License for the specific language governing permissions and
// limitations under the License.
#include <dia2.h>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <utility>

#include "base/file_path.h"
#include "base/win/scoped_comptr.h"
#include "syzygy/grinder/grinder.h"
namespace grinder {
// An address expressed relative to a module (a "relative virtual address"),
// stored as a uint32. See ModuleRVA for the module/RVA pairing.
typedef uint32 RVA;
// A worker class to sink profile trace events and output the aggregate data in
// KCacheGrind-compatible format.
// The profiler instrumentation captures the wall-clock time from entry to exit
// for each pair of caller/function for each invocation. This is termed
// "inclusive" time, as it includes the time spent in other functions called.
// The KCacheGrind file format also requires listing "exclusive" time for each
// function, where exclusive time is the amount of time spent executing the
// function itself, i.e. exclusive of the time spent calling other functions.
// The profile data is captured in a trace log. The trace log is a run of
// records where each record in the log is associated with a particular thread
// (and process), and contains a set of invocation records.
// Each invocation record contains inclusive wall-clock time (and potentially
// other inclusive metrics) for one or more invocations from a particular
// caller address, to a particular function.
// Note that the same caller/function pair may occur multiple times in a trace
// log, even for the same thread, as the profile instrumentation spills a trace
// record to the log when the number of caller/function pairs it's seen exceeds
// the size of the thread-local buffer used to aggregate the data.
// This class aggregates the data from a trace log, and builds a graph of
// function nodes and call edges. For each call edge, it aggregates the data
// from one or more log records, by summing up the call counts and inclusive
// metrics. For each function node, it also computes the exclusive cost, by
// summing up the cost of the incoming edges, and subtracting the cost of the
// outgoing edges.
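// For information on the KCacheGrind file format, see the Callgrind format
// specification in the Valgrind manual:
//   http://valgrind.org/docs/manual/cl-format.html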
class ProfileGrinder : public GrinderInterface {
// @name Accessors and mutators.
// @{
// If thread_parts is true, the grinder will aggregate and output
// separate parts for each thread seen in the trace file(s).
bool thread_parts() const { return thread_parts_; }
void set_thread_parts(bool thread_parts) { thread_parts_ = thread_parts; }
// @}
// @name GrinderInterface implementation.
// @{
virtual bool ParseCommandLine(const CommandLine* command_line) OVERRIDE;
virtual void SetParser(Parser* parser) OVERRIDE;
virtual bool Grind() OVERRIDE;
virtual bool OutputData(FILE* file) OVERRIDE;
// @}
// @name ParseEventHandler overrides.
// @{
virtual void OnInvocationBatch(
base::Time time,
DWORD process_id,
DWORD thread_id,
size_t num_invocations,
const TraceBatchInvocationInfo* data) OVERRIDE;
virtual void OnThreadName(base::Time time,
DWORD process_id,
DWORD thread_id,
const base::StringPiece& thread_name) OVERRIDE;
// @}
typedef sym_util::ModuleInformation ModuleInformation;
// Forward declarations.
struct PartData;
struct ModuleRVA;
struct Metrics;
struct InvocationNode;
struct InvocationEdge;
typedef std::set<ModuleInformation,
bool (*)(const ModuleInformation& a, const ModuleInformation& b)>
typedef std::map<ModuleRVA, InvocationNode> InvocationNodeMap;
typedef std::pair<ModuleRVA, ModuleRVA> InvocationEdgeKey;
typedef std::map<InvocationEdgeKey, InvocationEdge> InvocationEdgeMap;
typedef base::win::ScopedComPtr<IDiaSession> SessionPtr;
typedef std::map<const ModuleInformation*, SessionPtr> ModuleSessionMap;
bool GetSessionForModule(const ModuleInformation* module,
IDiaSession** session_out);
// Finds or creates the part data for the given @p thread_id.
PartData* FindOrCreatePart(DWORD process_id, DWORD thread_id);
// Retrieves the function containing @p address.
// @param symbol on success returns the function's private symbol, or
// public symbol if no private symbol is available.
// @returns true on success.
bool GetFunctionByRVA(IDiaSession* session,
RVA address,
IDiaSymbol** symbol);
bool GetInfoForCallerRVA(const ModuleRVA& caller,
RVA* function_rva,
size_t* line);
bool GetInfoForFunctionRVA(const ModuleRVA& function,
std::wstring* function_name,
std::wstring* file_name,
size_t* line);
// Converts an absolute address to an RVA.
void ConvertToModuleRVA(uint32 process_id,
trace::parser::AbsoluteAddress64 addr,
ModuleRVA* rva);
// Aggregates a single invocation info and/or creates a new node and edge.
void AggregateEntryToPart(const ModuleRVA& function_rva,
const ModuleRVA& caller_rva,
const InvocationInfo& info,
PartData* part);
// This functions adds all caller edges to each function node's linked list of
// callers. In so doing, it also computes each function node's inclusive cost.
// @returns true on success, false on failure.
bool ResolveCallers();
// Resolves callers for @p part.
bool ResolveCallersForPart(PartData* part);
// Outputs data for @p part to @p file.
bool OutputDataForPart(const PartData& part, FILE* file);
// Stores the modules we encounter.
ModuleInformationSet modules_;
// Stores the DIA session objects we have going for each module.
ModuleSessionMap module_sessions_;
// The parts we store. If thread_parts_ is false, we store only a single
// part with id 0.
typedef std::map<uint32, PartData> PartDataMap;
PartDataMap parts_;
// If true, data is aggregated and output per-thread.
bool thread_parts_;
Parser* parser_;
// The data we store for each part: the identity of the thread the part
// belongs to, plus the function nodes and call edges aggregated for it.
struct ProfileGrinder::PartData {
  // NOTE(review): no constructor is visible for this struct, so process_id_
  // and thread_id_ get no initial value from here - confirm that whoever
  // creates a part assigns them.

  // The thread name for this part.
  std::string thread_name_;

  // The process ID for this part.
  uint32 process_id_;

  // The thread ID for this part.
  uint32 thread_id_;

  // Stores the invocation nodes, aka the functions.
  InvocationNodeMap nodes_;

  // Stores the invocation edges.
  InvocationEdgeMap edges_;
};
// RVA in a module. The module should be a canonical pointer
// to the module information to make this comparable against
// other RVAs in the same module.
struct ProfileGrinder::ModuleRVA {
  // Creates an instance with no module and an RVA of zero.
  ModuleRVA() : module(NULL), rva(0) {
  }

  // Orders first by module pointer, then by RVA within the same module.
  bool operator < (const ModuleRVA& o) const {
    if (module > o.module)
      return false;
    if (module < o.module)
      return true;
    return rva < o.rva;
  }

  // The remaining comparisons are all derived from operator<, so they stay
  // consistent with the ordering used for map keys.
  bool operator > (const ModuleRVA& o) const {
    return o < *this;
  }
  bool operator == (const ModuleRVA& o) const {
    return !(o < *this || *this < o);
  }
  bool operator != (const ModuleRVA& o) const {
    return !(*this == o);
  }

  // The module this address belongs to; compared by pointer identity.
  const sym_util::ModuleInformation* module;
  // The address within the module.
  RVA rva;
};
// The metrics we capture per function and per caller.
struct ProfileGrinder::Metrics {
  // All metrics start out at zero.
  Metrics() : num_calls(0), cycles_min(0), cycles_max(0), cycles_sum(0) {
  }

  // The number of calls aggregated into this instance.
  uint64 num_calls;
  // NOTE(review): presumably the smallest, largest and total cycle counts
  // across the aggregated invocations - confirm against the implementation.
  uint64 cycles_min;
  uint64 cycles_max;
  uint64 cycles_sum;
};
// An invocation node represents a function.
struct ProfileGrinder::InvocationNode {
  // Creates a node with an empty list of calls.
  InvocationNode() : first_call(NULL) {
  }

  // RVA for the function this instance represents.
  ModuleRVA function;

  // The metrics we've aggregated for this function.
  Metrics metrics;

  // Linked list of all the edges where the caller resolves to us.
  // Subsequent entries are reached through InvocationEdge::next_call.
  InvocationEdge* first_call;
};
// An invocation edge represents a caller->function pair.
struct ProfileGrinder::InvocationEdge {
  // Creates an unresolved edge: no calling function, line zero, and no next
  // edge. Initializers are listed in member declaration order.
  InvocationEdge() : line(0), caller_function(NULL), next_call(NULL) {
  }

  // The function/caller pair we denote.
  ModuleRVA function;
  ModuleRVA caller;

  // Line number of the caller.
  size_t line;

  // The metrics we've aggregated for this caller/function pair.
  Metrics metrics;

  // The calling function - resolved from caller.
  InvocationNode* caller_function;

  // Chains to the next edge resolving to the
  // same calling function.
  InvocationEdge* next_call;
};
} // namespace grinder