base/trace_event/heap_profiler_heap_dump_writer.cc - chromium/src - Git at Google

 // Copyright 2015 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "base/trace_event/heap_profiler_heap_dump_writer.h"

 #include <algorithm>
 #include <iterator>
 #include <tuple>
 #include <utility>
 #include <vector>

 #include "base/format_macros.h"
 #include "base/logging.h"
 #include "base/strings/stringprintf.h"
 #include "base/trace_event/heap_profiler_stack_frame_deduplicator.h"
 #include "base/trace_event/heap_profiler_type_name_deduplicator.h"
 #include "base/trace_event/trace_event_argument.h"

 // Most of what the |HeapDumpWriter| does is aggregating detailed information
 // about the heap and deciding what to dump. The Input to this process is a list
 // of |AllocationContext|s and size pairs.
 //
 // The pairs are grouped into |Bucket|s. A bucket is a group of (context, size)
 // pairs where the properties of the contexts share a prefix. (Type name is
 // considered a list of length one here.) First all pairs are put into one
 // bucket that represents the entire heap. Then this bucket is recursively
 // broken down into smaller buckets. Each bucket keeps track of whether further
 // breakdown is possible.

 namespace base {
 namespace trace_event {
 namespace internal {
 namespace {

 // Denotes a property of |AllocationContext| to break down by.
 enum class BreakDownMode { kByBacktrace, kByTypeName };

 // A group of bytes for which the context shares a prefix.
 struct Bucket {
   Bucket() : size(0), backtrace_cursor(0), is_broken_down_by_type_name(false) {}

   std::vector<std::pair<const AllocationContext*, size_t>> bytes_by_context;

   // The sum of the sizes of |bytes_by_context|.
   size_t size;

   // The index of the stack frame that has not yet been broken down by. For all
   // elements in this bucket, the stack frames 0 up to (but not including) the
   // cursor, must be equal.
   size_t backtrace_cursor;

   // When true, the type name for all elements in this bucket must be equal.
   bool is_broken_down_by_type_name;
 };

 // Comparison operator to order buckets by their size.
 bool operator<(const Bucket& lhs, const Bucket& rhs) {
   return lhs.size < rhs.size;
 }

 // Groups the allocations in the bucket by |breakBy|. The buckets in the
 // returned list will have |backtrace_cursor| advanced or
 // |is_broken_down_by_type_name| set depending on the property to group by.
 std::vector<Bucket> GetSubbuckets(const Bucket& bucket, BreakDownMode breakBy) {
   base::hash_map<const char*, Bucket> breakdown;

   if (breakBy == BreakDownMode::kByBacktrace) {
     for (const auto& context_and_size : bucket.bytes_by_context) {
       const Backtrace& backtrace = context_and_size.first->backtrace;
       const char* const* begin = std::begin(backtrace.frames);
       const char* const* end = std::end(backtrace.frames);
       const char* const* cursor = begin + bucket.backtrace_cursor;

       // The backtrace in the context is padded with null pointers, but these
       // should not be considered for breakdown. Adjust end to point past the
       // last non-null frame.
       while (begin != end && *(end - 1) == nullptr)
         end--;

       DCHECK_LE(cursor, end);

       if (cursor != end) {
         Bucket& subbucket = breakdown[*cursor];
         subbucket.size += context_and_size.second;
         subbucket.bytes_by_context.push_back(context_and_size);
         subbucket.backtrace_cursor = bucket.backtrace_cursor + 1;
         subbucket.is_broken_down_by_type_name =
             bucket.is_broken_down_by_type_name;
         DCHECK_GT(subbucket.size, 0u);
       }
     }
   } else if (breakBy == BreakDownMode::kByTypeName) {
     if (!bucket.is_broken_down_by_type_name) {
       for (const auto& context_and_size : bucket.bytes_by_context) {
         const AllocationContext* context = context_and_size.first;
         Bucket& subbucket = breakdown[context->type_name];
         subbucket.size += context_and_size.second;
         subbucket.bytes_by_context.push_back(context_and_size);
         subbucket.backtrace_cursor = bucket.backtrace_cursor;
         subbucket.is_broken_down_by_type_name = true;
         DCHECK_GT(subbucket.size, 0u);
       }
     }
   }

   std::vector<Bucket> buckets;
   buckets.reserve(breakdown.size());
   for (auto key_bucket : breakdown)
     buckets.push_back(key_bucket.second);

   return buckets;
 }

 // Breaks down the bucket by |breakBy|. Returns only buckets that contribute
 // significantly to the total size. The long tail is omitted.
 std::vector<Bucket> BreakDownBy(const Bucket& bucket, BreakDownMode breakBy) {
   std::vector<Bucket> buckets = GetSubbuckets(bucket, breakBy);

   // Ensure that |buckets| is a max-heap (the data structure, not memory heap),
   // so its front contains the largest bucket. Buckets should be iterated
   // ordered by size, but sorting the vector is overkill because the long tail
   // of small buckets will be discarded. By using a max-heap, the optimal case
   // where all but the first bucket are discarded is O(n). The worst case where
   // no bucket is discarded is doing a heap sort, which is O(n log n).
   std::make_heap(buckets.begin(), buckets.end());

   // Keep including buckets until adding one would increase the number of
   // bytes accounted for by less than a percent. The large buckets end up in
   // [it, end()), [begin(), it) is the part that contains the max-heap of small
   // buckets. TODO(ruuda): tweak the heuristic.
   size_t accounted_for = 0;
   std::vector<Bucket>::iterator it;
   for (it = buckets.end(); it != buckets.begin(); --it) {
     // Compute contribution to number of bytes accounted for in percent, rounded
     // down due to integer division. Buckets are iterated by descending size,
     // so later buckets cannot have a larger contribution than this one.
     accounted_for += buckets.front().size;
     size_t contribution = buckets.front().size * 100 / accounted_for;
     if (contribution == 0)
       break;

     // Put the largest bucket in [begin, it) at |it - 1| and max-heapify
     // [begin, it - 1). This puts the next largest bucket at |buckets.front()|.
     std::pop_heap(buckets.begin(), it);
   }

   // At this point, |buckets| looks like this (numbers are bucket sizes):
   //
   // <-- max-heap of small buckets --->
   //                                  <-- large buckets by ascending size -->
   // [ 19 | 11 | 13 | 7 | 2 | 5 | ... | 83 | 89 | 97 ]
   //   ^                                ^              ^
   //   |                                |              |
   //   begin()                          it             end()

   // Discard the long tail of buckets that contribute less than a percent.
   buckets.erase(buckets.begin(), it);

   return buckets;
 }

 }  // namespace

 bool operator<(Entry lhs, Entry rhs) {
   // There is no need to compare |size|. If the backtrace and type name are
   // equal then the sizes must be equal as well.
   return std::tie(lhs.stack_frame_id, lhs.type_id) <
          std::tie(rhs.stack_frame_id, rhs.type_id);
 }

 HeapDumpWriter::HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator,
                                TypeNameDeduplicator* type_name_deduplicator)
     : stack_frame_deduplicator_(stack_frame_deduplicator),
       type_name_deduplicator_(type_name_deduplicator) {}

 HeapDumpWriter::~HeapDumpWriter() {}

 bool HeapDumpWriter::AddEntryForBucket(const Bucket& bucket) {
   // The contexts in the bucket are all different, but the [begin, cursor) range
   // is equal for all contexts in the bucket, and the type names are the same if
   // |is_broken_down_by_type_name| is set.
   DCHECK(!bucket.bytes_by_context.empty());

   const AllocationContext* context = bucket.bytes_by_context.front().first;

   const char* const* backtrace_begin = std::begin(context->backtrace.frames);
   const char* const* backtrace_end = backtrace_begin + bucket.backtrace_cursor;
   DCHECK_LE(bucket.backtrace_cursor, arraysize(context->backtrace.frames));

   Entry entry;
   entry.stack_frame_id =
       stack_frame_deduplicator_->Insert(backtrace_begin, backtrace_end);

   // Deduplicate the type name, or use ID -1 if type name is not set.
   entry.type_id = bucket.is_broken_down_by_type_name
                       ? type_name_deduplicator_->Insert(context->type_name)
                       : -1;

   entry.size = bucket.size;

   auto position_and_inserted = entries_.insert(entry);
   return position_and_inserted.second;
 }

 void HeapDumpWriter::BreakDown(const Bucket& bucket) {
   auto by_backtrace = BreakDownBy(bucket, BreakDownMode::kByBacktrace);
   auto by_type_name = BreakDownBy(bucket, BreakDownMode::kByTypeName);

   // Insert entries for the buckets. If a bucket was not present before, it has
   // not been broken down before, so recursively continue breaking down in that
   // case. There might be multiple routes to the same entry (first break down
   // by type name, then by backtrace, or first by backtrace and then by type),
   // so a set is used to avoid dumping and breaking down entries more than once.

   for (const Bucket& subbucket : by_backtrace)
     if (AddEntryForBucket(subbucket))
       BreakDown(subbucket);

   for (const Bucket& subbucket : by_type_name)
     if (AddEntryForBucket(subbucket))
       BreakDown(subbucket);
 }

 const std::set<Entry>& HeapDumpWriter::Summarize(
     const hash_map<AllocationContext, size_t>& bytes_by_context) {
   // Start with one bucket that represents the entire heap. Iterate by
   // reference, because the allocation contexts are going to point to allocation
   // contexts stored in |bytes_by_context|.
   Bucket root_bucket;
   for (const auto& context_and_size : bytes_by_context) {
     const AllocationContext* context = &context_and_size.first;
     const size_t size = context_and_size.second;
     root_bucket.bytes_by_context.push_back(std::make_pair(context, size));
     root_bucket.size += size;
   }

   AddEntryForBucket(root_bucket);

   // Recursively break down the heap and fill |entries_| with entries to dump.
   BreakDown(root_bucket);

   return entries_;
 }

 scoped_refptr<TracedValue> Serialize(const std::set<Entry>& entries) {
   std::string buffer;
   scoped_refptr<TracedValue> traced_value = new TracedValue;

   traced_value->BeginArray("entries");

   for (const Entry& entry : entries) {
     traced_value->BeginDictionary();

     // Format size as hexadecimal string into |buffer|.
     SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.size));
     traced_value->SetString("size", buffer);

     if (entry.stack_frame_id == -1) {
       // An empty backtrace (which will have ID -1) is represented by the empty
       // string, because there is no leaf frame to reference in |stackFrames|.
       traced_value->SetString("bt", "");
     } else {
       // Format index of the leaf frame as a string, because |stackFrames| is a
       // dictionary, not an array.
       SStringPrintf(&buffer, "%i", entry.stack_frame_id);
       traced_value->SetString("bt", buffer);
     }

     // Type ID -1 (cumulative size for all types) is represented by the absence
     // of the "type" key in the dictionary.
     if (entry.type_id != -1) {
       // Format the type ID as a string.
       SStringPrintf(&buffer, "%i", entry.type_id);
       traced_value->SetString("type", buffer);
     }

     traced_value->EndDictionary();
   }

   traced_value->EndArray();  // "entries"
   return traced_value;
 }

 }  // namespace internal

 scoped_refptr<TracedValue> ExportHeapDump(
     const hash_map<AllocationContext, size_t>& bytes_by_size,
     StackFrameDeduplicator* stack_frame_deduplicator,
     TypeNameDeduplicator* type_name_deduplicator) {
   internal::HeapDumpWriter writer(stack_frame_deduplicator,
                                   type_name_deduplicator);
   return Serialize(writer.Summarize(bytes_by_size));
 }

 }  // namespace trace_event
 }  // namespace base
	// Copyright 2015 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "base/trace_event/heap_profiler_heap_dump_writer.h"

	#include <algorithm>
	#include <iterator>
	#include <tuple>
	#include <utility>
	#include <vector>

	#include "base/format_macros.h"
	#include "base/logging.h"
	#include "base/strings/stringprintf.h"
	#include "base/trace_event/heap_profiler_stack_frame_deduplicator.h"
	#include "base/trace_event/heap_profiler_type_name_deduplicator.h"
	#include "base/trace_event/trace_event_argument.h"

	// Most of what the \|HeapDumpWriter\| does is aggregating detailed information
	// about the heap and deciding what to dump. The Input to this process is a list
	// of \|AllocationContext\|s and size pairs.
	//
	// The pairs are grouped into \|Bucket\|s. A bucket is a group of (context, size)
	// pairs where the properties of the contexts share a prefix. (Type name is
	// considered a list of length one here.) First all pairs are put into one
	// bucket that represents the entire heap. Then this bucket is recursively
	// broken down into smaller buckets. Each bucket keeps track of whether further
	// breakdown is possible.

	namespace base {
	namespace trace_event {
	namespace internal {
	namespace {

	// Denotes a property of \|AllocationContext\| to break down by.
	enum class BreakDownMode { kByBacktrace, kByTypeName };

	// A group of bytes for which the context shares a prefix.
	struct Bucket {
	Bucket() : size(0), backtrace_cursor(0), is_broken_down_by_type_name(false) {}

	std::vector<std::pair<const AllocationContext*, size_t>> bytes_by_context;

	// The sum of the sizes of \|bytes_by_context\|.
	size_t size;

	// The index of the stack frame that has not yet been broken down by. For all
	// elements in this bucket, the stack frames 0 up to (but not including) the
	// cursor, must be equal.
	size_t backtrace_cursor;

	// When true, the type name for all elements in this bucket must be equal.
	bool is_broken_down_by_type_name;
	};

	// Comparison operator to order buckets by their size.
	bool operator<(const Bucket& lhs, const Bucket& rhs) {
	return lhs.size < rhs.size;
	}

	// Groups the allocations in the bucket by \|breakBy\|. The buckets in the
	// returned list will have \|backtrace_cursor\| advanced or
	// \|is_broken_down_by_type_name\| set depending on the property to group by.
	std::vector<Bucket> GetSubbuckets(const Bucket& bucket, BreakDownMode breakBy) {
	base::hash_map<const char*, Bucket> breakdown;

	if (breakBy == BreakDownMode::kByBacktrace) {
	for (const auto& context_and_size : bucket.bytes_by_context) {
	const Backtrace& backtrace = context_and_size.first->backtrace;
	const char* const* begin = std::begin(backtrace.frames);
	const char* const* end = std::end(backtrace.frames);
	const char* const* cursor = begin + bucket.backtrace_cursor;

	// The backtrace in the context is padded with null pointers, but these
	// should not be considered for breakdown. Adjust end to point past the
	// last non-null frame.
	while (begin != end && *(end - 1) == nullptr)
	end--;

	DCHECK_LE(cursor, end);

	if (cursor != end) {
	Bucket& subbucket = breakdown[*cursor];
	subbucket.size += context_and_size.second;
	subbucket.bytes_by_context.push_back(context_and_size);
	subbucket.backtrace_cursor = bucket.backtrace_cursor + 1;
	subbucket.is_broken_down_by_type_name =
	bucket.is_broken_down_by_type_name;
	DCHECK_GT(subbucket.size, 0u);
	}
	}
	} else if (breakBy == BreakDownMode::kByTypeName) {
	if (!bucket.is_broken_down_by_type_name) {
	for (const auto& context_and_size : bucket.bytes_by_context) {
	const AllocationContext* context = context_and_size.first;
	Bucket& subbucket = breakdown[context->type_name];
	subbucket.size += context_and_size.second;
	subbucket.bytes_by_context.push_back(context_and_size);
	subbucket.backtrace_cursor = bucket.backtrace_cursor;
	subbucket.is_broken_down_by_type_name = true;
	DCHECK_GT(subbucket.size, 0u);
	}
	}
	}

	std::vector<Bucket> buckets;
	buckets.reserve(breakdown.size());
	for (auto key_bucket : breakdown)
	buckets.push_back(key_bucket.second);

	return buckets;
	}

	// Breaks down the bucket by \|breakBy\|. Returns only buckets that contribute
	// significantly to the total size. The long tail is omitted.
	std::vector<Bucket> BreakDownBy(const Bucket& bucket, BreakDownMode breakBy) {
	std::vector<Bucket> buckets = GetSubbuckets(bucket, breakBy);

	// Ensure that \|buckets\| is a max-heap (the data structure, not memory heap),
	// so its front contains the largest bucket. Buckets should be iterated
	// ordered by size, but sorting the vector is overkill because the long tail
	// of small buckets will be discarded. By using a max-heap, the optimal case
	// where all but the first bucket are discarded is O(n). The worst case where
	// no bucket is discarded is doing a heap sort, which is O(n log n).
	std::make_heap(buckets.begin(), buckets.end());

	// Keep including buckets until adding one would increase the number of
	// bytes accounted for by less than a percent. The large buckets end up in
	// [it, end()), [begin(), it) is the part that contains the max-heap of small
	// buckets. TODO(ruuda): tweak the heuristic.
	size_t accounted_for = 0;
	std::vector<Bucket>::iterator it;
	for (it = buckets.end(); it != buckets.begin(); --it) {
	// Compute contribution to number of bytes accounted for in percent, rounded
	// down due to integer division. Buckets are iterated by descending size,
	// so later buckets cannot have a larger contribution than this one.
	accounted_for += buckets.front().size;
	size_t contribution = buckets.front().size * 100 / accounted_for;
	if (contribution == 0)
	break;

	// Put the largest bucket in [begin, it) at \|it - 1\| and max-heapify
	// [begin, it - 1). This puts the next largest bucket at \|buckets.front()\|.
	std::pop_heap(buckets.begin(), it);
	}

	// At this point, \|buckets\| looks like this (numbers are bucket sizes):
	//
	// <-- max-heap of small buckets --->
	// <-- large buckets by ascending size -->
	// [ 19 \| 11 \| 13 \| 7 \| 2 \| 5 \| ... \| 83 \| 89 \| 97 ]
	// ^ ^ ^
	// \| \| \|
	// begin() it end()

	// Discard the long tail of buckets that contribute less than a percent.
	buckets.erase(buckets.begin(), it);

	return buckets;
	}

	} // namespace

	bool operator<(Entry lhs, Entry rhs) {
	// There is no need to compare \|size\|. If the backtrace and type name are
	// equal then the sizes must be equal as well.
	return std::tie(lhs.stack_frame_id, lhs.type_id) <
	std::tie(rhs.stack_frame_id, rhs.type_id);
	}

	HeapDumpWriter::HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator,
	TypeNameDeduplicator* type_name_deduplicator)
	: stack_frame_deduplicator_(stack_frame_deduplicator),
	type_name_deduplicator_(type_name_deduplicator) {}

	HeapDumpWriter::~HeapDumpWriter() {}

	bool HeapDumpWriter::AddEntryForBucket(const Bucket& bucket) {
	// The contexts in the bucket are all different, but the [begin, cursor) range
	// is equal for all contexts in the bucket, and the type names are the same if
	// \|is_broken_down_by_type_name\| is set.
	DCHECK(!bucket.bytes_by_context.empty());

	const AllocationContext* context = bucket.bytes_by_context.front().first;

	const char* const* backtrace_begin = std::begin(context->backtrace.frames);
	const char* const* backtrace_end = backtrace_begin + bucket.backtrace_cursor;
	DCHECK_LE(bucket.backtrace_cursor, arraysize(context->backtrace.frames));

	Entry entry;
	entry.stack_frame_id =
	stack_frame_deduplicator_->Insert(backtrace_begin, backtrace_end);

	// Deduplicate the type name, or use ID -1 if type name is not set.
	entry.type_id = bucket.is_broken_down_by_type_name
	? type_name_deduplicator_->Insert(context->type_name)
	: -1;

	entry.size = bucket.size;

	auto position_and_inserted = entries_.insert(entry);
	return position_and_inserted.second;
	}

	void HeapDumpWriter::BreakDown(const Bucket& bucket) {
	auto by_backtrace = BreakDownBy(bucket, BreakDownMode::kByBacktrace);
	auto by_type_name = BreakDownBy(bucket, BreakDownMode::kByTypeName);

	// Insert entries for the buckets. If a bucket was not present before, it has
	// not been broken down before, so recursively continue breaking down in that
	// case. There might be multiple routes to the same entry (first break down
	// by type name, then by backtrace, or first by backtrace and then by type),
	// so a set is used to avoid dumping and breaking down entries more than once.

	for (const Bucket& subbucket : by_backtrace)
	if (AddEntryForBucket(subbucket))
	BreakDown(subbucket);

	for (const Bucket& subbucket : by_type_name)
	if (AddEntryForBucket(subbucket))
	BreakDown(subbucket);
	}

	const std::set<Entry>& HeapDumpWriter::Summarize(
	const hash_map<AllocationContext, size_t>& bytes_by_context) {
	// Start with one bucket that represents the entire heap. Iterate by
	// reference, because the allocation contexts are going to point to allocation
	// contexts stored in \|bytes_by_context\|.
	Bucket root_bucket;
	for (const auto& context_and_size : bytes_by_context) {
	const AllocationContext* context = &context_and_size.first;
	const size_t size = context_and_size.second;
	root_bucket.bytes_by_context.push_back(std::make_pair(context, size));
	root_bucket.size += size;
	}

	AddEntryForBucket(root_bucket);

	// Recursively break down the heap and fill \|entries_\| with entries to dump.
	BreakDown(root_bucket);

	return entries_;
	}

	scoped_refptr<TracedValue> Serialize(const std::set<Entry>& entries) {
	std::string buffer;
	scoped_refptr<TracedValue> traced_value = new TracedValue;

	traced_value->BeginArray("entries");

	for (const Entry& entry : entries) {
	traced_value->BeginDictionary();

	// Format size as hexadecimal string into \|buffer\|.
	SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.size));
	traced_value->SetString("size", buffer);

	if (entry.stack_frame_id == -1) {
	// An empty backtrace (which will have ID -1) is represented by the empty
	// string, because there is no leaf frame to reference in \|stackFrames\|.
	traced_value->SetString("bt", "");
	} else {
	// Format index of the leaf frame as a string, because \|stackFrames\| is a
	// dictionary, not an array.
	SStringPrintf(&buffer, "%i", entry.stack_frame_id);
	traced_value->SetString("bt", buffer);
	}

	// Type ID -1 (cumulative size for all types) is represented by the absence
	// of the "type" key in the dictionary.
	if (entry.type_id != -1) {
	// Format the type ID as a string.
	SStringPrintf(&buffer, "%i", entry.type_id);
	traced_value->SetString("type", buffer);
	}

	traced_value->EndDictionary();
	}

	traced_value->EndArray(); // "entries"
	return traced_value;
	}

	} // namespace internal

	scoped_refptr<TracedValue> ExportHeapDump(
	const hash_map<AllocationContext, size_t>& bytes_by_size,
	StackFrameDeduplicator* stack_frame_deduplicator,
	TypeNameDeduplicator* type_name_deduplicator) {
	internal::HeapDumpWriter writer(stack_frame_deduplicator,
	type_name_deduplicator);
	return Serialize(writer.Summarize(bytes_by_size));
	}

	} // namespace trace_event
	} // namespace base