// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/tracked_objects.h"
#include <ctype.h>
#include <limits.h>
#include <stdlib.h>
#include <limits>
#include "base/atomicops.h"
#include "base/base_switches.h"
#include "base/command_line.h"
#include "base/compiler_specific.h"
#include "base/debug/leak_annotations.h"
#include "base/logging.h"
#include "base/metrics/histogram_macros.h"
#include "base/numerics/safe_conversions.h"
#include "base/numerics/safe_math.h"
#include "base/process/process_handle.h"
#include "base/third_party/valgrind/memcheck.h"
#include "base/threading/platform_thread.h"
#include "base/threading/worker_pool.h"
#include "base/tracking_info.h"
#include "build/build_config.h"
using base::TimeDelta;
namespace base {
class TimeDelta;
}
namespace tracked_objects {
namespace {
constexpr char kWorkerThreadSanitizedName[] = "WorkerThread-*";
// When ThreadData is first initialized, should we start in an ACTIVE state to
// record all of the startup-time tasks, or should we start up DEACTIVATED, so
// that we only record after parsing the command line flag --enable-tracking.
// Note that the flag may force either state, so this really controls only the
// period of time up until that flag is parsed. If there is no flag seen, then
// this state may prevail for much or all of the process lifetime.
const ThreadData::Status kInitialStartupState = ThreadData::PROFILING_ACTIVE;
// Possible states of the profiler timing enabledness.
enum {
UNDEFINED_TIMING,
ENABLED_TIMING,
DISABLED_TIMING,
};
// State of the profiler timing enabledness.
base::subtle::Atomic32 g_profiler_timing_enabled = UNDEFINED_TIMING;
// Returns whether profiler timing is enabled. The default is true, but this
// may be overridden by a command-line flag. Some platforms may
// programmatically set this command-line flag to the "off" value if it's not
// specified.
// This in turn can be overridden by explicitly calling
// ThreadData::EnableProfilerTiming, say, based on a field trial.
inline bool IsProfilerTimingEnabled() {
// Reading |g_profiler_timing_enabled| is done without a barrier because
// multiple initialization is not an issue, while a barrier could be relatively
// costly given that this method is sometimes called in a tight loop.
base::subtle::Atomic32 current_timing_enabled =
base::subtle::NoBarrier_Load(&g_profiler_timing_enabled);
if (current_timing_enabled == UNDEFINED_TIMING) {
if (!base::CommandLine::InitializedForCurrentProcess())
return true;
current_timing_enabled =
(base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
switches::kProfilerTiming) ==
switches::kProfilerTimingDisabledValue)
? DISABLED_TIMING
: ENABLED_TIMING;
base::subtle::NoBarrier_Store(&g_profiler_timing_enabled,
current_timing_enabled);
}
return current_timing_enabled == ENABLED_TIMING;
}
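// Illustrative usage (a sketch, not production code): timing can be forced on
// explicitly, e.g. based on a field trial as the comment above suggests. The
// trial name "ProfilerTiming" below is only an assumed example value.
//
//   if (base::FieldTrialList::FindFullName("ProfilerTiming") == "Enabled")
//     tracked_objects::ThreadData::EnableProfilerTiming();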
// Sanitizes a thread name by replacing a trailing sequence of digits with "*".
// Examples:
// 1. "BrowserBlockingWorker1/23857" => "BrowserBlockingWorker1/*"
// 2. "Chrome_IOThread" => "Chrome_IOThread"
std::string SanitizeThreadName(const std::string& thread_name) {
size_t i = thread_name.length();
while (i > 0 && isdigit(thread_name[i - 1]))
--i;
if (i == thread_name.length())
return thread_name;
return thread_name.substr(0, i) + '*';
}
} // namespace
//------------------------------------------------------------------------------
// DeathData tallies durations when a death takes place.
DeathData::DeathData()
: count_(0),
sample_probability_count_(0),
run_duration_sum_(0),
queue_duration_sum_(0),
run_duration_max_(0),
queue_duration_max_(0),
alloc_ops_(0),
free_ops_(0),
#if !defined(ARCH_CPU_64_BITS)
byte_update_counter_(0),
#endif
allocated_bytes_(),
freed_bytes_(),
alloc_overhead_bytes_(),
max_allocated_bytes_(0),
run_duration_sample_(0),
queue_duration_sample_(0),
last_phase_snapshot_(nullptr) {
}
DeathData::DeathData(const DeathData& other)
: count_(other.count_),
sample_probability_count_(other.sample_probability_count_),
run_duration_sum_(other.run_duration_sum_),
queue_duration_sum_(other.queue_duration_sum_),
run_duration_max_(other.run_duration_max_),
queue_duration_max_(other.queue_duration_max_),
alloc_ops_(other.alloc_ops_),
free_ops_(other.free_ops_),
#if !defined(ARCH_CPU_64_BITS)
byte_update_counter_(0),
#endif
allocated_bytes_(other.allocated_bytes_),
freed_bytes_(other.freed_bytes_),
alloc_overhead_bytes_(other.alloc_overhead_bytes_),
max_allocated_bytes_(other.max_allocated_bytes_),
run_duration_sample_(other.run_duration_sample_),
queue_duration_sample_(other.queue_duration_sample_),
last_phase_snapshot_(nullptr) {
// This constructor will be used by std::map when adding new DeathData values
// to the map. At that point, last_phase_snapshot_ is still NULL, so we don't
// need to worry about ownership transfer.
DCHECK(other.last_phase_snapshot_ == nullptr);
}
DeathData::~DeathData() {
while (last_phase_snapshot_) {
const DeathDataPhaseSnapshot* snapshot = last_phase_snapshot_;
last_phase_snapshot_ = snapshot->prev;
delete snapshot;
}
}
// TODO(jar): I need to see whether this branch-avoiding macro is worth using.
//
// This macro has no branching, so it is surely fast, and is equivalent to:
// if (assign_it)
// target = source;
// We use a macro rather than a template to force this to inline.
// Related code for calculating max is discussed on the web.
#define CONDITIONAL_ASSIGN(assign_it, target, source) \
((target) ^= ((target) ^ (source)) & -static_cast<int32_t>(assign_it))
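// Illustrative example (a sketch; the macro is not otherwise exercised here):
// tracking a running maximum without a branch.
//
//   int32_t max_so_far = 10;
//   int32_t sample = 42;
//   CONDITIONAL_ASSIGN(sample > max_so_far, max_so_far, sample);
//   // max_so_far is now 42. When the condition is false, the mask
//   // -static_cast<int32_t>(false) is 0 and |target| is left unchanged; when
//   // true, the mask is all ones and |target| ^= (|target| ^ |source|) yields
//   // |source|.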
void DeathData::RecordDurations(const base::TimeDelta queue_duration,
const base::TimeDelta run_duration,
const uint32_t random_number) {
// We'll just clamp at INT_MAX, but we should note this in the UI as such.
if (count_ < INT_MAX)
base::subtle::NoBarrier_Store(&count_, count_ + 1);
int sample_probability_count =
base::subtle::NoBarrier_Load(&sample_probability_count_);
if (sample_probability_count < INT_MAX)
++sample_probability_count;
base::subtle::NoBarrier_Store(&sample_probability_count_,
sample_probability_count);
base::subtle::NoBarrier_Store(
&queue_duration_sum_,
queue_duration_sum_ + queue_duration.InMilliseconds());
base::subtle::NoBarrier_Store(
&run_duration_sum_, run_duration_sum_ + run_duration.InMilliseconds());
if (queue_duration_max() < queue_duration.InMilliseconds())
base::subtle::NoBarrier_Store(&queue_duration_max_,
queue_duration.InMilliseconds());
if (run_duration_max() < run_duration.InMilliseconds())
base::subtle::NoBarrier_Store(&run_duration_max_,
run_duration.InMilliseconds());
// Take a uniformly distributed sample over all durations ever supplied during
// the current profiling phase.
// The probability that we (instead) use this new sample is
// 1/sample_probability_count_. This results in a completely uniform selection
// of the sample (at least when we don't clamp sample_probability_count_...
// but that should be inconsequentially likely). We ignore the fact that we
// correlated our selection of a sample to the run and queue times (i.e., we
// used them to generate random_number).
CHECK_GT(sample_probability_count, 0);
if (0 == (random_number % sample_probability_count)) {
base::subtle::NoBarrier_Store(&queue_duration_sample_,
queue_duration.InMilliseconds());
base::subtle::NoBarrier_Store(&run_duration_sample_,
run_duration.InMilliseconds());
}
}
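// Note (illustrative derivation): the sampling above is reservoir sampling
// with a reservoir of size one. The k-th duration replaces the stored sample
// with probability 1/k, so after n recorded durations the k-th one is still
// the sample with probability
//   (1/k) * (k/(k+1)) * ((k+1)/(k+2)) * ... * ((n-1)/n) = 1/n,
// i.e. uniformly, assuming |random_number| is uniformly distributed and
// sample_probability_count_ never clamps at INT_MAX.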
void DeathData::RecordAllocations(const uint32_t alloc_ops,
const uint32_t free_ops,
const uint32_t allocated_bytes,
const uint32_t freed_bytes,
const uint32_t alloc_overhead_bytes,
const uint32_t max_allocated_bytes) {
#if !defined(ARCH_CPU_64_BITS)
// On 32 bit systems, we use an even/odd locking scheme to make it possible to
// read 64 bit sums consistently. Note that since writes are bound to the
// thread owning this DeathData, there's no race on these writes.
int32_t counter_val =
base::subtle::Barrier_AtomicIncrement(&byte_update_counter_, 1);
// The counter must be odd.
DCHECK_EQ(1, counter_val & 1);
#endif
// Use saturating arithmetic.
SaturatingMemberAdd(alloc_ops, &alloc_ops_);
SaturatingMemberAdd(free_ops, &free_ops_);
SaturatingByteCountMemberAdd(allocated_bytes, &allocated_bytes_);
SaturatingByteCountMemberAdd(freed_bytes, &freed_bytes_);
SaturatingByteCountMemberAdd(alloc_overhead_bytes, &alloc_overhead_bytes_);
int32_t max = base::saturated_cast<int32_t>(max_allocated_bytes);
if (max > max_allocated_bytes_)
base::subtle::NoBarrier_Store(&max_allocated_bytes_, max);
#if !defined(ARCH_CPU_64_BITS)
// Now release the value while rolling to even.
counter_val = base::subtle::Barrier_AtomicIncrement(&byte_update_counter_, 1);
DCHECK_EQ(0, counter_val & 1);
#endif
}
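// Summary sketch (illustrative, no new behavior): on 32 bit targets the byte
// counters follow a seqlock-style protocol between the owning (writer) thread
// and any reader thread:
//
//   Writer (owning thread):               Reader (any thread):
//     counter: even -> odd                  spin until counter is even
//     store hi_word and lo_word             read hi_word and lo_word
//     counter: odd -> even                  retry if counter changed
//
// ConsistentCumulativeByteCountRead() below implements the reader side.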
void DeathData::OnProfilingPhaseCompleted(int profiling_phase) {
// Snapshotting and storing current state.
last_phase_snapshot_ =
new DeathDataPhaseSnapshot(profiling_phase, *this, last_phase_snapshot_);
// Not touching fields for which a delta can be computed by comparing with a
// snapshot from the previous phase. Resetting other fields. Sample values
// will be reset upon next death recording because sample_probability_count_
// is set to 0.
// We avoid resetting to 0 in favor of deltas whenever possible. The reason
// is that for incrementable fields, resetting to 0 from the snapshot thread
// potentially in parallel with incrementing in the death thread may result in
// significant data corruption that has a potential to grow with time. Not
// resetting incrementable fields and using deltas will cause any
// off-by-little corruptions to be likely fixed at the next snapshot.
// The max values are not incrementable, and cannot be deduced using deltas
// for a given phase. Hence, we have to reset them to 0. But the potential
// damage is limited to getting the previous phase's max to apply for the next
// phase, and the error doesn't have a potential to keep growing with new
// resets.
// sample_probability_count_ is incrementable, but must be reset to 0 at the
// phase end, so that we start a new uniformly randomized sample selection
// after the reset. These fields are updated using atomics. However, race
// conditions are possible since these are updated individually and not
// together atomically, resulting in the values being mutually inconsistent.
// The damage is limited to selecting a wrong sample, which is not something
// that can cause accumulating or cascading effects.
// If there were no inconsistencies caused by race conditions, we never send a
// sample for the previous phase in the next phase's snapshot because
// ThreadData::SnapshotExecutedTasks doesn't send deltas with 0 count.
base::subtle::NoBarrier_Store(&sample_probability_count_, 0);
base::subtle::NoBarrier_Store(&run_duration_max_, 0);
base::subtle::NoBarrier_Store(&queue_duration_max_, 0);
}
// static
int64_t DeathData::UnsafeCumulativeByteCountRead(
const CumulativeByteCount* count) {
#if defined(ARCH_CPU_64_BITS)
return base::subtle::NoBarrier_Load(count);
#else
return static_cast<int64_t>(base::subtle::NoBarrier_Load(&count->hi_word))
<< 32 |
static_cast<uint32_t>(base::subtle::NoBarrier_Load(&count->lo_word));
#endif
}
int64_t DeathData::ConsistentCumulativeByteCountRead(
const CumulativeByteCount* count) const {
#if defined(ARCH_CPU_64_BITS)
return base::subtle::NoBarrier_Load(count);
#else
// We're on a 32 bit system, so this is going to be more involved.
while (true) {
int32_t update_counter = 0;
// Acquire the starting count, spin until it's even.
// The value of |kYieldProcessorTries| is cargo culted from the page
// allocator, TCMalloc, Windows critical section defaults, and various other
// recommendations.
// This is not performance critical here, as the reads are vanishingly rare
// and only happen under the --enable-heap-profiling=task-profiler flag.
constexpr size_t kYieldProcessorTries = 1000;
size_t lock_attempts = 0;
do {
++lock_attempts;
if (lock_attempts == kYieldProcessorTries) {
// Yield the current thread periodically to avoid writer starvation.
base::PlatformThread::YieldCurrentThread();
lock_attempts = 0;
}
update_counter = base::subtle::NoBarrier_Load(&byte_update_counter_);
} while (update_counter & 1);
// Make sure the reads below see all changes before the update counter.
base::subtle::MemoryBarrier();
DCHECK_EQ(update_counter & 1, 0);
int64_t value =
static_cast<int64_t>(base::subtle::NoBarrier_Load(&count->hi_word))
<< 32 |
static_cast<uint32_t>(base::subtle::NoBarrier_Load(&count->lo_word));
// Release_Load() semantics here ensure that the |byte_update_counter_|
// value seen is at least as old as the |hi_word|/|lo_word| values seen
// above, which means that if it's still equal to |update_counter|, the read
// is consistent, since the above MemoryBarrier() ensures they're at least
// as new as the afore-obtained |update_counter|'s value.
if (update_counter == base::subtle::Release_Load(&byte_update_counter_))
return value;
}
#endif
}
// static
void DeathData::SaturatingMemberAdd(const uint32_t addend,
base::subtle::Atomic32* sum) {
constexpr int32_t kInt32Max = std::numeric_limits<int32_t>::max();
// Bail out quickly if there is no work or the sum is already saturated.
if (addend == 0U || *sum == kInt32Max)
return;
base::CheckedNumeric<int32_t> new_sum = *sum;
new_sum += addend;
base::subtle::NoBarrier_Store(sum, new_sum.ValueOrDefault(kInt32Max));
}
void DeathData::SaturatingByteCountMemberAdd(const uint32_t addend,
CumulativeByteCount* sum) {
constexpr int64_t kInt64Max = std::numeric_limits<int64_t>::max();
// Bail out quickly if there is no work or the sum is already saturated.
if (addend == 0U || UnsafeCumulativeByteCountRead(sum) == kInt64Max)
return;
base::CheckedNumeric<int64_t> new_sum = UnsafeCumulativeByteCountRead(sum);
new_sum += addend;
int64_t new_value = new_sum.ValueOrDefault(kInt64Max);
// Update our value.
#if defined(ARCH_CPU_64_BITS)
base::subtle::NoBarrier_Store(sum, new_value);
#else
// This must only be called while the update counter is "locked" (i.e. odd).
DCHECK_EQ(base::subtle::NoBarrier_Load(&byte_update_counter_) & 1, 1);
base::subtle::NoBarrier_Store(&sum->hi_word,
static_cast<int32_t>(new_value >> 32));
base::subtle::NoBarrier_Store(&sum->lo_word,
static_cast<int32_t>(new_value & 0xFFFFFFFF));
#endif
}
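// Note (illustrative): base::CheckedNumeric flags overflow instead of
// wrapping, which is what provides the saturation above. For example:
//
//   base::CheckedNumeric<int32_t> n = std::numeric_limits<int32_t>::max();
//   n += 1;  // The sum overflows, so |n| becomes invalid.
//   int32_t v = n.ValueOrDefault(std::numeric_limits<int32_t>::max());
//   // v == INT32_MAX: the total saturates rather than wrapping around.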
//------------------------------------------------------------------------------
DeathDataSnapshot::DeathDataSnapshot()
: count(-1),
run_duration_sum(-1),
run_duration_max(-1),
run_duration_sample(-1),
queue_duration_sum(-1),
queue_duration_max(-1),
queue_duration_sample(-1),
alloc_ops(-1),
free_ops(-1),
allocated_bytes(-1),
freed_bytes(-1),
alloc_overhead_bytes(-1),
max_allocated_bytes(-1) {}
DeathDataSnapshot::DeathDataSnapshot(int count,
int32_t run_duration_sum,
int32_t run_duration_max,
int32_t run_duration_sample,
int32_t queue_duration_sum,
int32_t queue_duration_max,
int32_t queue_duration_sample,
int32_t alloc_ops,
int32_t free_ops,
int64_t allocated_bytes,
int64_t freed_bytes,
int64_t alloc_overhead_bytes,
int32_t max_allocated_bytes)
: count(count),
run_duration_sum(run_duration_sum),
run_duration_max(run_duration_max),
run_duration_sample(run_duration_sample),
queue_duration_sum(queue_duration_sum),
queue_duration_max(queue_duration_max),
queue_duration_sample(queue_duration_sample),
alloc_ops(alloc_ops),
free_ops(free_ops),
allocated_bytes(allocated_bytes),
freed_bytes(freed_bytes),
alloc_overhead_bytes(alloc_overhead_bytes),
max_allocated_bytes(max_allocated_bytes) {}
DeathDataSnapshot::DeathDataSnapshot(const DeathData& death_data)
: count(death_data.count()),
run_duration_sum(death_data.run_duration_sum()),
run_duration_max(death_data.run_duration_max()),
run_duration_sample(death_data.run_duration_sample()),
queue_duration_sum(death_data.queue_duration_sum()),
queue_duration_max(death_data.queue_duration_max()),
queue_duration_sample(death_data.queue_duration_sample()),
alloc_ops(death_data.alloc_ops()),
free_ops(death_data.free_ops()),
allocated_bytes(death_data.allocated_bytes()),
freed_bytes(death_data.freed_bytes()),
alloc_overhead_bytes(death_data.alloc_overhead_bytes()),
max_allocated_bytes(death_data.max_allocated_bytes()) {}
DeathDataSnapshot::DeathDataSnapshot(const DeathDataSnapshot& death_data) =
default;
DeathDataSnapshot::~DeathDataSnapshot() {
}
DeathDataSnapshot DeathDataSnapshot::Delta(
const DeathDataSnapshot& older) const {
return DeathDataSnapshot(
count - older.count, run_duration_sum - older.run_duration_sum,
run_duration_max, run_duration_sample,
queue_duration_sum - older.queue_duration_sum, queue_duration_max,
queue_duration_sample, alloc_ops - older.alloc_ops,
free_ops - older.free_ops, allocated_bytes - older.allocated_bytes,
freed_bytes - older.freed_bytes,
alloc_overhead_bytes - older.alloc_overhead_bytes, max_allocated_bytes);
}
//------------------------------------------------------------------------------
BirthOnThread::BirthOnThread(const Location& location,
const ThreadData& current)
: location_(location),
birth_thread_(&current) {
}
//------------------------------------------------------------------------------
BirthOnThreadSnapshot::BirthOnThreadSnapshot() {
}
BirthOnThreadSnapshot::BirthOnThreadSnapshot(const BirthOnThread& birth)
: location(birth.location()),
sanitized_thread_name(birth.birth_thread()->sanitized_thread_name()) {}
BirthOnThreadSnapshot::~BirthOnThreadSnapshot() {
}
//------------------------------------------------------------------------------
Births::Births(const Location& location, const ThreadData& current)
: BirthOnThread(location, current),
birth_count_(1) { }
int Births::birth_count() const { return birth_count_; }
void Births::RecordBirth() { ++birth_count_; }
//------------------------------------------------------------------------------
// ThreadData maintains the central data for all births and deaths on a single
// thread.
// TODO(jar): We should pull all these static vars together, into a struct, and
// optimize layout so that we benefit from locality of reference during accesses
// to them.
// static
ThreadData::NowFunction* ThreadData::now_function_for_testing_ = NULL;
// A TLS slot which points to the ThreadData instance for the current thread.
// We do a fake initialization here (zeroing out data), and then the real
// in-place construction happens when we call tls_index_.Initialize().
// static
base::ThreadLocalStorage::StaticSlot ThreadData::tls_index_ = TLS_INITIALIZER;
// static
int ThreadData::cleanup_count_ = 0;
// static
int ThreadData::incarnation_counter_ = 0;
// static
ThreadData* ThreadData::all_thread_data_list_head_ = NULL;
// static
ThreadData* ThreadData::first_retired_thread_data_ = NULL;
// static
base::LazyInstance<base::Lock>::Leaky
ThreadData::list_lock_ = LAZY_INSTANCE_INITIALIZER;
// static
base::subtle::Atomic32 ThreadData::status_ = ThreadData::UNINITIALIZED;
ThreadData::ThreadData(const std::string& sanitized_thread_name)
: next_(NULL),
next_retired_thread_data_(NULL),
sanitized_thread_name_(sanitized_thread_name),
incarnation_count_for_pool_(-1),
current_stopwatch_(NULL) {
DCHECK(sanitized_thread_name_.empty() ||
!isdigit(sanitized_thread_name_.back()));
PushToHeadOfList(); // Which sets real incarnation_count_for_pool_.
}
ThreadData::~ThreadData() {
}
void ThreadData::PushToHeadOfList() {
// Toss in a hint of randomness (atop the uninitialized value).
(void)VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(&random_number_,
sizeof(random_number_));
MSAN_UNPOISON(&random_number_, sizeof(random_number_));
random_number_ += static_cast<uint32_t>(this - static_cast<ThreadData*>(0));
random_number_ ^=
static_cast<uint32_t>((Now() - base::TimeTicks()).InMilliseconds());
DCHECK(!next_);
base::AutoLock lock(*list_lock_.Pointer());
incarnation_count_for_pool_ = incarnation_counter_;
next_ = all_thread_data_list_head_;
all_thread_data_list_head_ = this;
}
// static
ThreadData* ThreadData::first() {
base::AutoLock lock(*list_lock_.Pointer());
return all_thread_data_list_head_;
}
ThreadData* ThreadData::next() const { return next_; }
// static
void ThreadData::InitializeThreadContext(const std::string& thread_name) {
if (base::WorkerPool::RunsTasksOnCurrentThread())
return;
DCHECK_NE(thread_name, kWorkerThreadSanitizedName);
EnsureTlsInitialization();
ThreadData* current_thread_data =
reinterpret_cast<ThreadData*>(tls_index_.Get());
if (current_thread_data)
return; // Browser tests instigate this.
current_thread_data =
GetRetiredOrCreateThreadData(SanitizeThreadName(thread_name));
tls_index_.Set(current_thread_data);
}
// static
ThreadData* ThreadData::Get() {
if (!tls_index_.initialized())
return NULL; // For unittests only.
ThreadData* registered = reinterpret_cast<ThreadData*>(tls_index_.Get());
if (registered)
return registered;
// We must be a worker thread, since we didn't pre-register.
ThreadData* worker_thread_data =
GetRetiredOrCreateThreadData(kWorkerThreadSanitizedName);
tls_index_.Set(worker_thread_data);
return worker_thread_data;
}
// static
void ThreadData::OnThreadTermination(void* thread_data) {
DCHECK(thread_data); // TLS should *never* call us with a NULL.
// We must NOT do any allocations during this callback. There is a chance
// that the allocator is no longer active on this thread.
reinterpret_cast<ThreadData*>(thread_data)->OnThreadTerminationCleanup();
}
void ThreadData::OnThreadTerminationCleanup() {
// We must NOT do any allocations during this callback. There is a chance that
// the allocator is no longer active on this thread.
// The list_lock_ was created when we registered the callback, so it won't be
// allocated here despite the lazy reference.
base::AutoLock lock(*list_lock_.Pointer());
if (incarnation_counter_ != incarnation_count_for_pool_)
return; // ThreadData was constructed in an earlier unit test.
++cleanup_count_;
// Add this ThreadData to a retired list so that it can be reused by a thread
// with the same sanitized name in the future.
// |next_retired_thread_data_| is expected to be nullptr for a ThreadData
// associated with an active thread.
DCHECK(!next_retired_thread_data_);
next_retired_thread_data_ = first_retired_thread_data_;
first_retired_thread_data_ = this;
}
// static
void ThreadData::Snapshot(int current_profiling_phase,
ProcessDataSnapshot* process_data_snapshot) {
// Get an unchanging copy of a ThreadData list.
ThreadData* my_list = ThreadData::first();
// Gather data serially.
// This hackish approach *can* get some slightly corrupt tallies, as we are
// grabbing values without the protection of a lock, but it has the advantage
// of working even with threads that don't have message loops. If a user
// sees any strangeness, they can always just run their stats gathering a
// second time.
BirthCountMap birth_counts;
for (ThreadData* thread_data = my_list; thread_data;
thread_data = thread_data->next()) {
thread_data->SnapshotExecutedTasks(current_profiling_phase,
&process_data_snapshot->phased_snapshots,
&birth_counts);
}
// Add births that are still active -- i.e. objects that have tallied a birth,
// but have not yet tallied a matching death, and hence must be either
// running, queued up, or being held in limbo for future posting.
auto* current_phase_tasks =
&process_data_snapshot->phased_snapshots[current_profiling_phase].tasks;
for (const auto& birth_count : birth_counts) {
if (birth_count.second > 0) {
current_phase_tasks->push_back(
TaskSnapshot(BirthOnThreadSnapshot(*birth_count.first),
DeathDataSnapshot(birth_count.second, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0),
"Still_Alive"));
}
}
}
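// Illustrative usage (a sketch; the phase number 0 is only an example value):
//
//   tracked_objects::ProcessDataSnapshot snapshot;
//   tracked_objects::ThreadData::Snapshot(0 /* current_profiling_phase */,
//                                         &snapshot);
//   // snapshot.phased_snapshots[0].tasks now holds the per-location tallies,
//   // including the synthetic "Still_Alive" entries added above.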
// static
void ThreadData::OnProfilingPhaseCompleted(int profiling_phase) {
// Get an unchanging copy of a ThreadData list.
ThreadData* my_list = ThreadData::first();
// Add snapshots for all instances of death data in all threads serially.
// This hackish approach *can* get some slightly corrupt tallies, as we are
// grabbing values without the protection of a lock, but it has the advantage
// of working even with threads that don't have message loops. Any corruption
// shouldn't cause "cascading damage" to anything else (in later phases).
for (ThreadData* thread_data = my_list; thread_data;
thread_data = thread_data->next()) {
thread_data->OnProfilingPhaseCompletedOnThread(profiling_phase);
}
}
Births* ThreadData::TallyABirth(const Location& location) {
BirthMap::iterator it = birth_map_.find(location);
Births* child;
if (it != birth_map_.end()) {
child = it->second;
child->RecordBirth();
} else {
child = new Births(location, *this); // Leak this.
// Lock since the map may get relocated now, and other threads sometimes
// snapshot it (but they lock before copying it).
base::AutoLock lock(map_lock_);
birth_map_[location] = child;
}
return child;
}
void ThreadData::TallyADeath(const Births& births,
const base::TimeDelta queue_duration,
const TaskStopwatch& stopwatch) {
base::TimeDelta run_duration = stopwatch.RunDuration();
// Stir in some randomness, plus add a constant in case the durations are zero.
const uint32_t kSomePrimeNumber = 2147483647;
random_number_ += queue_duration.InMilliseconds() +
run_duration.InMilliseconds() + kSomePrimeNumber;
// An address is going to have some randomness to it as well ;-).
random_number_ ^=
static_cast<uint32_t>(&births - reinterpret_cast<Births*>(0));
DeathMap::iterator it = death_map_.find(&births);
DeathData* death_data;
if (it != death_map_.end()) {
death_data = &it->second;
} else {
base::AutoLock lock(map_lock_); // Lock as the map may get relocated now.
death_data = &death_map_[&births];
} // Release lock ASAP.
death_data->RecordDurations(queue_duration, run_duration, random_number_);
#if BUILDFLAG(USE_ALLOCATOR_SHIM)
if (stopwatch.heap_tracking_enabled()) {
base::debug::ThreadHeapUsage heap_usage = stopwatch.heap_usage().usage();
// Saturate the 64 bit counts on conversion to 32 bit storage.
death_data->RecordAllocations(
base::saturated_cast<int32_t>(heap_usage.alloc_ops),
base::saturated_cast<int32_t>(heap_usage.free_ops),
base::saturated_cast<int32_t>(heap_usage.alloc_bytes),
base::saturated_cast<int32_t>(heap_usage.free_bytes),
base::saturated_cast<int32_t>(heap_usage.alloc_overhead_bytes),
base::saturated_cast<int32_t>(heap_usage.max_allocated_bytes));
}
#endif
}
// static
Births* ThreadData::TallyABirthIfActive(const Location& location) {
if (!TrackingStatus())
return NULL;
ThreadData* current_thread_data = Get();
if (!current_thread_data)
return NULL;
return current_thread_data->TallyABirth(location);
}
// static
void ThreadData::TallyRunOnNamedThreadIfTracking(
const base::TrackingInfo& completed_task,
const TaskStopwatch& stopwatch) {
// Even if we have been DEACTIVATED, we will process any pending births so
// that our data structures (which counted the outstanding births) remain
// consistent.
const Births* births = completed_task.birth_tally;
if (!births)
return;
ThreadData* current_thread_data = stopwatch.GetThreadData();
if (!current_thread_data)
return;
// Watch out for a race where status_ is changing, and hence one or both
// of start_of_run or end_of_run is zero. In that case, we didn't bother to
// get a time value since we "weren't tracking" and we were trying to be
// efficient by not calling for a genuine time value. For simplicity, we'll
// use a default zero duration when we can't calculate a true value.
base::TimeTicks start_of_run = stopwatch.StartTime();
base::TimeDelta queue_duration;
if (!start_of_run.is_null()) {
queue_duration = start_of_run - completed_task.EffectiveTimePosted();
}
current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}
// static
void ThreadData::TallyRunOnWorkerThreadIfTracking(
const Births* births,
const base::TimeTicks& time_posted,
const TaskStopwatch& stopwatch) {
// Even if we have been DEACTIVATED, we will process any pending births so
// that our data structures (which counted the outstanding births) remain
// consistent.
if (!births)
return;
// TODO(jar): Support the option to coalesce all worker-thread activity under
// one ThreadData instance that uses locks to protect *all* access. This will
// reduce memory (making it provably bounded), but run incrementally slower
// (since we'll use locks on TallyABirth and TallyADeath). The good news is
// that the locks on TallyADeath will be *after* the worker thread has run,
// and hence nothing will be waiting for the completion (... besides some
// other thread that might like to run). Also, the worker threads' tasks are
// generally longer, and hence the cost of the lock may perchance be amortized
// over the long task's lifetime.
ThreadData* current_thread_data = stopwatch.GetThreadData();
if (!current_thread_data)
return;
base::TimeTicks start_of_run = stopwatch.StartTime();
base::TimeDelta queue_duration;
if (!start_of_run.is_null()) {
queue_duration = start_of_run - time_posted;
}
current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}
// static
void ThreadData::TallyRunInAScopedRegionIfTracking(
const Births* births,
const TaskStopwatch& stopwatch) {
// Even if we have been DEACTIVATED, we will process any pending births so
// that our data structures (which counted the outstanding births) remain
// consistent.
if (!births)
return;
ThreadData* current_thread_data = stopwatch.GetThreadData();
if (!current_thread_data)
return;
base::TimeDelta queue_duration;
current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}
void ThreadData::SnapshotExecutedTasks(
int current_profiling_phase,
PhasedProcessDataSnapshotMap* phased_snapshots,
BirthCountMap* birth_counts) {
// Get copy of data, so that the data will not change during the iterations
// and processing.
BirthMap birth_map;
DeathsSnapshot deaths;
SnapshotMaps(current_profiling_phase, &birth_map, &deaths);
for (const auto& birth : birth_map) {
(*birth_counts)[birth.second] += birth.second->birth_count();
}
for (const auto& death : deaths) {
(*birth_counts)[death.first] -= death.first->birth_count();
// For the current death data, walk through all its snapshots, starting from
// the current one, then from the previous profiling phase etc., and for
// each snapshot calculate the delta between the snapshot and the previous
// phase, if any. Store the deltas in the result.
for (const DeathDataPhaseSnapshot* phase = &death.second; phase;
phase = phase->prev) {
const DeathDataSnapshot& death_data =
phase->prev ? phase->death_data.Delta(phase->prev->death_data)
: phase->death_data;
if (death_data.count > 0) {
(*phased_snapshots)[phase->profiling_phase].tasks.push_back(
TaskSnapshot(BirthOnThreadSnapshot(*death.first), death_data,
sanitized_thread_name()));
}
}
}
}
// This may be called from another thread.
void ThreadData::SnapshotMaps(int profiling_phase,
BirthMap* birth_map,
DeathsSnapshot* deaths) {
base::AutoLock lock(map_lock_);
for (const auto& birth : birth_map_)
(*birth_map)[birth.first] = birth.second;
for (const auto& death : death_map_) {
deaths->push_back(std::make_pair(
death.first,
DeathDataPhaseSnapshot(profiling_phase, death.second,
death.second.last_phase_snapshot())));
}
}
void ThreadData::OnProfilingPhaseCompletedOnThread(int profiling_phase) {
base::AutoLock lock(map_lock_);
for (auto& death : death_map_) {
death.second.OnProfilingPhaseCompleted(profiling_phase);
}
}
void ThreadData::EnsureTlsInitialization() {
if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
return; // Someone else did the initialization.
// Due to racy lazy initialization in tests, we'll need to recheck status_
// after we acquire the lock.
// Ensure that we don't double initialize tls. We are called when single
// threaded in the product, but some tests may be racy and lazy about our
// initialization.
base::AutoLock lock(*list_lock_.Pointer());
if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
return; // Someone raced in here and beat us.
// Perform the "real" TLS initialization now, and leave it intact through
// process termination.
if (!tls_index_.initialized()) { // Testing may have initialized this.
DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), UNINITIALIZED);
tls_index_.Initialize(&ThreadData::OnThreadTermination);
DCHECK(tls_index_.initialized());
} else {
// TLS was initialized for us earlier.
DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), DORMANT_DURING_TESTS);
}
// Incarnation counter is only significant to testing, as it otherwise will
// never again change in this process.
++incarnation_counter_;
// The lock is not critical for setting status_, but it doesn't hurt. It also
// ensures that if we have a racy initialization, that we'll bail as soon as
// we get the lock earlier in this method.
base::subtle::Release_Store(&status_, kInitialStartupState);
DCHECK(base::subtle::NoBarrier_Load(&status_) != UNINITIALIZED);
}
// static
void ThreadData::InitializeAndSetTrackingStatus(Status status) {
DCHECK_GE(status, DEACTIVATED);
DCHECK_LE(status, PROFILING_ACTIVE);
EnsureTlsInitialization(); // No-op if already initialized.
if (status > DEACTIVATED)
status = PROFILING_ACTIVE;
base::subtle::Release_Store(&status_, status);
}
// static
ThreadData::Status ThreadData::status() {
return static_cast<ThreadData::Status>(base::subtle::Acquire_Load(&status_));
}
// static
bool ThreadData::TrackingStatus() {
return base::subtle::Acquire_Load(&status_) > DEACTIVATED;
}
// static
void ThreadData::EnableProfilerTiming() {
base::subtle::NoBarrier_Store(&g_profiler_timing_enabled, ENABLED_TIMING);
}
// static
base::TimeTicks ThreadData::Now() {
if (now_function_for_testing_)
return base::TimeTicks() +
base::TimeDelta::FromMilliseconds((*now_function_for_testing_)());
if (IsProfilerTimingEnabled() && TrackingStatus())
return base::TimeTicks::Now();
return base::TimeTicks(); // Super fast when disabled, or not compiled.
}
// static
void ThreadData::EnsureCleanupWasCalled(int major_threads_shutdown_count) {
base::AutoLock lock(*list_lock_.Pointer());
// TODO(jar): until this is working on XP, don't run the real test.
#if 0
// Verify that we've at least shut down/cleaned up the major named threads. The
// caller should tell us how many thread shutdowns should have taken place by
// now.
CHECK_GT(cleanup_count_, major_threads_shutdown_count);
#endif
}
// static
void ThreadData::ShutdownSingleThreadedCleanup(bool leak) {
// This is only called from test code, where we need to cleanup so that
// additional tests can be run.
// We must be single threaded... but be careful anyway.
InitializeAndSetTrackingStatus(DEACTIVATED);
ThreadData* thread_data_list;
{
base::AutoLock lock(*list_lock_.Pointer());
thread_data_list = all_thread_data_list_head_;
all_thread_data_list_head_ = NULL;
++incarnation_counter_;
// To be clean, break apart the retired worker list (though we leak them).
while (first_retired_thread_data_) {
ThreadData* thread_data = first_retired_thread_data_;
first_retired_thread_data_ = thread_data->next_retired_thread_data_;
thread_data->next_retired_thread_data_ = nullptr;
}
}
// Put most global statics back in pristine shape.
cleanup_count_ = 0;
tls_index_.Set(NULL);
// Almost UNINITIALIZED.
base::subtle::Release_Store(&status_, DORMANT_DURING_TESTS);
// To avoid any chance of racing in unit tests, which is the only place we
// call this function, we may sometimes leak all the data structures we
// recovered, as they may still be in use on threads from prior tests!
if (leak) {
ThreadData* thread_data = thread_data_list;
while (thread_data) {
ANNOTATE_LEAKING_OBJECT_PTR(thread_data);
thread_data = thread_data->next();
}
return;
}
// When we want to cleanup (on a single thread), here is what we do.
// Do actual recursive delete in all ThreadData instances.
while (thread_data_list) {
ThreadData* next_thread_data = thread_data_list;
thread_data_list = thread_data_list->next();
for (BirthMap::iterator it = next_thread_data->birth_map_.begin();
next_thread_data->birth_map_.end() != it; ++it)
delete it->second; // Delete the Birth Records.
delete next_thread_data; // Includes all Death Records.
}
}
// static
ThreadData* ThreadData::GetRetiredOrCreateThreadData(
const std::string& sanitized_thread_name) {
SCOPED_UMA_HISTOGRAM_TIMER("TrackedObjects.GetRetiredOrCreateThreadData");
{
base::AutoLock lock(*list_lock_.Pointer());
ThreadData** pcursor = &first_retired_thread_data_;
ThreadData* cursor = first_retired_thread_data_;
// Assuming that there aren't more than a few tens of retired ThreadData
// instances, this lookup should be quick compared to the thread creation
// time. Retired ThreadData instances cannot be stored in a map because
// insertions are done from OnThreadTerminationCleanup() where allocations
// are not allowed.
//
// Note: Test processes may have more than a few tens of retired ThreadData
// instances.
while (cursor) {
if (cursor->sanitized_thread_name() == sanitized_thread_name) {
DCHECK_EQ(*pcursor, cursor);
*pcursor = cursor->next_retired_thread_data_;
cursor->next_retired_thread_data_ = nullptr;
return cursor;
}
pcursor = &cursor->next_retired_thread_data_;
cursor = cursor->next_retired_thread_data_;
}
}
return new ThreadData(sanitized_thread_name);
}
//------------------------------------------------------------------------------
TaskStopwatch::TaskStopwatch()
: wallclock_duration_(),
current_thread_data_(NULL),
excluded_duration_(),
parent_(NULL) {
#if DCHECK_IS_ON()
state_ = CREATED;
child_ = NULL;
#endif
#if BUILDFLAG(USE_ALLOCATOR_SHIM)
heap_tracking_enabled_ =
base::debug::ThreadHeapUsageTracker::IsHeapTrackingEnabled();
#endif
}
TaskStopwatch::~TaskStopwatch() {
#if DCHECK_IS_ON()
DCHECK(state_ != RUNNING);
DCHECK(child_ == NULL);
#endif
}
void TaskStopwatch::Start() {
#if DCHECK_IS_ON()
DCHECK(state_ == CREATED);
state_ = RUNNING;
#endif
start_time_ = ThreadData::Now();
#if BUILDFLAG(USE_ALLOCATOR_SHIM)
if (heap_tracking_enabled_)
heap_usage_.Start();
#endif
current_thread_data_ = ThreadData::Get();
if (!current_thread_data_)
return;
parent_ = current_thread_data_->current_stopwatch_;
#if DCHECK_IS_ON()
if (parent_) {
DCHECK(parent_->state_ == RUNNING);
DCHECK(parent_->child_ == NULL);
parent_->child_ = this;
}
#endif
current_thread_data_->current_stopwatch_ = this;
}
void TaskStopwatch::Stop() {
const base::TimeTicks end_time = ThreadData::Now();
#if DCHECK_IS_ON()
DCHECK(state_ == RUNNING);
state_ = STOPPED;
DCHECK(child_ == NULL);
#endif
#if BUILDFLAG(USE_ALLOCATOR_SHIM)
if (heap_tracking_enabled_)
heap_usage_.Stop(true);
#endif
if (!start_time_.is_null() && !end_time.is_null()) {
wallclock_duration_ = end_time - start_time_;
}
if (!current_thread_data_)
return;
DCHECK(current_thread_data_->current_stopwatch_ == this);
current_thread_data_->current_stopwatch_ = parent_;
if (!parent_)
return;
#if DCHECK_IS_ON()
DCHECK(parent_->state_ == RUNNING);
DCHECK(parent_->child_ == this);
parent_->child_ = NULL;
#endif
parent_->excluded_duration_ += wallclock_duration_;
parent_ = NULL;
}
base::TimeTicks TaskStopwatch::StartTime() const {
#if DCHECK_IS_ON()
DCHECK(state_ != CREATED);
#endif
return start_time_;
}
base::TimeDelta TaskStopwatch::RunDuration() const {
#if DCHECK_IS_ON()
DCHECK(state_ == STOPPED);
#endif
return wallclock_duration_ - excluded_duration_;
}
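// Illustrative sketch: RunDuration() is exclusive time. When stopwatches nest
// on one thread, Stop() adds the child's wallclock time to the parent's
// |excluded_duration_|, so:
//
//   TaskStopwatch outer;
//   outer.Start();
//   {
//     TaskStopwatch inner;
//     inner.Start();
//     // ... nested work ...
//     inner.Stop();
//   }
//   outer.Stop();
//   // outer.RunDuration() == outer wallclock minus inner wallclock.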
ThreadData* TaskStopwatch::GetThreadData() const {
#if DCHECK_IS_ON()
DCHECK(state_ != CREATED);
#endif
return current_thread_data_;
}
//------------------------------------------------------------------------------
// DeathDataPhaseSnapshot
DeathDataPhaseSnapshot::DeathDataPhaseSnapshot(
int profiling_phase,
const DeathData& death,
const DeathDataPhaseSnapshot* prev)
: profiling_phase(profiling_phase), death_data(death), prev(prev) {}
//------------------------------------------------------------------------------
// TaskSnapshot
TaskSnapshot::TaskSnapshot() {
}
TaskSnapshot::TaskSnapshot(const BirthOnThreadSnapshot& birth,
const DeathDataSnapshot& death_data,
const std::string& death_sanitized_thread_name)
: birth(birth),
death_data(death_data),
death_sanitized_thread_name(death_sanitized_thread_name) {}
TaskSnapshot::~TaskSnapshot() {
}
//------------------------------------------------------------------------------
// ProcessDataPhaseSnapshot
ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot() {
}
ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot(
const ProcessDataPhaseSnapshot& other) = default;
ProcessDataPhaseSnapshot::~ProcessDataPhaseSnapshot() {
}
//------------------------------------------------------------------------------
// ProcessDataPhaseSnapshot
ProcessDataSnapshot::ProcessDataSnapshot()
#if !defined(OS_NACL)
: process_id(base::GetCurrentProcId()) {
#else
: process_id(base::kNullProcessId) {
#endif
}
ProcessDataSnapshot::ProcessDataSnapshot(const ProcessDataSnapshot& other) =
default;
ProcessDataSnapshot::~ProcessDataSnapshot() {
}
} // namespace tracked_objects