// tools/memory/partition_allocator/pa_tcache_inspect.cc - chromium/src - Git at Google

 // Copyright 2021 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 // Connects to a running Chrome process, and outputs statistics about its thread
 // caches.

 #include <fcntl.h>
 #include <signal.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
 #include <algorithm>
 #include <cstring>
 #include <ios>
 #include <iostream>
 #include <map>
 #include <string>
 #include <vector>

 #include "base/allocator/partition_allocator/partition_root.h"
 #include "base/allocator/partition_allocator/thread_cache.h"

 #include "base/check_op.h"
 #include "base/command_line.h"
 #include "base/debug/proc_maps_linux.h"
 #include "base/files/file.h"
 #include "base/files/file_enumerator.h"
 #include "base/files/scoped_file.h"
 #include "base/json/json_writer.h"
 #include "base/logging.h"
 #include "base/posix/eintr_wrapper.h"
 #include "base/strings/string_number_conversions.h"
 #include "base/strings/stringprintf.h"
 #include "base/thread_annotations.h"
 #include "base/time/time.h"
 #include "base/values.h"
 #include "build/build_config.h"
 #include "third_party/abseil-cpp/absl/types/optional.h"
 #include "tools/memory/partition_allocator/inspect_utils.h"

 namespace partition_alloc::internal::tools {
 namespace {

 // Returns the address of the thread cache registry inside process |pid|. The
 // address is found by scanning the process memory (read through |mem_fd|), so
 // no symbols are needed.
 uintptr_t FindThreadCacheRegistry(pid_t pid, int mem_fd) {
   // The registry is looked up with index 1 of the needle array.
   const uintptr_t registry_address =
       IndexThreadCacheNeedleArray(pid, mem_fd, 1);
   return registry_address;
 }

 // Local, suitably aligned storage for the bytes of a T copied out of another
 // process. The copy can be inspected "as if" it were a local object, but any
 // pointer stored inside it still refers to the remote address space, so
 // dereferencing one will at best fault.
 template <typename T>
 class RawBuffer {
  public:
   RawBuffer() = default;

   // Copies sizeof(T) bytes located at |address| in the process behind
   // |mem_fd|. Returns absl::nullopt when ReadMemory() reports a failure.
   static absl::optional<RawBuffer<T>> ReadFromMemFd(int mem_fd,
                                                     uintptr_t address) {
     RawBuffer<T> copy;
     if (!ReadMemory(mem_fd, reinterpret_cast<unsigned long>(address),
                     sizeof(T), copy.get_buffer())) {
       return absl::nullopt;
     }

     return {copy};
   }

   // Typed, read-only view of the copied bytes.
   const T* get() const { return reinterpret_cast<const T*>(buffer_); }
   // Raw storage, used as the destination of the remote read.
   char* get_buffer() { return buffer_; }

  private:
   alignas(T) char buffer_[sizeof(T)];
 };

 // Lists the names of all the threads of process |pid|, keyed by thread ID.
 //
 // Each name is the second whitespace-delimited field of
 // /proc/<pid>/task/<tid>/stat, stored exactly as scanned. Entries that cannot
 // be opened, read or parsed are skipped.
 std::map<base::PlatformThreadId, std::string> ThreadNames(pid_t pid) {
   std::map<base::PlatformThreadId, std::string> result;

   base::FilePath root_path =
       base::FilePath(base::StringPrintf("/proc/%d/task", pid));
   base::FileEnumerator enumerator{root_path, false,
                                   base::FileEnumerator::DIRECTORIES};

   for (base::FilePath path = enumerator.Next(); !path.empty();
        path = enumerator.Next()) {
     auto stat_path = path.Append("stat");
     base::File stat_file{stat_path,
                          base::File::FLAG_OPEN | base::File::FLAG_READ};
     if (!stat_file.IsValid()) {
       LOG(WARNING) << "Invalid file: " << stat_path.value();
       continue;
     }

     char buffer[4096 + 1];
     int bytes_read = stat_file.ReadAtCurrentPos(buffer, 4096);
     if (bytes_read <= 0)
       continue;
     buffer[bytes_read] = '\0';

     int thread_id = 0;
     char name[256];
     // The field width keeps the scan within |name| (255 characters plus the
     // terminating NUL). Both fields must be converted, otherwise |thread_id|
     // and |name| do not hold meaningful data.
     if (sscanf(buffer, "%d %255s", &thread_id, name) != 2) {
       LOG(WARNING) << "Cannot parse: " << stat_path.value();
       continue;
     }
     result[base::PlatformThreadId(thread_id)] = std::string(name);
   }

   return result;
 }

 }  // namespace

 // Copies the thread cache registry and the thread caches of a remote process
 // into local buffers (read through |mem_fd|), and computes statistics from
 // these copies.
 class ThreadCacheInspector {
  public:
   // Distinct from ThreadCache::Bucket because |count| is uint8_t.
   // Filled by AccumulateThreadCacheBuckets(): |count| is summed over all the
   // thread caches, |per_thread_limit| is a limit read from a thread cache
   // bucket, and |size| comes from BucketIndexLookup::bucket_sizes().
   struct BucketStats {
     int count = 0;
     int per_thread_limit = 0;
     size_t size = 0;
   };

   // Only records its arguments; nothing is read from the remote process until
   // GetAllThreadCaches() is called.
   ThreadCacheInspector(uintptr_t registry_addr, int mem_fd, pid_t pid);
   // Re-reads the registry and every thread cache linked from it. Returns
   // false when a read fails.
   bool GetAllThreadCaches();
   // Sum of CachedMemory() over all the copied thread caches.
   size_t CachedMemory() const;
   // Remote address held in |root_| of the first thread cache. CHECK()s that
   // at least one thread cache has been read.
   uintptr_t GetRootAddress();

   // Thread caches copied by the last call to GetAllThreadCaches().
   const std::vector<RawBuffer<base::internal::ThreadCache>>& thread_caches()
       const {
     return thread_caches_;
   }

   // Value of |should_purge_| in the copied thread cache.
   static bool should_purge(
       const RawBuffer<base::internal::ThreadCache>& tcache) {
     return tcache.get()->should_purge_;
   }

   // Per-bucket statistics aggregated over all the copied thread caches.
   std::vector<BucketStats> AccumulateThreadCacheBuckets();
   // Value of |largest_active_bucket_index_| in the copied registry.
   std::uint8_t largest_active_bucket_index() {
     return registry_.get()->largest_active_bucket_index_;
   }

  private:
   // Address of the registry in the remote process.
   uintptr_t registry_addr_;
   // File descriptor used to read the remote process memory.
   int mem_fd_;
   // PID of the inspected process.
   pid_t pid_;
   // Local copies; pointers inside them are remote addresses.
   RawBuffer<base::internal::ThreadCacheRegistry> registry_;
   std::vector<RawBuffer<base::internal::ThreadCache>> thread_caches_;
 };

 // Copies a PartitionRoot and the slot span lists of its small buckets from a
 // remote process (read through |mem_fd|), and exposes per-bucket statistics.
 class PartitionRootInspector {
  public:
   // Statistics for one bucket, filled by GatherStatistics().
   struct BucketStats {
     size_t slot_size = 0;
     // Slots allocated in active slot spans, plus all slots of full slot spans.
     size_t allocated_slots = 0;
     // Sum of |freelist_sizes|.
     size_t freelist_size = 0;

     // Copy of the remote bucket.
     base::internal::PartitionBucket<base::internal::ThreadSafe> bucket;
     // One entry per active slot span: slots that are neither allocated nor
     // unprovisioned.
     std::vector<size_t> freelist_sizes;
     // Flattened versions of the lists.
     std::vector<SlotSpanMetadata<ThreadSafe>> active_slot_spans;
     std::vector<SlotSpanMetadata<ThreadSafe>> empty_slot_spans;
     std::vector<SlotSpanMetadata<ThreadSafe>> decommitted_slot_spans;
   };

   // Only records its arguments; nothing is read until GatherStatistics().
   PartitionRootInspector(uintptr_t root_addr, int mem_fd, pid_t pid)
       : root_addr_(root_addr), mem_fd_(mem_fd), pid_(pid) {}
   // Returns true for success.
   bool GatherStatistics();
   // Statistics collected by the last call to GatherStatistics().
   const std::vector<BucketStats>& bucket_stats() const { return bucket_stats_; }
   // Local copy of the root; pointers inside it are remote addresses.
   const PartitionRoot<base::internal::ThreadSafe>* root() {
     return root_.get();
   }

  private:
   // Re-reads |root_| from the remote process. |root_| is left untouched when
   // the read fails.
   void Update();

   // Address of the root in the remote process.
   uintptr_t root_addr_;
   // File descriptor used to read the remote process memory.
   int mem_fd_;
   // PID of the inspected process.
   pid_t pid_;
   RawBuffer<PartitionRoot<base::internal::ThreadSafe>> root_;
   std::vector<BucketStats> bucket_stats_;
 };

 ThreadCacheInspector::ThreadCacheInspector(uintptr_t registry_addr,
                                            int mem_fd,
                                            pid_t pid)
     : registry_addr_(registry_addr), mem_fd_(mem_fd), pid_(pid) {}

 // Refreshes |registry_| and |thread_caches_| from the target process. Returns
 // false as soon as one remote read fails; |thread_caches_| then only holds the
 // caches read so far.
 //
 // NO_THREAD_SAFETY_ANALYSIS: Well, reading a running process' memory is not
 // really thread-safe.
 bool ThreadCacheInspector::GetAllThreadCaches() NO_THREAD_SAFETY_ANALYSIS {
   thread_caches_.clear();

   // This is going to take a while, make sure that the metadata don't change.
   ScopedSigStopper stopper{pid_};

   auto remote_registry =
       RawBuffer<base::internal::ThreadCacheRegistry>::ReadFromMemFd(
           mem_fd_, registry_addr_);
   if (!remote_registry.has_value())
     return false;
   registry_ = *remote_registry;

   // |current| is an address in the target process: it is only used as the
   // source of the next read, never dereferenced locally.
   for (base::internal::ThreadCache* current = registry_.get()->list_head_;
        current != nullptr;) {
     auto remote_cache = RawBuffer<base::internal::ThreadCache>::ReadFromMemFd(
         mem_fd_, reinterpret_cast<uintptr_t>(current));
     if (!remote_cache.has_value()) {
       LOG(WARNING) << "Failed to read a ThreadCache";
       return false;
     }
     thread_caches_.push_back(remote_cache.value());
     current = remote_cache->get()->next_;
   }
   return true;
 }

 // Returns the sum of CachedMemory() over every copied thread cache.
 size_t ThreadCacheInspector::CachedMemory() const {
   size_t sum = 0;
   for (const auto& cache : thread_caches_)
     sum += cache.get()->CachedMemory();
   return sum;
 }

 // Returns the remote address stored in |root_| of the first copied thread
 // cache. At least one thread cache must have been read.
 uintptr_t ThreadCacheInspector::GetRootAddress() {
   CHECK(!thread_caches_.empty());
   const auto& first_cache = thread_caches_.front();
   return reinterpret_cast<uintptr_t>(first_cache.get()->root_);
 }

 std::vector<ThreadCacheInspector::BucketStats>
 ThreadCacheInspector::AccumulateThreadCacheBuckets() {
   std::vector<BucketStats> result(base::internal::ThreadCache::kBucketCount);
   for (auto& tcache : thread_caches_) {
     for (int i = 0; i < base::internal::ThreadCache::kBucketCount; i++) {
       result[i].count += tcache.get()->buckets_[i].count;
       result[i].per_thread_limit = tcache.get()->buckets_[i].limit;
     }
   }

   base::internal::BucketIndexLookup lookup{};
   for (int i = 0; i < base::internal::ThreadCache::kBucketCount; i++) {
     result[i].size = lookup.bucket_sizes()[i];
   }
   return result;
 }

 void PartitionRootInspector::Update() {
   auto root =
       RawBuffer<PartitionRoot<base::internal::ThreadSafe>>::ReadFromMemFd(
           mem_fd_, root_addr_);
   if (root.has_value())
     root_ = *root;
 }

 namespace {

 // Appends to |list| a copy of every slot span metadata of the remote linked
 // list starting at |head_address| (0 meaning an empty list), following
 // |next_slot_span|. Returns false when a read fails; |list| then holds the
 // elements copied so far.
 bool CopySlotSpanList(
     std::vector<base::internal::SlotSpanMetadata<base::internal::ThreadSafe>>&
         list,
     uintptr_t head_address,
     int mem_fd) {
   using RemoteSlotSpan = RawBuffer<
       base::internal::SlotSpanMetadata<base::internal::ThreadSafe>>;

   uintptr_t current_address = head_address;
   while (current_address != 0) {
     auto remote_slot_span =
         RemoteSlotSpan::ReadFromMemFd(mem_fd, current_address);
     if (!remote_slot_span.has_value())
       return false;

     list.push_back(*remote_slot_span->get());
     // The link is a remote address, only usable as the next read location.
     current_address = reinterpret_cast<uintptr_t>(
         remote_slot_span->get()->next_slot_span);
   }

   return true;
 }

 }  // namespace

 // Re-reads the partition root and rebuilds |bucket_stats_| for its buckets, in
 // order, stopping at the first bucket whose slot size exceeds 4096.
 //
 // Returns true when such a bucket is reached. Returns false when a slot span
 // list cannot be read, when a slot span holds inconsistent counts, or when all
 // buckets were visited without reaching a large one.
 bool PartitionRootInspector::GatherStatistics() {
   // This is going to take a while, make sure that the metadata don't change.
   ScopedSigStopper stopper{pid_};

   Update();
   bucket_stats_.clear();

   for (auto& bucket : root_.get()->buckets) {
     // Only look at the small buckets.
     if (bucket.slot_size > 4096)
       return true;

     BucketStats stats;
     stats.slot_size = bucket.slot_size;
     stats.bucket = bucket;

     // Flatten the three remote lists; give up on the first failed read.
     const bool lists_copied =
         CopySlotSpanList(
             stats.active_slot_spans,
             reinterpret_cast<uintptr_t>(bucket.active_slot_spans_head),
             mem_fd_) &&
         CopySlotSpanList(
             stats.empty_slot_spans,
             reinterpret_cast<uintptr_t>(bucket.empty_slot_spans_head),
             mem_fd_) &&
         CopySlotSpanList(
             stats.decommitted_slot_spans,
             reinterpret_cast<uintptr_t>(bucket.decommitted_slot_spans_head),
             mem_fd_);
     if (!lists_copied)
       return false;

     const auto slots_per_span = bucket.get_slots_per_span();
     for (const auto& slot_span : stats.active_slot_spans) {
       const uint16_t allocated = slot_span.num_allocated_slots;
       stats.allocated_slots += allocated;

       const size_t allocated_unprovisioned =
           slot_span.num_allocated_slots + slot_span.num_unprovisioned_slots;
       // Inconsistent data. This can happen since we stopped the process at an
       // arbitrary point.
       if (allocated_unprovisioned > slots_per_span)
         return false;

       // Whatever is neither allocated nor unprovisioned sits on the freelist.
       const size_t free_slots = slots_per_span - allocated_unprovisioned;
       stats.freelist_size += free_slots;
       stats.freelist_sizes.push_back(free_slots);
     }

     // Full slot spans are not in any list.
     stats.allocated_slots +=
         bucket.num_full_slot_spans * bucket.get_slots_per_span();
     bucket_stats_.push_back(stats);
   }

   // We should have found at least one bucket too large, and returned earlier.
   return false;
 }

 // Prints one bucket as "size, per-thread limit, count, memory in kiB",
 // tab-separated and without a trailing newline. The line starts with "*" when
 // |is_limit| is true, and with a space otherwise.
 void DisplayBucket(const ThreadCacheInspector::BucketStats& bucket,
                    bool is_limit) {
   const size_t memory_bytes = bucket.size * bucket.count;
   const char* marker = is_limit ? "*" : " ";

   std::cout << marker
             << base::StringPrintf("% 4d\t% 4d\t% 4d\t% 4dkiB",
                                   static_cast<int>(bucket.size),
                                   static_cast<int>(bucket.per_thread_limit),
                                   static_cast<int>(bucket.count),
                                   static_cast<int>(memory_bytes / 1024));
 }

 // Prints one line per thread cache (name, cached memory, purge flag and
 // allocation hit/miss percentages), followed by a line aggregating all the
 // threads.
 //
 // |tid_to_name| maps thread IDs to display names. Looking up an unknown ID
 // inserts an empty name, which is why the map is taken by non-const reference.
 void DisplayPerThreadData(
     ThreadCacheInspector& inspector,
     std::map<base::PlatformThreadId, std::string>& tid_to_name) {
   // Integer percentage of |part| in |total|. A thread that has not allocated
   // anything yet has a zero |total|: report 0% instead of dividing by zero.
   auto percentage = [](uint64_t part, uint64_t total) -> uint64_t {
     return total ? (100 * part) / total : 0;
   };
   constexpr size_t kNameColumnWidth = 20;

   std::cout << "Found " << inspector.thread_caches().size()
             << " caches, total cached memory = "
             << inspector.CachedMemory() / 1024 << "kiB"
             << "\n";

   std::cout << "Per thread:\n"
             << "Thread Name         Size\tPurge\n"
             << std::string(80, '-') << "\n";
   base::ThreadCacheStats all_threads_stats = {0};
   for (const auto& tcache : inspector.thread_caches()) {
     base::ThreadCacheStats stats = {0};
     // No alloc stats, they reach into tcache->root_, which is not valid.
     tcache.get()->AccumulateStats(&stats);
     tcache.get()->AccumulateStats(&all_threads_stats);
     const uint64_t count = stats.alloc_count;
     const uint64_t hit_rate = percentage(stats.alloc_hits, count);
     const uint64_t too_large = percentage(stats.alloc_miss_too_large, count);
     const uint64_t empty = percentage(stats.alloc_miss_empty, count);

     std::string thread_name = tid_to_name[tcache.get()->thread_id()];
     // Names wider than the column would make the subtraction wrap around;
     // fall back to a single separating space for them.
     const size_t padding_width =
         thread_name.size() < kNameColumnWidth
             ? kNameColumnWidth - thread_name.size()
             : 1;
     std::string padding(padding_width, ' ');
     std::cout << thread_name << padding << tcache.get()->CachedMemory() / 1024
               << "kiB\t" << (inspector.should_purge(tcache) ? 'X' : ' ')
               << "\tHit Rate = " << hit_rate << "%"
               << "\tToo Large = " << too_large << "%"
               << "\tEmpty = " << empty << "%"
               << "\t Count = " << count / 1000 << "k"
               << "\n";
   }

   const uint64_t count = all_threads_stats.alloc_count;
   const uint64_t hit_rate = percentage(all_threads_stats.alloc_hits, count);
   const uint64_t too_large =
       percentage(all_threads_stats.alloc_miss_too_large, count);
   const uint64_t empty = percentage(all_threads_stats.alloc_miss_empty, count);
   std::cout << "\nALL THREADS:        "
             << all_threads_stats.bucket_total_memory / 1024 << "kiB"
             << "\t\tHit Rate = " << hit_rate << "%"
             << "\tToo Large = " << too_large << "%"
             << "\tEmpty = " << empty << "%"
             << "\t Count = " << count / 1000 << "k"
             << "\n";
 }

 // Prints the thread cache buckets, aggregated over all threads, in two
 // columns: row i shows bucket i on the left and bucket (size / 2 + i) on the
 // right. The bucket whose index equals largest_active_bucket_index() is marked
 // by DisplayBucket(). Ends with the total memory held in all buckets.
 void DisplayPerBucketData(ThreadCacheInspector& inspector) {
   std::cout << "Per-bucket stats (All Threads):"
             << "\nSize\tLimit\tCount\tMemory\t| Size\t\tLimit\tCount\tMemory\n"
             << std::string(80, '-') << "\n";

   size_t total_memory = 0;
   const auto bucket_stats = inspector.AccumulateThreadCacheBuckets();
   const size_t half = bucket_stats.size() / 2;
   const size_t limit_index = inspector.largest_active_bucket_index();

   for (size_t row = 0; row < half; row++) {
     // Separate references for each column: reusing a single reference and
     // assigning to it would overwrite the left bucket with the right one.
     const auto& left = bucket_stats[row];
     total_memory += left.size * left.count;
     DisplayBucket(left, limit_index == row);

     std::cout << "\t| ";

     const size_t right_index = half + row;
     const auto& right = bucket_stats[right_index];
     total_memory += right.size * right.count;
     DisplayBucket(right, limit_index == right_index);

     std::cout << "\n";
   }

   // With an odd number of buckets the last one does not fit in the two
   // columns above; show it on its own row so that it is not left out.
   if (bucket_stats.size() % 2 == 1) {
     const size_t last_index = bucket_stats.size() - 1;
     const auto& last = bucket_stats[last_index];
     total_memory += last.size * last.count;
     DisplayBucket(last, limit_index == last_index);
     std::cout << "\n";
   }

   std::cout << "\nALL THREADS TOTAL: " << total_memory / 1024 << "kiB\n";
 }

 // Prints the statistics gathered by |root_inspector|:
 // - a table with one cell per bucket (slot size, allocated slots, free slots,
 //   number of active slot spans), four cells per line;
 // - the freelist size of every active slot span of the bucket at
 //   |detailed_bucket_index|, skipped when that index is out of range;
 // - counters read from the copied partition root.
 void DisplayRootData(PartitionRootInspector& root_inspector,
                      size_t detailed_bucket_index) {
   const auto& all_bucket_stats = root_inspector.bucket_stats();

   std::cout << "Per-bucket size / allocated slots / free slots / slot span "
                "count:\n";
   for (size_t i = 0; i < all_bucket_stats.size(); i++) {
     const auto& bucket_stats = all_bucket_stats[i];

     std::string line = base::StringPrintf(
         "|% 5d % 6d % 6d % 4d|", static_cast<int>(bucket_stats.slot_size),
         static_cast<int>(bucket_stats.allocated_slots),
         static_cast<int>(bucket_stats.freelist_size),
         static_cast<int>(bucket_stats.active_slot_spans.size()));

     std::cout << line;
     if (i % 4 == 3)
       std::cout << "\n";
     else
       std::cout << "\t";
   }

   // Indexing past the end of the vector is undefined behavior, so the
   // detailed view is only shown for a valid index.
   if (detailed_bucket_index < all_bucket_stats.size()) {
     const auto& bucket_stats = all_bucket_stats[detailed_bucket_index];
     std::cout << "\nFreelist size for active buckets of size = "
               << bucket_stats.slot_size << "\n";
     for (size_t freelist_size : bucket_stats.freelist_sizes)
       std::cout << freelist_size << " ";
     std::cout << "\n";
   }

   auto* root = root_inspector.root();
   uint64_t syscall_count = root->syscall_count.load(std::memory_order_relaxed);
   // Nanoseconds to milliseconds.
   uint64_t total_duration_ms =
       root->syscall_total_time_ns.load(std::memory_order_relaxed) / 1e6;

   uint64_t virtual_size =
       root->total_size_of_super_pages.load(std::memory_order_relaxed) +
       root->total_size_of_direct_mapped_pages.load(std::memory_order_relaxed);

   std::cout
       << "\n\nSyscall count = " << syscall_count
       << "\tTotal duration = " << total_duration_ms << "ms\n"
       << "Max committed size = "
       << root->max_size_of_committed_pages.load(std::memory_order_relaxed) /
              1024
       << "kiB\n"
       << "Allocated/Committed/Virtual = "
       << root->get_total_size_of_allocated_bytes() / 1024 << " / "
       << root->total_size_of_committed_pages.load(std::memory_order_relaxed) /
              1024
       << " / " << virtual_size / 1024 << " kiB\n";
   std::cout << "\nEmpty Slot Spans Dirty Size = "
             << TS_UNCHECKED_READ(root->empty_slot_spans_dirty_bytes) / 1024
             << "kiB";
 }

 // Converts the statistics gathered by |root_inspector| into a dictionary with
 // a single "buckets" key: a list holding one dictionary per bucket, each with
 // the bucket counters and its active, empty and decommitted slot span lists.
 base::Value Dump(PartitionRootInspector& root_inspector) {
   // One slot span -> dictionary of its slot counters.
   auto slot_span_to_value =
       [](const SlotSpanMetadata<ThreadSafe>& slot_span) -> base::Value {
     base::Value span_dict(base::Value::Type::DICTIONARY);

     span_dict.SetKey("num_allocated_slots",
                      base::Value{slot_span.num_allocated_slots});
     span_dict.SetKey("num_unprovisioned_slots",
                      base::Value{slot_span.num_unprovisioned_slots});
     return span_dict;
   };

   // A flattened slot span list -> list of dictionaries.
   auto slot_spans_to_list =
       [&](const std::vector<SlotSpanMetadata<ThreadSafe>>& slot_spans)
       -> base::Value {
     base::Value span_list(base::Value::Type::LIST);
     for (auto& slot_span : slot_spans) {
       span_list.Append(slot_span_to_value(slot_span));
     }
     return span_list;
   };

   // One bucket -> dictionary of its counters and slot span lists.
   auto bucket_to_value =
       [&](const PartitionRootInspector::BucketStats& stats) -> base::Value {
     base::Value bucket_dict(base::Value::Type::DICTIONARY);

     bucket_dict.SetKey("slot_size",
                        base::Value{static_cast<int>(stats.slot_size)});
     bucket_dict.SetKey(
         "num_system_pages_per_slot_span",
         base::Value{stats.bucket.num_system_pages_per_slot_span});
     bucket_dict.SetKey("num_full_slot_spans",
                        base::Value{stats.bucket.num_full_slot_spans});
     bucket_dict.SetKey("allocated_slots",
                        base::Value{static_cast<int>(stats.allocated_slots)});
     bucket_dict.SetKey("freelist_size",
                        base::Value{static_cast<int>(stats.freelist_size)});

     bucket_dict.SetKey("active_slot_spans",
                        slot_spans_to_list(stats.active_slot_spans));
     bucket_dict.SetKey("empty_slot_spans",
                        slot_spans_to_list(stats.empty_slot_spans));
     bucket_dict.SetKey("decommitted_slot_spans",
                        slot_spans_to_list(stats.decommitted_slot_spans));

     return bucket_dict;
   };

   base::Value bucket_list(base::Value::Type::LIST);
   for (const auto& stats : root_inspector.bucket_stats()) {
     bucket_list.Append(bucket_to_value(stats));
   }

   base::Value result(base::Value::Type::DICTIONARY);
   result.SetKey("buckets", std::move(bucket_list));
   return result;
 }
 }  // namespace partition_alloc::internal::tools

 int main(int argc, char** argv) {
   base::CommandLine::Init(argc, argv);

   if (!base::CommandLine::ForCurrentProcess()->HasSwitch("pid")) {
     LOG(ERROR) << "Usage:" << argv[0] << " --pid=<PID> [--json=<FILENAME>]";
     return 1;
   }

   int pid = atoi(base::CommandLine::ForCurrentProcess()
                      ->GetSwitchValueASCII("pid")
                      .c_str());
   LOG(WARNING) << "PID = " << pid;

   base::FilePath json_filename =
       base::CommandLine::ForCurrentProcess()->GetSwitchValuePath("json");

   auto mem_fd = partition_alloc::internal::tools::OpenProcMem(pid);
   // Scan the memory.
   uintptr_t registry_address =
       partition_alloc::internal::tools::FindThreadCacheRegistry(pid,
                                                                 mem_fd.get());
   CHECK(registry_address);

   LOG(INFO) << "Getting the thread cache registry";
   partition_alloc::internal::tools::ThreadCacheInspector thread_cache_inspector{
       registry_address, mem_fd.get(), pid};
   std::map<base::PlatformThreadId, std::string> tid_to_name;

   size_t iter = 0;
   while (true) {
     constexpr const char* kClearScreen = "\033[2J\033[1;1H";
     std::cout << kClearScreen;
     std::cout.flush();

     base::TimeTicks tick = base::TimeTicks::Now();
     bool ok = thread_cache_inspector.GetAllThreadCaches();
     if (!ok)
       continue;

     partition_alloc::internal::tools::PartitionRootInspector root_inspector{
         thread_cache_inspector.GetRootAddress(), mem_fd.get(), pid};
     bool has_bucket_stats = root_inspector.GatherStatistics();

     for (const auto& tcache : thread_cache_inspector.thread_caches()) {
       // Note: this is not robust when TIDs are reused, but here this is fine,
       // as at worst we would display wrong data, and TID reuse is very unlikely
       // in normal scenarios.
       if (tid_to_name.find(tcache.get()->thread_id()) == tid_to_name.end()) {
         tid_to_name = partition_alloc::internal::tools::ThreadNames(pid);
         break;
       }
     }
     int64_t gather_time_ms = (base::TimeTicks::Now() - tick).InMilliseconds();

     std::cout << "Time to gather data = " << gather_time_ms << "ms\n";
     DisplayPerThreadData(thread_cache_inspector, tid_to_name);

     std::cout << "\n\n";
     DisplayPerBucketData(thread_cache_inspector);

     if (has_bucket_stats) {
       std::cout << "\n\n";
       DisplayRootData(root_inspector,
                       (iter / 50) % root_inspector.bucket_stats().size());

       if (!json_filename.empty()) {
         base::Value dump = Dump(root_inspector);
         std::string json_string;
         ok = base::JSONWriter::WriteWithOptions(
             dump, base::JSONWriter::Options::OPTIONS_PRETTY_PRINT,
             &json_string);
         if (ok) {
           auto f =
               base::File(json_filename, base::File::Flags::FLAG_OPEN_ALWAYS |
                                             base::File::Flags::FLAG_WRITE);
           if (f.IsValid()) {
             f.WriteAtCurrentPos(json_string.c_str(), json_string.size());
             std::cout << "\n\nDumped JSON to " << json_filename << std::endl;
             return 0;
           }
         }
         std::cout << "\n\nFailed to dump JSON to " << json_filename
                   << std::endl;
         return 1;
       }
     }

     std::cout << std::endl;
     usleep(200'000);
     iter++;
   }
 }
	// Copyright 2021 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	// Connects to a running Chrome process, and outputs statistics about its thread
	// caches.

	#include <fcntl.h>
	#include <signal.h>
	#include <sys/stat.h>
	#include <sys/types.h>
	#include <unistd.h>
	#include <algorithm>
	#include <cstring>
	#include <ios>
	#include <iostream>
	#include <map>
	#include <string>
	#include <vector>

	#include "base/allocator/partition_allocator/partition_root.h"
	#include "base/allocator/partition_allocator/thread_cache.h"

	#include "base/check_op.h"
	#include "base/command_line.h"
	#include "base/debug/proc_maps_linux.h"
	#include "base/files/file.h"
	#include "base/files/file_enumerator.h"
	#include "base/files/scoped_file.h"
	#include "base/json/json_writer.h"
	#include "base/logging.h"
	#include "base/posix/eintr_wrapper.h"
	#include "base/strings/string_number_conversions.h"
	#include "base/strings/stringprintf.h"
	#include "base/thread_annotations.h"
	#include "base/time/time.h"
	#include "base/values.h"
	#include "build/build_config.h"
	#include "third_party/abseil-cpp/absl/types/optional.h"
	#include "tools/memory/partition_allocator/inspect_utils.h"

	namespace partition_alloc::internal::tools {
	namespace {

	// Returns the address of the thread cache registry inside process |pid|. The
	// address is found by scanning the process memory (read through |mem_fd|), so
	// no symbols are needed.
	uintptr_t FindThreadCacheRegistry(pid_t pid, int mem_fd) {
	  // The registry is looked up with index 1 of the needle array.
	  const uintptr_t registry_address =
	      IndexThreadCacheNeedleArray(pid, mem_fd, 1);
	  return registry_address;
	}

	// Local, suitably aligned storage for the bytes of a T copied out of another
	// process. The copy can be inspected "as if" it were a local object, but any
	// pointer stored inside it still refers to the remote address space, so
	// dereferencing one will at best fault.
	template <typename T>
	class RawBuffer {
	 public:
	  RawBuffer() = default;

	  // Copies sizeof(T) bytes located at |address| in the process behind
	  // |mem_fd|. Returns absl::nullopt when ReadMemory() reports a failure.
	  static absl::optional<RawBuffer<T>> ReadFromMemFd(int mem_fd,
	                                                    uintptr_t address) {
	    RawBuffer<T> copy;
	    if (!ReadMemory(mem_fd, reinterpret_cast<unsigned long>(address),
	                    sizeof(T), copy.get_buffer())) {
	      return absl::nullopt;
	    }

	    return {copy};
	  }

	  // Typed, read-only view of the copied bytes.
	  const T* get() const { return reinterpret_cast<const T*>(buffer_); }
	  // Raw storage, used as the destination of the remote read.
	  char* get_buffer() { return buffer_; }

	 private:
	  alignas(T) char buffer_[sizeof(T)];
	};

	// Lists the names of all the threads of process |pid|, keyed by thread ID.
	//
	// Each name is the second whitespace-delimited field of
	// /proc/<pid>/task/<tid>/stat, stored exactly as scanned. Entries that cannot
	// be opened, read or parsed are skipped.
	std::map<base::PlatformThreadId, std::string> ThreadNames(pid_t pid) {
	  std::map<base::PlatformThreadId, std::string> result;

	  base::FilePath root_path =
	      base::FilePath(base::StringPrintf("/proc/%d/task", pid));
	  base::FileEnumerator enumerator{root_path, false,
	                                  base::FileEnumerator::DIRECTORIES};

	  for (base::FilePath path = enumerator.Next(); !path.empty();
	       path = enumerator.Next()) {
	    auto stat_path = path.Append("stat");
	    base::File stat_file{stat_path,
	                         base::File::FLAG_OPEN | base::File::FLAG_READ};
	    if (!stat_file.IsValid()) {
	      LOG(WARNING) << "Invalid file: " << stat_path.value();
	      continue;
	    }

	    char buffer[4096 + 1];
	    int bytes_read = stat_file.ReadAtCurrentPos(buffer, 4096);
	    if (bytes_read <= 0)
	      continue;
	    buffer[bytes_read] = '\0';

	    int thread_id = 0;
	    char name[256];
	    // The field width keeps the scan within |name| (255 characters plus the
	    // terminating NUL). Both fields must be converted, otherwise |thread_id|
	    // and |name| do not hold meaningful data.
	    if (sscanf(buffer, "%d %255s", &thread_id, name) != 2) {
	      LOG(WARNING) << "Cannot parse: " << stat_path.value();
	      continue;
	    }
	    result[base::PlatformThreadId(thread_id)] = std::string(name);
	  }

	  return result;
	}

	} // namespace

	// Copies the thread cache registry and the thread caches of a remote process
	// into local buffers (read through |mem_fd|), and computes statistics from
	// these copies.
	class ThreadCacheInspector {
	public:
	// Distinct from ThreadCache::Bucket because |count| is uint8_t.
	// Filled by AccumulateThreadCacheBuckets(): |count| is summed over all the
	// thread caches, |per_thread_limit| is a limit read from a thread cache
	// bucket, and |size| comes from BucketIndexLookup::bucket_sizes().
	struct BucketStats {
	int count = 0;
	int per_thread_limit = 0;
	size_t size = 0;
	};

	// Only records its arguments; nothing is read from the remote process until
	// GetAllThreadCaches() is called.
	ThreadCacheInspector(uintptr_t registry_addr, int mem_fd, pid_t pid);
	// Re-reads the registry and every thread cache linked from it. Returns
	// false when a read fails.
	bool GetAllThreadCaches();
	// Sum of CachedMemory() over all the copied thread caches.
	size_t CachedMemory() const;
	// Remote address held in |root_| of the first thread cache. CHECK()s that
	// at least one thread cache has been read.
	uintptr_t GetRootAddress();

	// Thread caches copied by the last call to GetAllThreadCaches().
	const std::vector<RawBuffer<base::internal::ThreadCache>>& thread_caches()
	const {
	return thread_caches_;
	}

	// Value of |should_purge_| in the copied thread cache.
	static bool should_purge(
	const RawBuffer<base::internal::ThreadCache>& tcache) {
	return tcache.get()->should_purge_;
	}

	// Per-bucket statistics aggregated over all the copied thread caches.
	std::vector<BucketStats> AccumulateThreadCacheBuckets();
	// Value of |largest_active_bucket_index_| in the copied registry.
	std::uint8_t largest_active_bucket_index() {
	return registry_.get()->largest_active_bucket_index_;
	}

	private:
	// Address of the registry in the remote process.
	uintptr_t registry_addr_;
	// File descriptor used to read the remote process memory.
	int mem_fd_;
	// PID of the inspected process.
	pid_t pid_;
	// Local copies; pointers inside them are remote addresses.
	RawBuffer<base::internal::ThreadCacheRegistry> registry_;
	std::vector<RawBuffer<base::internal::ThreadCache>> thread_caches_;
	};

	// Copies a PartitionRoot and the slot span lists of its small buckets from a
	// remote process (read through |mem_fd|), and exposes per-bucket statistics.
	class PartitionRootInspector {
	public:
	// Statistics for one bucket, filled by GatherStatistics().
	struct BucketStats {
	size_t slot_size = 0;
	// Slots allocated in active slot spans, plus all slots of full slot spans.
	size_t allocated_slots = 0;
	// Sum of |freelist_sizes|.
	size_t freelist_size = 0;

	// Copy of the remote bucket.
	base::internal::PartitionBucket<base::internal::ThreadSafe> bucket;
	// One entry per active slot span: slots that are neither allocated nor
	// unprovisioned.
	std::vector<size_t> freelist_sizes;
	// Flattened versions of the lists.
	std::vector<SlotSpanMetadata<ThreadSafe>> active_slot_spans;
	std::vector<SlotSpanMetadata<ThreadSafe>> empty_slot_spans;
	std::vector<SlotSpanMetadata<ThreadSafe>> decommitted_slot_spans;
	};

	// Only records its arguments; nothing is read until GatherStatistics().
	PartitionRootInspector(uintptr_t root_addr, int mem_fd, pid_t pid)
	: root_addr_(root_addr), mem_fd_(mem_fd), pid_(pid) {}
	// Returns true for success.
	bool GatherStatistics();
	// Statistics collected by the last call to GatherStatistics().
	const std::vector<BucketStats>& bucket_stats() const { return bucket_stats_; }
	// Local copy of the root; pointers inside it are remote addresses.
	const PartitionRoot<base::internal::ThreadSafe>* root() {
	return root_.get();
	}

	private:
	// Re-reads |root_| from the remote process. |root_| is left untouched when
	// the read fails.
	void Update();

	// Address of the root in the remote process.
	uintptr_t root_addr_;
	// File descriptor used to read the remote process memory.
	int mem_fd_;
	// PID of the inspected process.
	pid_t pid_;
	RawBuffer<PartitionRoot<base::internal::ThreadSafe>> root_;
	std::vector<BucketStats> bucket_stats_;
	};

	ThreadCacheInspector::ThreadCacheInspector(uintptr_t registry_addr,
	int mem_fd,
	pid_t pid)
	: registry_addr_(registry_addr), mem_fd_(mem_fd), pid_(pid) {}

	// Refreshes |registry_| and |thread_caches_| from the target process. Returns
	// false as soon as one remote read fails; |thread_caches_| then only holds the
	// caches read so far.
	//
	// NO_THREAD_SAFETY_ANALYSIS: Well, reading a running process' memory is not
	// really thread-safe.
	bool ThreadCacheInspector::GetAllThreadCaches() NO_THREAD_SAFETY_ANALYSIS {
	  thread_caches_.clear();

	  // This is going to take a while, make sure that the metadata don't change.
	  ScopedSigStopper stopper{pid_};

	  auto remote_registry =
	      RawBuffer<base::internal::ThreadCacheRegistry>::ReadFromMemFd(
	          mem_fd_, registry_addr_);
	  if (!remote_registry.has_value())
	    return false;
	  registry_ = *remote_registry;

	  // |current| is an address in the target process: it is only used as the
	  // source of the next read, never dereferenced locally.
	  for (base::internal::ThreadCache* current = registry_.get()->list_head_;
	       current != nullptr;) {
	    auto remote_cache = RawBuffer<base::internal::ThreadCache>::ReadFromMemFd(
	        mem_fd_, reinterpret_cast<uintptr_t>(current));
	    if (!remote_cache.has_value()) {
	      LOG(WARNING) << "Failed to read a ThreadCache";
	      return false;
	    }
	    thread_caches_.push_back(remote_cache.value());
	    current = remote_cache->get()->next_;
	  }
	  return true;
	}

	// Returns the sum of CachedMemory() over every copied thread cache.
	size_t ThreadCacheInspector::CachedMemory() const {
	  size_t sum = 0;
	  for (const auto& cache : thread_caches_)
	    sum += cache.get()->CachedMemory();
	  return sum;
	}

	// Returns the remote address stored in |root_| of the first copied thread
	// cache. At least one thread cache must have been read.
	uintptr_t ThreadCacheInspector::GetRootAddress() {
	  CHECK(!thread_caches_.empty());
	  const auto& first_cache = thread_caches_.front();
	  return reinterpret_cast<uintptr_t>(first_cache.get()->root_);
	}

	std::vector<ThreadCacheInspector::BucketStats>
	ThreadCacheInspector::AccumulateThreadCacheBuckets() {
	std::vector<BucketStats> result(base::internal::ThreadCache::kBucketCount);
	for (auto& tcache : thread_caches_) {
	for (int i = 0; i < base::internal::ThreadCache::kBucketCount; i++) {
	result[i].count += tcache.get()->buckets_[i].count;
	result[i].per_thread_limit = tcache.get()->buckets_[i].limit;
	}
	}

	base::internal::BucketIndexLookup lookup{};
	for (int i = 0; i < base::internal::ThreadCache::kBucketCount; i++) {
	result[i].size = lookup.bucket_sizes()[i];
	}
	return result;
	}

	void PartitionRootInspector::Update() {
	auto root =
	RawBuffer<PartitionRoot<base::internal::ThreadSafe>>::ReadFromMemFd(
	mem_fd_, root_addr_);
	if (root.has_value())
	root_ = *root;
	}

	namespace {

	// Appends to |list| a copy of every slot span metadata of the remote linked
	// list starting at |head_address| (0 meaning an empty list), following
	// |next_slot_span|. Returns false when a read fails; |list| then holds the
	// elements copied so far.
	bool CopySlotSpanList(
	    std::vector<base::internal::SlotSpanMetadata<base::internal::ThreadSafe>>&
	        list,
	    uintptr_t head_address,
	    int mem_fd) {
	  using RemoteSlotSpan = RawBuffer<
	      base::internal::SlotSpanMetadata<base::internal::ThreadSafe>>;

	  uintptr_t current_address = head_address;
	  while (current_address != 0) {
	    auto remote_slot_span =
	        RemoteSlotSpan::ReadFromMemFd(mem_fd, current_address);
	    if (!remote_slot_span.has_value())
	      return false;

	    list.push_back(*remote_slot_span->get());
	    // The link is a remote address, only usable as the next read location.
	    current_address = reinterpret_cast<uintptr_t>(
	        remote_slot_span->get()->next_slot_span);
	  }

	  return true;
	}

	} // namespace

	// Re-reads the partition root and rebuilds |bucket_stats_| for its buckets, in
	// order, stopping at the first bucket whose slot size exceeds 4096.
	//
	// Returns true when such a bucket is reached. Returns false when a slot span
	// list cannot be read, when a slot span holds inconsistent counts, or when all
	// buckets were visited without reaching a large one.
	bool PartitionRootInspector::GatherStatistics() {
	  // This is going to take a while, make sure that the metadata don't change.
	  ScopedSigStopper stopper{pid_};

	  Update();
	  bucket_stats_.clear();

	  for (auto& bucket : root_.get()->buckets) {
	    // Only look at the small buckets.
	    if (bucket.slot_size > 4096)
	      return true;

	    BucketStats stats;
	    stats.slot_size = bucket.slot_size;
	    stats.bucket = bucket;

	    // Flatten the three remote lists; give up on the first failed read.
	    const bool lists_copied =
	        CopySlotSpanList(
	            stats.active_slot_spans,
	            reinterpret_cast<uintptr_t>(bucket.active_slot_spans_head),
	            mem_fd_) &&
	        CopySlotSpanList(
	            stats.empty_slot_spans,
	            reinterpret_cast<uintptr_t>(bucket.empty_slot_spans_head),
	            mem_fd_) &&
	        CopySlotSpanList(
	            stats.decommitted_slot_spans,
	            reinterpret_cast<uintptr_t>(bucket.decommitted_slot_spans_head),
	            mem_fd_);
	    if (!lists_copied)
	      return false;

	    const auto slots_per_span = bucket.get_slots_per_span();
	    for (const auto& slot_span : stats.active_slot_spans) {
	      const uint16_t allocated = slot_span.num_allocated_slots;
	      stats.allocated_slots += allocated;

	      const size_t allocated_unprovisioned =
	          slot_span.num_allocated_slots + slot_span.num_unprovisioned_slots;
	      // Inconsistent data. This can happen since we stopped the process at an
	      // arbitrary point.
	      if (allocated_unprovisioned > slots_per_span)
	        return false;

	      // Whatever is neither allocated nor unprovisioned sits on the freelist.
	      const size_t free_slots = slots_per_span - allocated_unprovisioned;
	      stats.freelist_size += free_slots;
	      stats.freelist_sizes.push_back(free_slots);
	    }

	    // Full slot spans are not in any list.
	    stats.allocated_slots +=
	        bucket.num_full_slot_spans * bucket.get_slots_per_span();
	    bucket_stats_.push_back(stats);
	  }

	  // We should have found at least one bucket too large, and returned earlier.
	  return false;
	}

	// Writes one thread cache bucket to stdout as tab-separated columns: slot
	// size, per-thread limit, cached count and cached memory in kiB. The entry is
	// prefixed with '*' when |is_limit| is true and with a space otherwise. No
	// trailing newline is emitted.
	void DisplayBucket(const ThreadCacheInspector::BucketStats& bucket,
	                   bool is_limit) {
	  const size_t cached_bytes = bucket.size * bucket.count;
	  const char* marker = is_limit ? "*" : " ";

	  std::cout << marker
	            << base::StringPrintf("% 4d\t% 4d\t% 4d\t% 4dkiB",
	                                  static_cast<int>(bucket.size),
	                                  static_cast<int>(bucket.per_thread_limit),
	                                  static_cast<int>(bucket.count),
	                                  static_cast<int>(cached_bytes / 1024));
	}

	// Prints a summary of all thread caches known to |inspector|: a header with
	// the number of caches and total cached memory, one line per thread (name,
	// cached memory, purge marker, hit / miss percentages and allocation count),
	// and a final line aggregating all threads.
	//
	// |tid_to_name| maps thread ids to display names. It is looked up with
	// operator[], so a thread id that is not present yet gets an empty name
	// inserted into the map.
	void DisplayPerThreadData(
	    ThreadCacheInspector& inspector,
	    std::map<base::PlatformThreadId, std::string>& tid_to_name) {
	  // Width of the thread name column, in characters.
	  constexpr size_t kNameColumnWidth = 20;

	  // Integer percentage of |part| within |total|. A cache that has not served
	  // any allocation has |total| == 0; report 0% instead of dividing by zero.
	  auto percent = [](uint64_t part, uint64_t total) -> uint64_t {
	    return total ? (100 * part) / total : 0;
	  };

	  std::cout << "Found " << inspector.thread_caches().size()
	            << " caches, total cached memory = "
	            << inspector.CachedMemory() / 1024 << "kiB"
	            << "\n";

	  std::cout << "Per thread:\n"
	            << "Thread Name Size\tPurge\n"
	            << std::string(80, '-') << "\n";
	  base::ThreadCacheStats all_threads_stats = {0};
	  for (const auto& tcache : inspector.thread_caches()) {
	    base::ThreadCacheStats stats = {0};
	    // No alloc stats, they reach into tcache->root_, which is not valid.
	    tcache.get()->AccumulateStats(&stats);
	    tcache.get()->AccumulateStats(&all_threads_stats);
	    const uint64_t count = stats.alloc_count;

	    const std::string& thread_name = tid_to_name[tcache.get()->thread_id()];
	    // Names at least as wide as the column get no padding; subtracting
	    // unconditionally would wrap around and request a gigantic string.
	    const size_t padding_width = thread_name.size() < kNameColumnWidth
	                                     ? kNameColumnWidth - thread_name.size()
	                                     : 0;
	    std::cout << thread_name << std::string(padding_width, ' ')
	              << tcache.get()->CachedMemory() / 1024 << "kiB\t"
	              << (inspector.should_purge(tcache) ? 'X' : ' ')
	              << "\tHit Rate = " << percent(stats.alloc_hits, count) << "%"
	              << "\tToo Large = "
	              << percent(stats.alloc_miss_too_large, count) << "%"
	              << "\tEmpty = " << percent(stats.alloc_miss_empty, count) << "%"
	              << "\t Count = " << count / 1000 << "k"
	              << "\n";
	  }

	  const uint64_t total_count = all_threads_stats.alloc_count;
	  std::cout << "\nALL THREADS: "
	            << all_threads_stats.bucket_total_memory / 1024 << "kiB"
	            << "\t\tHit Rate = "
	            << percent(all_threads_stats.alloc_hits, total_count) << "%"
	            << "\tToo Large = "
	            << percent(all_threads_stats.alloc_miss_too_large, total_count)
	            << "%"
	            << "\tEmpty = "
	            << percent(all_threads_stats.alloc_miss_empty, total_count) << "%"
	            << "\t Count = " << total_count / 1000 << "k"
	            << "\n";
	}

	// Prints the thread cache bucket statistics accumulated over all threads as a
	// two-column table (first half of the buckets on the left, second half on the
	// right), followed by the total cached memory.
	//
	// With an odd number of buckets the left column holds one more entry and the
	// right cell of the last row is left empty, so that no bucket is dropped from
	// the table or from the total.
	void DisplayPerBucketData(ThreadCacheInspector& inspector) {
	  std::cout << "Per-bucket stats (All Threads):"
	            << "\nSize\tLimit\tCount\tMemory\t| Size\t\tLimit\tCount\tMemory\n"
	            << std::string(80, '-') << "\n";

	  size_t total_memory = 0;
	  const auto bucket_stats = inspector.AccumulateThreadCacheBuckets();
	  const size_t bucket_count = bucket_stats.size();
	  // Round up so that the middle bucket of an odd-sized list gets a row.
	  const size_t row_count = (bucket_count + 1) / 2;

	  // Prints the bucket at |bucket_index| and adds its memory to the total.
	  // The bucket is only read; nothing in |bucket_stats| is modified.
	  auto display_and_account = [&](size_t bucket_index) {
	    const auto& bucket = bucket_stats[bucket_index];
	    total_memory += bucket.size * bucket.count;
	    DisplayBucket(bucket,
	                  inspector.largest_active_bucket_index() == bucket_index);
	  };

	  for (size_t row = 0; row < row_count; row++) {
	    display_and_account(row);

	    std::cout << "\t| ";

	    const size_t right_index = row_count + row;
	    if (right_index < bucket_count)
	      display_and_account(right_index);

	    std::cout << "\n";
	  }
	  std::cout << "\nALL THREADS TOTAL: " << total_memory / 1024 << "kiB\n";
	}

	// Prints the statistics gathered by |root_inspector|:
	//  - a compact table (four buckets per line) with slot size, allocated slots,
	//    free slots and number of active slot spans of every bucket;
	//  - the freelist size of each active slot span of the bucket at
	//    |detailed_bucket_index|; this part is skipped when the index is out of
	//    range, e.g. when no bucket statistics are available;
	//  - root-wide counters: syscalls, committed / allocated / virtual sizes and
	//    the dirty bytes held by empty slot spans.
	void DisplayRootData(PartitionRootInspector& root_inspector,
	                     size_t detailed_bucket_index) {
	  const auto& all_bucket_stats = root_inspector.bucket_stats();

	  std::cout << "Per-bucket size / allocated slots / free slots / slot span "
	               "count:\n";
	  for (size_t i = 0; i < all_bucket_stats.size(); i++) {
	    const auto& bucket_stats = all_bucket_stats[i];

	    std::string line = base::StringPrintf(
	        "|% 5d % 6d % 6d % 4d|", static_cast<int>(bucket_stats.slot_size),
	        static_cast<int>(bucket_stats.allocated_slots),
	        static_cast<int>(bucket_stats.freelist_size),
	        static_cast<int>(bucket_stats.active_slot_spans.size()));

	    std::cout << line;
	    // Four buckets per output line.
	    if (i % 4 == 3)
	      std::cout << "\n";
	    else
	      std::cout << "\t";
	  }

	  // Indexing without this check would read past the end of the vector.
	  if (detailed_bucket_index < all_bucket_stats.size()) {
	    const auto& detailed_stats = all_bucket_stats[detailed_bucket_index];
	    std::cout << "\nFreelist size for active buckets of size = "
	              << detailed_stats.slot_size << "\n";
	    for (size_t freelist_size : detailed_stats.freelist_sizes)
	      std::cout << freelist_size << " ";
	    std::cout << "\n";
	  }

	  auto* root = root_inspector.root();
	  uint64_t syscall_count = root->syscall_count.load(std::memory_order_relaxed);
	  // Nanoseconds to milliseconds; the fractional part is dropped.
	  uint64_t total_duration_ms = static_cast<uint64_t>(
	      root->syscall_total_time_ns.load(std::memory_order_relaxed) / 1e6);

	  uint64_t virtual_size =
	      root->total_size_of_super_pages.load(std::memory_order_relaxed) +
	      root->total_size_of_direct_mapped_pages.load(std::memory_order_relaxed);

	  std::cout
	      << "\n\nSyscall count = " << syscall_count
	      << "\tTotal duration = " << total_duration_ms << "ms\n"
	      << "Max committed size = "
	      << root->max_size_of_committed_pages.load(std::memory_order_relaxed) /
	             1024
	      << "kiB\n"
	      << "Allocated/Committed/Virtual = "
	      << root->get_total_size_of_allocated_bytes() / 1024 << " / "
	      << root->total_size_of_committed_pages.load(std::memory_order_relaxed) /
	             1024
	      << " / " << virtual_size / 1024 << " kiB\n";
	  std::cout << "\nEmpty Slot Spans Dirty Size = "
	            << TS_UNCHECKED_READ(root->empty_slot_spans_dirty_bytes) / 1024
	            << "kiB";
	}

	// Converts the bucket statistics held by |root_inspector| into a dictionary
	// of the form {"buckets": [...]}. Each bucket entry carries its slot size,
	// system pages per slot span, number of full slot spans, allocated slots,
	// freelist size and the active / empty / decommitted slot span lists, where
	// every slot span is reduced to its allocated and unprovisioned slot counts.
	base::Value Dump(PartitionRootInspector& root_inspector) {
	  // One slot span -> dictionary with its two slot counters.
	  auto span_to_dict =
	      [](const SlotSpanMetadata<ThreadSafe>& span) -> base::Value {
	    base::Value dict(base::Value::Type::DICTIONARY);
	    dict.SetKey("num_allocated_slots", base::Value{span.num_allocated_slots});
	    dict.SetKey("num_unprovisioned_slots",
	                base::Value{span.num_unprovisioned_slots});
	    return dict;
	  };

	  // A sequence of slot spans -> list of dictionaries, in the same order.
	  auto spans_to_list = [&span_to_dict](const auto& spans) -> base::Value {
	    base::Value list(base::Value::Type::LIST);
	    for (const auto& span : spans)
	      list.Append(span_to_dict(span));
	    return list;
	  };

	  // One bucket -> dictionary with its scalar counters and slot span lists.
	  auto bucket_to_dict =
	      [&spans_to_list](
	          const PartitionRootInspector::BucketStats& stats) -> base::Value {
	    base::Value dict(base::Value::Type::DICTIONARY);

	    dict.SetKey("slot_size", base::Value{static_cast<int>(stats.slot_size)});
	    dict.SetKey("num_system_pages_per_slot_span",
	                base::Value{stats.bucket.num_system_pages_per_slot_span});
	    dict.SetKey("num_full_slot_spans",
	                base::Value{stats.bucket.num_full_slot_spans});
	    dict.SetKey("allocated_slots",
	                base::Value{static_cast<int>(stats.allocated_slots)});
	    dict.SetKey("freelist_size",
	                base::Value{static_cast<int>(stats.freelist_size)});

	    dict.SetKey("active_slot_spans", spans_to_list(stats.active_slot_spans));
	    dict.SetKey("empty_slot_spans", spans_to_list(stats.empty_slot_spans));
	    dict.SetKey("decommitted_slot_spans",
	                spans_to_list(stats.decommitted_slot_spans));

	    return dict;
	  };

	  base::Value buckets(base::Value::Type::LIST);
	  for (const auto& stats : root_inspector.bucket_stats())
	    buckets.Append(bucket_to_dict(stats));

	  base::Value dump(base::Value::Type::DICTIONARY);
	  dump.SetKey("buckets", std::move(buckets));
	  return dump;
	}
	} // namespace partition_alloc::internal::tools

	int main(int argc, char** argv) {
	base::CommandLine::Init(argc, argv);

	if (!base::CommandLine::ForCurrentProcess()->HasSwitch("pid")) {
	LOG(ERROR) << "Usage:" << argv[0] << " --pid=<PID> [--json=<FILENAME>]";
	return 1;
	}

	int pid = atoi(base::CommandLine::ForCurrentProcess()
	->GetSwitchValueASCII("pid")
	.c_str());
	LOG(WARNING) << "PID = " << pid;

	base::FilePath json_filename =
	base::CommandLine::ForCurrentProcess()->GetSwitchValuePath("json");

	auto mem_fd = partition_alloc::internal::tools::OpenProcMem(pid);
	// Scan the memory.
	uintptr_t registry_address =
	partition_alloc::internal::tools::FindThreadCacheRegistry(pid,
	mem_fd.get());
	CHECK(registry_address);

	LOG(INFO) << "Getting the thread cache registry";
	partition_alloc::internal::tools::ThreadCacheInspector thread_cache_inspector{
	registry_address, mem_fd.get(), pid};
	std::map<base::PlatformThreadId, std::string> tid_to_name;

	size_t iter = 0;
	while (true) {
	constexpr const char* kClearScreen = "\033[2J\033[1;1H";
	std::cout << kClearScreen;
	std::cout.flush();

	base::TimeTicks tick = base::TimeTicks::Now();
	bool ok = thread_cache_inspector.GetAllThreadCaches();
	if (!ok)
	continue;

	partition_alloc::internal::tools::PartitionRootInspector root_inspector{
	thread_cache_inspector.GetRootAddress(), mem_fd.get(), pid};
	bool has_bucket_stats = root_inspector.GatherStatistics();

	for (const auto& tcache : thread_cache_inspector.thread_caches()) {
	// Note: this is not robust when TIDs are reused, but here this is fine,
	// as at worst we would display wrong data, and TID reuse is very unlikely
	// in normal scenarios.
	if (tid_to_name.find(tcache.get()->thread_id()) == tid_to_name.end()) {
	tid_to_name = partition_alloc::internal::tools::ThreadNames(pid);
	break;
	}
	}
	int64_t gather_time_ms = (base::TimeTicks::Now() - tick).InMilliseconds();

	std::cout << "Time to gather data = " << gather_time_ms << "ms\n";
	DisplayPerThreadData(thread_cache_inspector, tid_to_name);

	std::cout << "\n\n";
	DisplayPerBucketData(thread_cache_inspector);

	if (has_bucket_stats) {
	std::cout << "\n\n";
	DisplayRootData(root_inspector,
	(iter / 50) % root_inspector.bucket_stats().size());

	if (!json_filename.empty()) {
	base::Value dump = Dump(root_inspector);
	std::string json_string;
	ok = base::JSONWriter::WriteWithOptions(
	dump, base::JSONWriter::Options::OPTIONS_PRETTY_PRINT,
	&json_string);
	if (ok) {
	auto f =
	base::File(json_filename, base::File::Flags::FLAG_OPEN_ALWAYS \|
	base::File::Flags::FLAG_WRITE);
	if (f.IsValid()) {
	f.WriteAtCurrentPos(json_string.c_str(), json_string.size());
	std::cout << "\n\nDumped JSON to " << json_filename << std::endl;
	return 0;
	}
	}
	std::cout << "\n\nFailed to dump JSON to " << json_filename
	<< std::endl;
	return 1;
	}
	}

	std::cout << std::endl;
	usleep(200'000);
	iter++;
	}
	}