[partition_alloc] Change alternate distribution to use 8 buckets.

This CL adds an alternate bucket distribution to Chrome, which uses 8 buckets per order instead of the current 4. This is in addition to the two existing bucket distributions (the default one and the alternate one added in https://chromium-review.googlesource.com/c/chromium/src/+/3459515).
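
For context, subdividing each order into 8 buckets instead of 4 halves the
spacing between consecutive bucket sizes within an order. A minimal
standalone sketch (for illustration only, not part of this CL;
PrintBucketSizesForOrder is a hypothetical helper) of the spacing for the
order starting at 1024:

  #include <cstddef>
  #include <cstdio>

  // Prints the bucket sizes covering the order [order_base, 2 * order_base),
  // given the number of bits used to subdivide an order.
  void PrintBucketSizesForOrder(size_t order_base,
                                size_t buckets_per_order_bits) {
    const size_t buckets_per_order = size_t{1} << buckets_per_order_bits;
    const size_t step = order_base / buckets_per_order;
    for (size_t i = 0; i < buckets_per_order; ++i)
      printf("%zu ", order_base + i * step);
    printf("\n");
  }

  int main() {
    PrintBucketSizesForOrder(1024, 2);  // 1024 1280 1536 1792
    PrintBucketSizesForOrder(1024, 3);  // 1024 1152 1280 1408 1536 1664
                                        // 1792 1920
  }

The denser distribution can be selected at runtime through the FeatureParam
added below, e.g.
--enable-features=PartitionAllocUseAlternateDistribution:mode/denser
(assuming the usual feature-param switch syntax).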

Bug: 1238858
Change-Id: Iccaf14f075f90baaefa7495fc77037700a1f7d7f
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3894837
Reviewed-by: Benoit Lize <lizeb@chromium.org>
Commit-Queue: Thiabaud Engelbrecht <thiabaud@google.com>
Cr-Commit-Position: refs/heads/main@{#1047804}
diff --git a/base/allocator/partition_alloc_features.cc b/base/allocator/partition_alloc_features.cc
index 16491687..f6fe625 100644
--- a/base/allocator/partition_alloc_features.cc
+++ b/base/allocator/partition_alloc_features.cc
@@ -113,11 +113,21 @@
 const base::FeatureParam<bool> kBackupRefPtrAsanEnableInstantiationCheckParam{
     &kPartitionAllocBackupRefPtr, "asan-enable-instantiation-check", true};
 
-// If enabled, switches the bucket distribution to an alternate one. The
-// alternate distribution must have buckets that are a subset of the default
-// one.
-const Feature kPartitionAllocUseAlternateDistribution{
+// If enabled, switches the bucket distribution to an alternate one. Only one of
+// these features may be enabled at a time.
+const BASE_EXPORT Feature kPartitionAllocUseAlternateDistribution{
     "PartitionAllocUseAlternateDistribution", FEATURE_DISABLED_BY_DEFAULT};
+const base::FeatureParam<AlternateBucketDistributionMode>::Option
+    kPartitionAllocAlternateDistributionOption[] = {
+        {AlternateBucketDistributionMode::kDefault, "default"},
+        {AlternateBucketDistributionMode::kCoarser, "coarser"},
+        {AlternateBucketDistributionMode::kDenser, "denser"},
+};
+const base::FeatureParam<AlternateBucketDistributionMode>
+    kPartitionAllocAlternateBucketDistributionParam{
+        &kPartitionAllocUseAlternateDistribution, "mode",
+        AlternateBucketDistributionMode::kCoarser,
+        &kPartitionAllocAlternateDistributionOption};
 
 // If enabled, switches PCScan scheduling to a mutator-aware scheduler. Does not
 // affect whether PCScan is enabled itself.
diff --git a/base/allocator/partition_alloc_features.h b/base/allocator/partition_alloc_features.h
index 04328aa..78415636 100644
--- a/base/allocator/partition_alloc_features.h
+++ b/base/allocator/partition_alloc_features.h
@@ -81,6 +81,12 @@
   kDisabledButSplitPartitions3Way,
 };
 
+enum class AlternateBucketDistributionMode : uint8_t {
+  kDefault,
+  kCoarser,
+  kDenser,
+};
+
 extern const BASE_EXPORT Feature kPartitionAllocBackupRefPtr;
 extern const BASE_EXPORT base::FeatureParam<BackupRefPtrEnabledProcesses>
     kBackupRefPtrEnabledProcessesParam;
@@ -92,14 +98,16 @@
     kBackupRefPtrAsanEnableExtractionCheckParam;
 extern const BASE_EXPORT base::FeatureParam<bool>
     kBackupRefPtrAsanEnableInstantiationCheckParam;
+extern const BASE_EXPORT base::FeatureParam<AlternateBucketDistributionMode>
+    kPartitionAllocAlternateBucketDistributionParam;
 
 extern const BASE_EXPORT Feature kPartitionAllocPCScanMUAwareScheduler;
 extern const BASE_EXPORT Feature kPartitionAllocPCScanStackScanning;
 extern const BASE_EXPORT Feature kPartitionAllocDCScan;
 extern const BASE_EXPORT Feature kPartitionAllocPCScanImmediateFreeing;
 extern const BASE_EXPORT Feature kPartitionAllocPCScanEagerClearing;
-extern const BASE_EXPORT Feature kPartitionAllocUseAlternateDistribution;
 extern const BASE_EXPORT Feature kPartitionAllocSortActiveSlotSpans;
+extern const BASE_EXPORT Feature kPartitionAllocUseAlternateDistribution;
 
 }  // namespace features
 }  // namespace base
diff --git a/base/allocator/partition_allocator/gwp_asan_support.cc b/base/allocator/partition_allocator/gwp_asan_support.cc
index 3af4a45bc..39a0d12 100644
--- a/base/allocator/partition_allocator/gwp_asan_support.cc
+++ b/base/allocator/partition_allocator/gwp_asan_support.cc
@@ -38,7 +38,7 @@
   const size_t kSlotSize = 2 * internal::SystemPageSize();
   uint16_t bucket_index =
       PartitionRoot<internal::ThreadSafe>::SizeToBucketIndex(
-          kSlotSize, root->flags.with_denser_bucket_distribution);
+          kSlotSize, root->GetBucketDistribution());
   auto* bucket = root->buckets + bucket_index;
 
   const size_t kSuperPagePayloadStartOffset =
diff --git a/base/allocator/partition_allocator/partition_alloc.cc b/base/allocator/partition_allocator/partition_alloc.cc
index 732e1d2..de4f4f9 100644
--- a/base/allocator/partition_allocator/partition_alloc.cc
+++ b/base/allocator/partition_allocator/partition_alloc.cc
@@ -69,7 +69,7 @@
   // Check that some of our zanier calculations worked out as expected.
   static_assert(internal::kSmallestBucket == internal::kAlignment,
                 "generic smallest bucket");
-  static_assert(internal::kMaxBucketed == 917504, "generic max bucketed");
+  static_assert(internal::kMaxBucketed == 983040, "generic max bucketed");
   STATIC_ASSERT_OR_PA_CHECK(
       internal::MaxSystemPagesPerRegularSlotSpan() <= 16,
       "System pages per slot span must be no greater than 16.");
diff --git a/base/allocator/partition_allocator/partition_alloc_constants.h b/base/allocator/partition_allocator/partition_alloc_constants.h
index e5c0d79..cbd5270 100644
--- a/base/allocator/partition_allocator/partition_alloc_constants.h
+++ b/base/allocator/partition_allocator/partition_alloc_constants.h
@@ -370,8 +370,10 @@
 constexpr size_t kMaxBucketedOrder = 20;
 constexpr size_t kNumBucketedOrders =
     (kMaxBucketedOrder - kMinBucketedOrder) + 1;
-// 4 buckets per order (for the higher orders).
-constexpr size_t kNumBucketsPerOrderBits = 2;
+// 8 buckets per order (for the higher orders).
+// Note: this is not the number of buckets used by default, but the maximum
+// number of buckets per order. By default, only 4 are used.
+constexpr size_t kNumBucketsPerOrderBits = 3;
 constexpr size_t kNumBucketsPerOrder = 1 << kNumBucketsPerOrderBits;
 constexpr size_t kNumBuckets = kNumBucketedOrders * kNumBucketsPerOrder;
 constexpr size_t kSmallestBucket = 1 << (kMinBucketedOrder - 1);
diff --git a/base/allocator/partition_allocator/partition_alloc_unittest.cc b/base/allocator/partition_allocator/partition_alloc_unittest.cc
index 8f401430..310b0c5 100644
--- a/base/allocator/partition_allocator/partition_alloc_unittest.cc
+++ b/base/allocator/partition_allocator/partition_alloc_unittest.cc
@@ -206,6 +206,7 @@
 // For ease of reading, the tests are placed into the latter namespace.
 namespace partition_alloc::internal {
 
+using BucketDistribution = ThreadSafePartitionRoot::BucketDistribution;
 using SlotSpan = SlotSpanMetadata<ThreadSafe>;
 
 const size_t kTestAllocSize = 16;
@@ -222,6 +223,21 @@
 
 const char* type_name = nullptr;
 
+void SetDistributionForPartitionRoot(ThreadSafePartitionRoot* root,
+                                     BucketDistribution distribution) {
+  switch (distribution) {
+    case BucketDistribution::kDefault:
+      root->SwitchToDefaultBucketDistribution();
+      break;
+    case BucketDistribution::kCoarser:
+      root->ResetBucketDistributionForTesting();
+      break;
+    case BucketDistribution::kDenser:
+      root->SwitchToDenserBucketDistribution();
+      break;
+  }
+}
+
 class ScopedPageAllocation {
  public:
   ScopedPageAllocation(PartitionAllocator<internal::ThreadSafe>& allocator,
@@ -249,7 +265,7 @@
   char* ptr_;
 };
 
-class PartitionAllocTest : public testing::TestWithParam<bool> {
+class PartitionAllocTest : public testing::TestWithParam<BucketDistribution> {
  protected:
   PartitionAllocTest() = default;
 
@@ -294,16 +310,19 @@
     allocator.root()->UncapEmptySlotSpanMemoryForTesting();
     aligned_allocator.root()->UncapEmptySlotSpanMemoryForTesting();
 
-    if (GetParam())
-      allocator.root()->SwitchToDenserBucketDistribution();
-    else
-      allocator.root()->ResetBucketDistributionForTesting();
+    SetDistributionForPartitionRoot(allocator.root(), GetParam());
+    SetDistributionForPartitionRoot(aligned_allocator.root(), GetParam());
   }
 
   size_t SizeToIndex(size_t size) {
-    const bool with_denser_bucket_distribution = GetParam();
+    const auto distribution_to_use = GetParam();
     return PartitionRoot<internal::ThreadSafe>::SizeToBucketIndex(
-        size, with_denser_bucket_distribution);
+        size, distribution_to_use);
+  }
+
+  size_t SizeToBucketSize(size_t size) {
+    const auto index = SizeToIndex(size);
+    return allocator.root()->buckets[index].slot_size;
   }
 
   void TearDown() override {
@@ -471,7 +490,9 @@
 
 INSTANTIATE_TEST_SUITE_P(AlternateBucketDistribution,
                          PartitionAllocDeathTest,
-                         testing::Values(false, true));
+                         testing::Values(BucketDistribution::kDefault,
+                                         BucketDistribution::kCoarser,
+                                         BucketDistribution::kDenser));
 
 #endif
 
@@ -557,7 +578,9 @@
 
 INSTANTIATE_TEST_SUITE_P(AlternateBucketDistribution,
                          PartitionAllocTest,
-                         testing::Values(false, true));
+                         testing::Values(BucketDistribution::kDefault,
+                                         BucketDistribution::kCoarser,
+                                         BucketDistribution::kDenser));
 
 // Check that the most basic of allocate / free pairs work.
 TEST_P(PartitionAllocTest, Basic) {
@@ -1005,11 +1028,10 @@
   }
 
   {
+    // Single-slot slot span size.
     const size_t size =
-        (((PartitionPageSize() * kMaxPartitionPagesPerRegularSlotSpan) -
-          SystemPageSize()) /
-         2) -
-        kExtraAllocSize;
+        PartitionPageSize() * kMaxPartitionPagesPerRegularSlotSpan + 1;
+
     void* ptr = allocator.root()->Alloc(size, type_name);
     EXPECT_TRUE(ptr);
     memset(ptr, 'A', size);
@@ -2553,8 +2575,7 @@
                                   &dumper);
       EXPECT_TRUE(dumper.IsMemoryAllocationRecorded());
 
-      size_t slot_size =
-          requested_size + (requested_size / kNumBucketsPerOrder);
+      size_t slot_size = SizeToBucketSize(requested_size + 1);
       const PartitionBucketMemoryStats* stats =
           dumper.GetBucketStats(slot_size);
       ASSERT_TRUE(stats);
@@ -2565,7 +2586,9 @@
       EXPECT_EQ(1u, stats->active_count);
       EXPECT_EQ(slot_size, stats->resident_bytes);
       EXPECT_EQ(0u, stats->decommittable_bytes);
-      EXPECT_EQ(3 * SystemPageSize(), stats->discardable_bytes);
+      EXPECT_EQ((slot_size - (requested_size + 1)) / SystemPageSize() *
+                    SystemPageSize(),
+                stats->discardable_bytes);
       EXPECT_EQ(1u, stats->num_full_slot_spans);
       EXPECT_EQ(0u, stats->num_active_slot_spans);
       EXPECT_EQ(0u, stats->num_empty_slot_spans);
@@ -2580,8 +2603,7 @@
                                   &dumper);
       EXPECT_FALSE(dumper.IsMemoryAllocationRecorded());
 
-      size_t slot_size =
-          requested_size + (requested_size / kNumBucketsPerOrder);
+      size_t slot_size = SizeToBucketSize(requested_size + 1);
       const PartitionBucketMemoryStats* stats =
           dumper.GetBucketStats(slot_size);
       EXPECT_TRUE(stats);
@@ -2609,7 +2631,7 @@
       EXPECT_TRUE(dumper.IsMemoryAllocationRecorded());
 
       size_t slot_size =
-          requested_size + (requested_size / kNumBucketsPerOrder);
+          SizeToBucketSize(requested_size + SystemPageSize() + 1);
       const PartitionBucketMemoryStats* stats =
           dumper.GetBucketStats(slot_size);
       EXPECT_TRUE(stats);
@@ -2621,7 +2643,9 @@
       EXPECT_EQ(1u, stats->active_count);
       EXPECT_EQ(slot_size, stats->resident_bytes);
       EXPECT_EQ(0u, stats->decommittable_bytes);
-      EXPECT_EQ(2 * SystemPageSize(), stats->discardable_bytes);
+      EXPECT_EQ((slot_size - (requested_size + SystemPageSize() + 1)) /
+                    SystemPageSize() * SystemPageSize(),
+                stats->discardable_bytes);
       EXPECT_EQ(1u, stats->num_full_slot_spans);
       EXPECT_EQ(0u, stats->num_active_slot_spans);
       EXPECT_EQ(0u, stats->num_empty_slot_spans);
@@ -3225,7 +3249,7 @@
   // Need sizes big enough to be direct mapped and a delta small enough to
   // allow re-use of the slot span when cookied. These numbers fall out of the
   // test case in the indicated bug.
-  size_t kInitialSize = 983040;
+  size_t kInitialSize = 983050;
   size_t kDesiredSize = 983100;
   ASSERT_GT(kInitialSize, kMaxBucketed);
   ASSERT_GT(kDesiredSize, kMaxBucketed);
@@ -3528,6 +3552,17 @@
 
   // Single-slot slot spans...
   size_t big_size = kMaxBucketed - SystemPageSize();
+  // When the system page size is larger than 4KiB, we don't necessarily have
+  // enough space in the superpage to store two of the largest bucketed
+  // allocations, particularly when we reserve extra space for e.g. bitmaps. In
+  // this case, use a smaller size.
+  //
+  // TODO(lizeb): Fix it, perhaps by lowering the maximum order for bucketed
+  // allocations.
+  if (SystemPageSize() > (1 << 12)) {
+    big_size -= 4 * SystemPageSize();
+  }
+
   ASSERT_GT(big_size, MaxRegularSlotSpanSize());
   ASSERT_LE(big_size, kMaxBucketed);
   bucket_index = SizeToIndex(big_size - kExtraAllocSize);
@@ -4224,6 +4259,7 @@
       PartitionOptions::BackupRefPtrZapping::kDisabled,
       PartitionOptions::UseConfigurablePool::kNo,
   });
+  SetDistributionForPartitionRoot(new_root, GetParam());
 
   // Realloc from |allocator.root()| into |new_root|.
   void* ptr2 = new_root->ReallocWithFlags(AllocFlags::kReturnNull, ptr,
@@ -4428,6 +4464,7 @@
         PartitionOptions::UseConfigurablePool::kIfAvailable,
     });
     root->UncapEmptySlotSpanMemoryForTesting();
+    SetDistributionForPartitionRoot(root, GetParam());
 
     const size_t count = 250;
     std::vector<void*> allocations(count, nullptr);
@@ -4464,6 +4501,7 @@
       PartitionOptions::BackupRefPtrZapping::kDisabled,
       PartitionOptions::UseConfigurablePool::kNo,
   });
+  SetDistributionForPartitionRoot(&root, GetParam());
 
   // Allocate some memory, don't free it to keep committed memory.
   std::vector<void*> allocated_memory;
@@ -4522,6 +4560,7 @@
       PartitionOptions::UseConfigurablePool::kIfAvailable,
   });
   root.UncapEmptySlotSpanMemoryForTesting();
+  SetDistributionForPartitionRoot(&root, GetParam());
 
   std::vector<void*> single_slot_allocated_memory;
   constexpr size_t single_slot_count = kDefaultEmptySlotSpanRingSize + 10;
@@ -4850,7 +4889,7 @@
 
 TEST_P(PartitionAllocTest, SmallSlotSpanWaste) {
   for (PartitionRoot<ThreadSafe>::Bucket& bucket : allocator.root()->buckets) {
-    size_t slot_size = bucket.slot_size;
+    const size_t slot_size = bucket.slot_size;
     if (slot_size == kInvalidBucketSize)
       continue;
 
diff --git a/base/allocator/partition_allocator/partition_bucket.cc b/base/allocator/partition_allocator/partition_bucket.cc
index 4d55b3d0..2d760f4 100644
--- a/base/allocator/partition_allocator/partition_bucket.cc
+++ b/base/allocator/partition_allocator/partition_bucket.cc
@@ -555,10 +555,18 @@
 
 uint8_t ComputeSystemPagesPerSlotSpan(size_t slot_size,
                                       bool prefer_smaller_slot_spans) {
-  if (prefer_smaller_slot_spans)
-    return ComputeSystemPagesPerSlotSpanPreferSmall(slot_size);
-  else
-    return ComputeSystemPagesPerSlotSpanInternal(slot_size);
+  if (prefer_smaller_slot_spans) {
+    size_t system_page_count =
+        ComputeSystemPagesPerSlotSpanPreferSmall(slot_size);
+    size_t waste = (system_page_count * SystemPageSize()) % slot_size;
+    // In case the waste is too large (more than 5% of a page), don't try to use
+    // the "small" slot span formula. This happens when we have a lot of
+    // buckets; in some cases the formula doesn't find a nice, small size.
+    if (waste <= .05 * SystemPageSize())
+      return system_page_count;
+  }
+
+  return ComputeSystemPagesPerSlotSpanInternal(slot_size);
 }
 
 template <bool thread_safe>
diff --git a/base/allocator/partition_allocator/partition_bucket_lookup.h b/base/allocator/partition_allocator/partition_bucket_lookup.h
index ca81f36..034e76ea 100644
--- a/base/allocator/partition_allocator/partition_bucket_lookup.h
+++ b/base/allocator/partition_allocator/partition_bucket_lookup.h
@@ -106,6 +106,7 @@
  public:
   PA_ALWAYS_INLINE constexpr static uint16_t GetIndexForDenserBuckets(
       size_t size);
+  PA_ALWAYS_INLINE constexpr static uint16_t GetIndexFor8Buckets(size_t size);
   PA_ALWAYS_INLINE constexpr static uint16_t GetIndex(size_t size);
 
   constexpr BucketIndexLookup() {
@@ -221,6 +222,52 @@
   }
 }
 
+PA_ALWAYS_INLINE constexpr uint16_t RoundUpToOdd(uint16_t size) {
+  return (size % 2 == 0) + size;
+}
+
+// static
+PA_ALWAYS_INLINE constexpr uint16_t BucketIndexLookup::GetIndexFor8Buckets(
+    size_t size) {
+  // This forces the bucket table to be constant-initialized and immediately
+  // materialized in the binary.
+  constexpr BucketIndexLookup lookup{};
+  const size_t order =
+      kBitsPerSizeT -
+      static_cast<size_t>(base::bits::CountLeadingZeroBits(size));
+  // The order index is simply the next few bits after the most significant
+  // bit.
+  const size_t order_index =
+      (size >> kOrderIndexShift[order]) & (kNumBucketsPerOrder - 1);
+  // And if the remaining bits are non-zero we must bump the bucket up.
+  const size_t sub_order_index = size & kOrderSubIndexMask[order];
+  const uint16_t index =
+      lookup.bucket_index_lookup_[(order << kNumBucketsPerOrderBits) +
+                                  order_index + !!sub_order_index];
+  PA_DCHECK(index <= kNumBuckets);  // Last one is the sentinel bucket.
+  return index;
+}
+
+// static
+PA_ALWAYS_INLINE constexpr uint16_t BucketIndexLookup::GetIndexForDenserBuckets(
+    size_t size) {
+  const auto index = GetIndexFor8Buckets(size);
+  // Below the minimum size, 4 and 8 bucket distributions are the same, since we
+  // can't fit any more buckets per order; this is due to alignment
+  // requirements: each bucket must be a multiple of the alignment, which
+  // implies the difference between buckets must also be a multiple of the
+  // alignment. In smaller orders, this limits the number of buckets we can
+  // have per order. So, for these small orders, we do not want to skip every
+  // second bucket.
+  //
+  // We also do not want to go above the index for the max bucketed size.
+  if (size > kAlignment * kNumBucketsPerOrder &&
+      index < GetIndexFor8Buckets(kMaxBucketed))
+    return RoundUpToOdd(index);
+  else
+    return index;
+}
+
 // static
 PA_ALWAYS_INLINE constexpr uint16_t BucketIndexLookup::GetIndex(size_t size) {
   // For any order 2^N, under the denser bucket distribution ("Distribution A"),
@@ -246,28 +293,6 @@
   return BucketIndexLookup::GetIndexForDenserBuckets(size);
 }
 
-// static
-PA_ALWAYS_INLINE constexpr uint16_t BucketIndexLookup::GetIndexForDenserBuckets(
-    size_t size) {
-  // This forces the bucket table to be constant-initialized and immediately
-  // materialized in the binary.
-  constexpr BucketIndexLookup lookup{};
-  const size_t order =
-      kBitsPerSizeT -
-      static_cast<size_t>(base::bits::CountLeadingZeroBits(size));
-  // The order index is simply the next few bits after the most significant
-  // bit.
-  const size_t order_index =
-      (size >> kOrderIndexShift[order]) & (kNumBucketsPerOrder - 1);
-  // And if the remaining bits are non-zero we must bump the bucket up.
-  const size_t sub_order_index = size & kOrderSubIndexMask[order];
-  const uint16_t index =
-      lookup.bucket_index_lookup_[(order << kNumBucketsPerOrderBits) +
-                                  order_index + !!sub_order_index];
-  PA_DCHECK(index <= kNumBuckets);  // Last one is the sentinel bucket.
-  return index;
-}
-
 }  // namespace partition_alloc::internal
 
 #endif  // BASE_ALLOCATOR_PARTITION_ALLOCATOR_PARTITION_BUCKET_LOOKUP_H_
diff --git a/base/allocator/partition_allocator/partition_root.h b/base/allocator/partition_allocator/partition_root.h
index 2d345cd4..f8a888f 100644
--- a/base/allocator/partition_allocator/partition_root.h
+++ b/base/allocator/partition_allocator/partition_root.h
@@ -247,6 +247,8 @@
     kEnabled,
   };
 
+  enum class BucketDistribution : uint8_t { kDefault, kCoarser, kDenser };
+
   // Flags accessed on fast paths.
   //
   // Careful! PartitionAlloc's performance is sensitive to its layout.  Please
@@ -259,8 +261,13 @@
     // Defines whether the root should be scanned.
     ScanMode scan_mode;
 
+    // It's important to default to the coarser distribution: otherwise, a
+    // switch from dense -> coarse would leave some buckets with dirty memory
+    // forever, since no memory would be allocated from them, and their
+    // freelists would typically not be empty, making them unreclaimable.
+    BucketDistribution bucket_distribution = BucketDistribution::kCoarser;
+
     bool with_thread_cache = false;
-    bool with_denser_bucket_distribution = false;
 
     bool allow_aligned_alloc;
     bool allow_cookie;
@@ -546,8 +553,12 @@
   static void DeleteForTesting(PartitionRoot* partition_root);
   void ResetBookkeepingForTesting();
 
+  PA_ALWAYS_INLINE BucketDistribution GetBucketDistribution() const {
+    return flags.bucket_distribution;
+  }
+
   static uint16_t SizeToBucketIndex(size_t size,
-                                    bool with_denser_bucket_distribution);
+                                    BucketDistribution bucket_distribution);
 
   PA_ALWAYS_INLINE void FreeInSlotSpan(uintptr_t slot_start,
                                        SlotSpan* slot_span)
@@ -571,15 +582,18 @@
   // more buckets, meaning any allocations we have done before the switch are
   // guaranteed to have a bucket under the new distribution when they are
   // eventually deallocated. We do not need synchronization here or below.
+  void SwitchToDefaultBucketDistribution() {
+    flags.bucket_distribution = BucketDistribution::kDefault;
+  }
   void SwitchToDenserBucketDistribution() {
-    flags.with_denser_bucket_distribution = true;
+    flags.bucket_distribution = BucketDistribution::kDenser;
   }
   // Switching back to the less dense bucket distribution is ok during tests.
   // At worst, we end up with deallocations that are sent to a bucket that we
   // cannot allocate from, which will not cause problems besides wasting
   // memory.
   void ResetBucketDistributionForTesting() {
-    flags.with_denser_bucket_distribution = false;
+    flags.bucket_distribution = BucketDistribution::kCoarser;
   }
 
   ThreadCache* thread_cache_for_testing() const {
@@ -1693,10 +1707,15 @@
 template <bool thread_safe>
 PA_ALWAYS_INLINE uint16_t PartitionRoot<thread_safe>::SizeToBucketIndex(
     size_t size,
-    bool with_denser_bucket_distribution) {
-  if (with_denser_bucket_distribution)
-    return internal::BucketIndexLookup::GetIndexForDenserBuckets(size);
-  return internal::BucketIndexLookup::GetIndex(size);
+    BucketDistribution bucket_distribution) {
+  switch (bucket_distribution) {
+    case BucketDistribution::kDefault:
+      return internal::BucketIndexLookup::GetIndexForDenserBuckets(size);
+    case BucketDistribution::kCoarser:
+      return internal::BucketIndexLookup::GetIndex(size);
+    case BucketDistribution::kDenser:
+      return internal::BucketIndexLookup::GetIndexFor8Buckets(size);
+  }
 }
 
 template <bool thread_safe>
@@ -1777,11 +1796,11 @@
   PA_CHECK(raw_size >= requested_size);  // check for overflows
 
   // We should only call |SizeToBucketIndex| at most once when allocating.
-  // Otherwise, we risk having |with_denser_bucket_distribution| changed
+  // Otherwise, we risk having |bucket_distribution| changed
   // underneath us (between calls to |SizeToBucketIndex| during the same call),
   // which would result in an inconsistent state.
   uint16_t bucket_index =
-      SizeToBucketIndex(raw_size, this->flags.with_denser_bucket_distribution);
+      SizeToBucketIndex(raw_size, this->GetBucketDistribution());
   size_t usable_size;
   bool is_already_zeroed = false;
   uintptr_t slot_start = 0;
@@ -2079,8 +2098,7 @@
 #else
   PA_DCHECK(PartitionRoot<thread_safe>::initialized);
   size = AdjustSizeForExtrasAdd(size);
-  auto& bucket =
-      bucket_at(SizeToBucketIndex(size, flags.with_denser_bucket_distribution));
+  auto& bucket = bucket_at(SizeToBucketIndex(size, GetBucketDistribution()));
   PA_DCHECK(!bucket.slot_size || bucket.slot_size >= size);
   PA_DCHECK(!(bucket.slot_size % internal::kSmallestBucket));
 
diff --git a/base/allocator/partition_allocator/shim/allocator_shim.h b/base/allocator/partition_allocator/shim/allocator_shim.h
index bf0fe634..1b1b6715 100644
--- a/base/allocator/partition_allocator/shim/allocator_shim.h
+++ b/base/allocator/partition_allocator/shim/allocator_shim.h
@@ -9,6 +9,7 @@
 #include <stdint.h>
 
 #include "base/allocator/buildflags.h"
+#include "base/allocator/partition_alloc_features.h"
 #include "base/allocator/partition_allocator/partition_alloc_config.h"
 #include "base/base_export.h"
 #include "base/types/strong_alias.h"
@@ -178,7 +179,7 @@
 using UseDedicatedAlignedPartition =
     base::StrongAlias<class UseDedicatedAlignedPartitionTag, bool>;
 using AlternateBucketDistribution =
-    base::StrongAlias<class AlternateBucketDistributionTag, bool>;
+    base::features::AlternateBucketDistributionMode;
 
 // If |thread_cache_on_non_quarantinable_partition| is specified, the
 // thread-cache will be enabled on the non-quarantinable partition. The
diff --git a/base/allocator/partition_allocator/shim/allocator_shim_default_dispatch_to_partition_alloc.cc b/base/allocator/partition_allocator/shim/allocator_shim_default_dispatch_to_partition_alloc.cc
index 1cceec7..f36804e 100644
--- a/base/allocator/partition_allocator/shim/allocator_shim_default_dispatch_to_partition_alloc.cc
+++ b/base/allocator/partition_allocator/shim/allocator_shim_default_dispatch_to_partition_alloc.cc
@@ -582,9 +582,18 @@
   auto* current_aligned_root = g_aligned_root.Get();
 
   if (!split_main_partition) {
-    if (!use_alternate_bucket_distribution) {
-      current_root->SwitchToDenserBucketDistribution();
-      current_aligned_root->SwitchToDenserBucketDistribution();
+    switch (use_alternate_bucket_distribution) {
+      case AlternateBucketDistribution::kDefault:
+        current_root->SwitchToDefaultBucketDistribution();
+        current_aligned_root->SwitchToDefaultBucketDistribution();
+        break;
+      case AlternateBucketDistribution::kCoarser:
+        // We are already using the coarser distribution when we create a root.
+        break;
+      case AlternateBucketDistribution::kDenser:
+        current_root->SwitchToDenserBucketDistribution();
+        current_aligned_root->SwitchToDenserBucketDistribution();
+        break;
     }
     PA_DCHECK(!enable_brp);
     PA_DCHECK(!use_dedicated_aligned_partition);
@@ -658,9 +667,18 @@
       partition_alloc::PurgeFlags::kDecommitEmptySlotSpans |
       partition_alloc::PurgeFlags::kDiscardUnusedSystemPages);
 
-  if (!use_alternate_bucket_distribution) {
-    g_root.Get()->SwitchToDenserBucketDistribution();
-    g_aligned_root.Get()->SwitchToDenserBucketDistribution();
+  switch (use_alternate_bucket_distribution) {
+    case AlternateBucketDistribution::kDefault:
+      g_root.Get()->SwitchToDefaultBucketDistribution();
+      g_aligned_root.Get()->SwitchToDefaultBucketDistribution();
+      break;
+    case AlternateBucketDistribution::kCoarser:
+      // We are already using the coarser distribution when we create a root.
+      break;
+    case AlternateBucketDistribution::kDenser:
+      g_root.Get()->SwitchToDenserBucketDistribution();
+      g_aligned_root.Get()->SwitchToDenserBucketDistribution();
+      break;
   }
 }
 
diff --git a/base/allocator/partition_allocator/starscan/pcscan_unittest.cc b/base/allocator/partition_allocator/starscan/pcscan_unittest.cc
index 859cc7bb8..785fa34 100644
--- a/base/allocator/partition_allocator/starscan/pcscan_unittest.cc
+++ b/base/allocator/partition_allocator/starscan/pcscan_unittest.cc
@@ -130,8 +130,8 @@
   PA_CHECK(0u == root.get_total_size_of_committed_pages());
 
   const size_t raw_size = root.AdjustSizeForExtrasAdd(object_size);
-  const size_t bucket_index = root.SizeToBucketIndex(
-      raw_size, root.flags.with_denser_bucket_distribution);
+  const size_t bucket_index =
+      root.SizeToBucketIndex(raw_size, root.GetBucketDistribution());
   ThreadSafePartitionRoot::Bucket& bucket = root.buckets[bucket_index];
   const size_t num_slots = (bucket.get_bytes_per_span()) / bucket.slot_size;
 
diff --git a/base/allocator/partition_allocator/thread_cache.cc b/base/allocator/partition_allocator/thread_cache.cc
index 7a68beef..0805d481 100644
--- a/base/allocator/partition_allocator/thread_cache.cc
+++ b/base/allocator/partition_allocator/thread_cache.cc
@@ -424,7 +424,9 @@
   if (size > ThreadCache::kLargeSizeThreshold)
     size = ThreadCache::kLargeSizeThreshold;
   largest_active_bucket_index_ =
-      PartitionRoot<internal::ThreadSafe>::SizeToBucketIndex(size, false);
+      PartitionRoot<internal::ThreadSafe>::SizeToBucketIndex(
+          size,
+          PartitionRoot<internal::ThreadSafe>::BucketDistribution::kCoarser);
   PA_CHECK(largest_active_bucket_index_ < kBucketCount);
   ThreadCacheRegistry::Instance().SetLargestActiveBucketIndex(
       largest_active_bucket_index_);
@@ -447,9 +449,9 @@
   size_t usable_size;
   bool already_zeroed;
 
-  auto* bucket = root->buckets +
-                 PartitionRoot<internal::ThreadSafe>::SizeToBucketIndex(
-                     raw_size, root->flags.with_denser_bucket_distribution);
+  auto* bucket =
+      root->buckets + PartitionRoot<internal::ThreadSafe>::SizeToBucketIndex(
+                          raw_size, root->GetBucketDistribution());
   uintptr_t buffer = root->RawAlloc(bucket, AllocFlags::kZeroFill, raw_size,
                                     internal::PartitionPageSize(), &usable_size,
                                     &already_zeroed);
diff --git a/base/allocator/partition_allocator/thread_cache_unittest.cc b/base/allocator/partition_allocator/thread_cache_unittest.cc
index 68ed4d40..24d24d9 100644
--- a/base/allocator/partition_allocator/thread_cache_unittest.cc
+++ b/base/allocator/partition_allocator/thread_cache_unittest.cc
@@ -29,6 +29,7 @@
 
 namespace partition_alloc {
 
+using BucketDistribution = ThreadSafePartitionRoot::BucketDistribution;
 namespace {
 
 constexpr size_t kSmallSize = 12;
@@ -84,7 +85,9 @@
 
 }  // namespace
 
-class PartitionAllocThreadCacheTest : public ::testing::TestWithParam<bool> {
+class PartitionAllocThreadCacheTest
+    : public ::testing::TestWithParam<
+          PartitionRoot<internal::ThreadSafe>::BucketDistribution> {
  public:
   PartitionAllocThreadCacheTest() : root_(CreatePartitionRoot()) {}
 
@@ -101,10 +104,17 @@
 
  protected:
   void SetUp() override {
-    if (GetParam())
-      root_->SwitchToDenserBucketDistribution();
-    else
-      root_->ResetBucketDistributionForTesting();
+    switch (GetParam()) {
+      case BucketDistribution::kDefault:
+        root_->SwitchToDefaultBucketDistribution();
+        break;
+      case BucketDistribution::kCoarser:
+        root_->ResetBucketDistributionForTesting();
+        break;
+      case BucketDistribution::kDenser:
+        root_->SwitchToDenserBucketDistribution();
+        break;
+    }
 
 #if defined(PA_HAS_64_BITS_POINTERS)
     // Another test can uninitialize the pools, so make sure they are
@@ -141,8 +151,9 @@
   // Returns the size of the smallest bucket fitting an allocation of
   // |sizeof(ThreadCache)| bytes.
   size_t GetBucketSizeForThreadCache() {
-    size_t tc_bucket_index =
-        root_->SizeToBucketIndex(sizeof(ThreadCache), false);
+    size_t tc_bucket_index = root_->SizeToBucketIndex(
+        sizeof(ThreadCache),
+        ThreadSafePartitionRoot::BucketDistribution::kCoarser);
     auto* tc_bucket = &root_->buckets[tc_bucket_index];
     return tc_bucket->slot_size;
   }
@@ -186,7 +197,9 @@
 
 INSTANTIATE_TEST_SUITE_P(AlternateBucketDistribution,
                          PartitionAllocThreadCacheTest,
-                         ::testing::Values(false, true));
+                         ::testing::Values(BucketDistribution::kDefault,
+                                           BucketDistribution::kCoarser,
+                                           BucketDistribution::kDenser));
 
 TEST_P(PartitionAllocThreadCacheTest, Simple) {
   // There is a cache.
@@ -355,11 +368,11 @@
 
 size_t FillThreadCacheAndReturnIndex(ThreadSafePartitionRoot* root,
                                      size_t size,
-                                     bool with_denser_bucket_distribution,
+                                     BucketDistribution bucket_distribution,
                                      size_t count = 1) {
   uint16_t bucket_index =
       PartitionRoot<internal::ThreadSafe>::SizeToBucketIndex(
-          size, with_denser_bucket_distribution);
+          size, bucket_distribution);
   std::vector<void*> allocated_data;
 
   for (size_t i = 0; i < count; ++i) {
@@ -382,15 +395,14 @@
  public:
   ThreadDelegateForMultipleThreadCaches(ThreadCache* parent_thread_cache,
                                         ThreadSafePartitionRoot* root,
-                                        bool with_denser_bucket_distribution)
+                                        BucketDistribution bucket_distribution)
       : parent_thread_tcache_(parent_thread_cache),
         root_(root),
-        with_denser_bucket_distribution_(with_denser_bucket_distribution) {}
+        bucket_distribution_(bucket_distribution) {}
 
   void ThreadMain() override {
     EXPECT_FALSE(root_->thread_cache_for_testing());  // No allocations yet.
-    FillThreadCacheAndReturnIndex(root_, kMediumSize,
-                                  with_denser_bucket_distribution_);
+    FillThreadCacheAndReturnIndex(root_, kMediumSize, bucket_distribution_);
     auto* tcache = root_->thread_cache_for_testing();
     EXPECT_TRUE(tcache);
 
@@ -400,7 +412,7 @@
  private:
   ThreadCache* parent_thread_tcache_ = nullptr;
   ThreadSafePartitionRoot* root_ = nullptr;
-  bool with_denser_bucket_distribution_;
+  PartitionRoot<internal::ThreadSafe>::BucketDistribution bucket_distribution_;
 };
 
 }  // namespace
@@ -479,15 +491,14 @@
  public:
   ThreadDelegateForThreadCacheRegistry(ThreadCache* parent_thread_cache,
                                        ThreadSafePartitionRoot* root,
-                                       bool with_denser_bucket_distribution)
+                                       BucketDistribution bucket_distribution)
       : parent_thread_tcache_(parent_thread_cache),
         root_(root),
-        with_denser_bucket_distribution_(with_denser_bucket_distribution) {}
+        bucket_distribution_(bucket_distribution) {}
 
   void ThreadMain() override {
     EXPECT_FALSE(root_->thread_cache_for_testing());  // No allocations yet.
-    FillThreadCacheAndReturnIndex(root_, kSmallSize,
-                                  with_denser_bucket_distribution_);
+    FillThreadCacheAndReturnIndex(root_, kSmallSize, bucket_distribution_);
     auto* tcache = root_->thread_cache_for_testing();
     EXPECT_TRUE(tcache);
 
@@ -499,7 +510,7 @@
  private:
   ThreadCache* parent_thread_tcache_ = nullptr;
   ThreadSafePartitionRoot* root_ = nullptr;
-  bool with_denser_bucket_distribution_;
+  BucketDistribution bucket_distribution_;
 };
 
 }  // namespace
@@ -584,15 +595,15 @@
   ThreadDelegateForMultipleThreadCachesAccounting(
       ThreadSafePartitionRoot* root,
       int alloc_count,
-      bool with_denser_bucket_distribution)
+      BucketDistribution bucket_distribution)
       : root_(root),
         alloc_count_(alloc_count),
-        with_denser_bucket_distribution_(with_denser_bucket_distribution) {}
+        bucket_distribution_(bucket_distribution) {}
 
   void ThreadMain() override {
     EXPECT_FALSE(root_->thread_cache_for_testing());  // No allocations yet.
-    size_t bucket_index = FillThreadCacheAndReturnIndex(
-        root_, kMediumSize, with_denser_bucket_distribution_);
+    size_t bucket_index =
+        FillThreadCacheAndReturnIndex(root_, kMediumSize, bucket_distribution_);
 
     ThreadCacheStats stats;
     ThreadCacheRegistry::Instance().DumpStats(false, &stats);
@@ -610,7 +621,7 @@
  private:
  private:
   ThreadSafePartitionRoot* root_ = nullptr;
-  bool with_denser_bucket_distribution_;
+  BucketDistribution bucket_distribution_;
   const int alloc_count_;
 };
 
@@ -648,17 +659,16 @@
                             std::atomic<bool>& other_thread_started,
                             std::atomic<bool>& purge_called,
                             int bucket_index,
-                            bool with_denser_bucket_distribution)
+                            BucketDistribution bucket_distribution)
       : root_(root),
         other_thread_tcache_(other_thread_tcache),
         other_thread_started_(other_thread_started),
         purge_called_(purge_called),
         bucket_index_(bucket_index),
-        with_denser_bucket_distribution_(with_denser_bucket_distribution) {}
+        bucket_distribution_(bucket_distribution) {}
 
   void ThreadMain() override PA_NO_THREAD_SAFETY_ANALYSIS {
-    FillThreadCacheAndReturnIndex(root_, kSmallSize,
-                                  with_denser_bucket_distribution_);
+    FillThreadCacheAndReturnIndex(root_, kSmallSize, bucket_distribution_);
     other_thread_tcache_ = root_->thread_cache_for_testing();
 
     other_thread_started_.store(true, std::memory_order_release);
@@ -684,7 +694,7 @@
   std::atomic<bool>& other_thread_started_;
   std::atomic<bool>& purge_called_;
   const int bucket_index_;
-  bool with_denser_bucket_distribution_;
+  BucketDistribution bucket_distribution_;
 };
 
 }  // namespace
@@ -776,13 +786,13 @@
 
 void FillThreadCacheWithMemory(ThreadSafePartitionRoot* root,
                                size_t target_cached_memory,
-                               bool with_denser_bucket_distribution) {
+                               BucketDistribution bucket_distribution) {
   for (int batch : {1, 2, 4, 8, 16}) {
     for (size_t allocation_size = 1;
          allocation_size <= ThreadCache::kLargeSizeThreshold;
          allocation_size++) {
-      FillThreadCacheAndReturnIndex(root, allocation_size, batch,
-                                    with_denser_bucket_distribution);
+      FillThreadCacheAndReturnIndex(root, allocation_size, bucket_distribution,
+                                    batch);
 
       if (ThreadCache::Get()->CachedMemory() >= target_cached_memory)
         return;
@@ -799,16 +809,16 @@
       ThreadSafePartitionRoot* root,
       std::atomic<int>& allocations_done,
       std::atomic<bool>& can_finish,
-      bool with_denser_bucket_distribution)
+      BucketDistribution bucket_distribution)
       : root_(root),
         allocations_done_(allocations_done),
         can_finish_(can_finish),
-        with_denser_bucket_distribution_(with_denser_bucket_distribution) {}
+        bucket_distribution_(bucket_distribution) {}
 
   void ThreadMain() override {
     FillThreadCacheWithMemory(
         root_, 5 * ThreadCacheRegistry::kMinCachedMemoryForPurging,
-        with_denser_bucket_distribution_);
+        bucket_distribution_);
     allocations_done_.fetch_add(1, std::memory_order_release);
 
     // This thread needs to be alive when the next periodic purge task runs.
@@ -820,7 +830,7 @@
   ThreadSafePartitionRoot* root_ = nullptr;
   std::atomic<int>& allocations_done_;
   std::atomic<bool>& can_finish_;
-  bool with_denser_bucket_distribution_;
+  BucketDistribution bucket_distribution_;
 };
 
 }  // namespace
@@ -966,16 +976,15 @@
       std::atomic<bool>& other_thread_started,
       std::atomic<bool>& threshold_changed,
       int bucket_index,
-      bool with_denser_bucket_distribution)
+      BucketDistribution bucket_distribution)
       : root_(root),
         other_thread_started_(other_thread_started),
         threshold_changed_(threshold_changed),
         bucket_index_(bucket_index),
-        with_denser_bucket_distribution_(with_denser_bucket_distribution) {}
+        bucket_distribution_(bucket_distribution) {}
 
   void ThreadMain() override {
-    FillThreadCacheAndReturnIndex(root_, kSmallSize,
-                                  with_denser_bucket_distribution_,
+    FillThreadCacheAndReturnIndex(root_, kSmallSize, bucket_distribution_,
                                   kDefaultCountForSmallBucket + 10);
     auto* this_thread_tcache = root_->thread_cache_for_testing();
     // More than the default since the multiplier has changed.
@@ -999,7 +1008,7 @@
   std::atomic<bool>& other_thread_started_;
   std::atomic<bool>& threshold_changed_;
   const int bucket_index_;
-  bool with_denser_bucket_distribution_;
+  PartitionRoot<internal::ThreadSafe>::BucketDistribution bucket_distribution_;
 };
 
 }  // namespace
@@ -1141,7 +1150,9 @@
 
   // The ThreadCache is allocated before we change buckets, so its size is
   // always based on the sparser distribution.
-  size_t tc_bucket_index = root_->SizeToBucketIndex(sizeof(ThreadCache), false);
+  size_t tc_bucket_index = root_->SizeToBucketIndex(
+      sizeof(ThreadCache),
+      ThreadSafePartitionRoot::BucketDistribution::kCoarser);
   auto* tc_bucket = &root_->buckets[tc_bucket_index];
   size_t expected_allocated_size =
       tc_bucket->slot_size;  // For the ThreadCache itself.
@@ -1248,11 +1259,12 @@
       size_t n = i * (4 + offset) / 4;
       EXPECT_EQ(BucketIndexLookup::GetIndex(n),
                 BucketIndexLookup::GetIndexForDenserBuckets(n));
-      EXPECT_EQ(BucketIndexLookup::GetIndex(n), expected_index++);
+      EXPECT_EQ(BucketIndexLookup::GetIndex(n), expected_index);
+      expected_index += 2;
     }
     // The last two buckets in the order are "rounded up" to the same bucket
     // as the next power of two.
-    expected_index += 2;
+    expected_index += 4;
     for (size_t offset = 2; offset < 4; offset++) {
       size_t n = i * (4 + offset) / 4;
       // These two are rounded up in the alternate distribution, so we expect
diff --git a/content/common/partition_alloc_support.cc b/content/common/partition_alloc_support.cc
index f559842..ab8244d0 100644
--- a/content/common/partition_alloc_support.cc
+++ b/content/common/partition_alloc_support.cc
@@ -313,8 +313,9 @@
       allocator_shim::SplitMainPartition(split_main_partition),
       allocator_shim::UseDedicatedAlignedPartition(
           use_dedicated_aligned_partition),
-      allocator_shim::AlternateBucketDistribution(base::FeatureList::IsEnabled(
-          base::features::kPartitionAllocUseAlternateDistribution)));
+      allocator_shim::AlternateBucketDistribution(
+          base::features::kPartitionAllocAlternateBucketDistributionParam
+              .Get()));
 #endif  // BUILDFLAG(USE_PARTITION_ALLOC_AS_MALLOC)
 
   // If BRP is not enabled, check if any of PCScan flags is enabled.