Add a central free list for kMaxPages-sized spans

Previously, the central free list with index '0' was always unused,
since freelist index 'i' tracked spans of length 'i' and there are no
spans of length 0. This meant that there was no freelist for spans of
length 'kMaxPages'. In the default configuration, this corresponds to
1MB, which is a relatively common allocation size in a lot of
applications.

This changes the free list indexing so that index 'i' tracks spans of
length 'i + 1', meaning that free list index 0 is now used and
freelist[kMaxPages - 1] tracks allocations of kMaxPages size (1MB by
default).

This also fixes the stats output to indicate '>128' for the large spans
stats rather than the incorrect '>255' which must have referred to a
historical value of kMaxPages.

No new tests are added since this code is covered by existing tests.
diff --git a/docs/pageheap.dot b/docs/pageheap.dot
index ac84b90..5e9aec8 100644
--- a/docs/pageheap.dot
+++ b/docs/pageheap.dot
@@ -3,7 +3,7 @@
 node [shape=box, width=0.3, height=0.3]
 nodesep=.05
 
-heap [shape=record, height=3, label="<f0>1 page|<f1>2 pages|<f2>3 pages|...|<f127>127 pages"]
+heap [shape=record, height=3, label="<f0>1 page|<f1>2 pages|<f2>3 pages|...|<f128>128 pages"]
 O0 [shape=record, label=""]
 O1 [shape=record, label=""]
 O2 [shape=record, label="{|}"]
@@ -20,6 +20,6 @@
 heap:f0 -> O0 -> O1 -> sep1
 heap:f1 -> O2 -> O3 -> sep2
 heap:f2 -> O4 -> O5 -> sep3
-heap:f127 -> O6 -> O7 -> sep4
+heap:f128 -> O6 -> O7 -> sep4
 
 }
diff --git a/docs/pageheap.gif b/docs/pageheap.gif
index 76b62e8..5cf00bd 100644
--- a/docs/pageheap.gif
+++ b/docs/pageheap.gif
Binary files differ
diff --git a/docs/tcmalloc.html b/docs/tcmalloc.html
index 62dae72..33b8cc5 100644
--- a/docs/tcmalloc.html
+++ b/docs/tcmalloc.html
@@ -201,7 +201,7 @@
 <p>A medium object size (256K &le; size &le; 1MB) is rounded up to a page
 size (8K) and is handled by a central page heap. The central page heap
 includes an array of 128 free lists.  The <code>k</code>th entry is a
-free list of runs that consist of <code>k</code> pages:</p>
+free list of runs that consist of <code>k + 1</code> pages:</p>
 <center><img src="pageheap.gif"></center>
 
 <p>An allocation for <code>k</code> pages is satisfied by looking in
diff --git a/src/common.h b/src/common.h
index 02a2a30..cb45315 100644
--- a/src/common.h
+++ b/src/common.h
@@ -87,8 +87,7 @@
 static const size_t kPageSize   = 1 << kPageShift;
 static const size_t kMaxSize    = 256 * 1024;
 static const size_t kAlignment  = 8;
-static const size_t kLargeSizeClass = 0;
-// For all span-lengths < kMaxPages we keep an exact-size list.
+// For all span-lengths <= kMaxPages we keep an exact-size list in PageHeap.
 static const size_t kMaxPages = 1 << (20 - kPageShift);
 
 // Default bound on the total amount of thread caches.
diff --git a/src/page_heap.cc b/src/page_heap.cc
index 9a8fb63..7dd5646 100644
--- a/src/page_heap.cc
+++ b/src/page_heap.cc
@@ -80,15 +80,15 @@
   ASSERT(n > 0);
 
   // Find first size >= n that has a non-empty list
-  for (Length s = n; s < kMaxPages; s++) {
-    Span* ll = &free_[s].normal;
+  for (Length s = n; s <= kMaxPages; s++) {
+    Span* ll = &free_[s - 1].normal;
     // If we're lucky, ll is non-empty, meaning it has a suitable span.
     if (!DLL_IsEmpty(ll)) {
       ASSERT(ll->next->location == Span::ON_NORMAL_FREELIST);
       return Carve(ll->next, n);
     }
     // Alternatively, maybe there's a usable returned span.
-    ll = &free_[s].returned;
+    ll = &free_[s - 1].returned;
     if (!DLL_IsEmpty(ll)) {
       // We did not call EnsureLimit before, to avoid releasing the span
       // that will be taken immediately back.
@@ -402,7 +402,7 @@
   else
     stats_.unmapped_bytes += (span->length << kPageShift);
 
-  if (span->length >= kMaxPages) {
+  if (span->length > kMaxPages) {
     SpanSet *set = &large_normal_;
     if (span->location == Span::ON_RETURNED_FREELIST)
       set = &large_returned_;
@@ -413,7 +413,7 @@
     return;
   }
 
-  SpanList* list = &free_[span->length];
+  SpanList* list = &free_[span->length - 1];
   if (span->location == Span::ON_NORMAL_FREELIST) {
     DLL_Prepend(&list->normal, span);
   } else {
@@ -428,7 +428,7 @@
   } else {
     stats_.unmapped_bytes -= (span->length << kPageShift);
   }
-  if (span->length >= kMaxPages) {
+  if (span->length > kMaxPages) {
     SpanSet *set = &large_normal_;
     if (span->location == Span::ON_RETURNED_FREELIST)
       set = &large_returned_;
@@ -562,9 +562,9 @@
 }
 
 void PageHeap::GetSmallSpanStats(SmallSpanStats* result) {
-  for (int s = 0; s < kMaxPages; s++) {
-    result->normal_length[s] = DLL_Length(&free_[s].normal);
-    result->returned_length[s] = DLL_Length(&free_[s].returned);
+  for (int i = 0; i < kMaxPages; i++) {
+    result->normal_length[i] = DLL_Length(&free_[i].normal);
+    result->returned_length[i] = DLL_Length(&free_[i].returned);
   }
 }
 
@@ -685,18 +685,16 @@
 }
 
 bool PageHeap::Check() {
-  ASSERT(free_[0].normal.next == &free_[0].normal);
-  ASSERT(free_[0].returned.next == &free_[0].returned);
   return true;
 }
 
 bool PageHeap::CheckExpensive() {
   bool result = Check();
-  CheckSet(&large_normal_, kMaxPages, Span::ON_NORMAL_FREELIST);
-  CheckSet(&large_returned_, kMaxPages, Span::ON_RETURNED_FREELIST);
-  for (Length s = 1; s < kMaxPages; s++) {
-    CheckList(&free_[s].normal, s, s, Span::ON_NORMAL_FREELIST);
-    CheckList(&free_[s].returned, s, s, Span::ON_RETURNED_FREELIST);
+  CheckSet(&large_normal_, kMaxPages + 1, Span::ON_NORMAL_FREELIST);
+  CheckSet(&large_returned_, kMaxPages + 1, Span::ON_RETURNED_FREELIST);
+  for (int s = 1; s <= kMaxPages; s++) {
+    CheckList(&free_[s - 1].normal, s, s, Span::ON_NORMAL_FREELIST);
+    CheckList(&free_[s - 1].returned, s, s, Span::ON_RETURNED_FREELIST);
   }
   return result;
 }
diff --git a/src/page_heap.h b/src/page_heap.h
index 34f18bb..bf50394 100644
--- a/src/page_heap.h
+++ b/src/page_heap.h
@@ -178,6 +178,8 @@
   struct SmallSpanStats {
     // For each free list of small spans, the length (in spans) of the
     // normal and returned free lists for that size.
+    //
+    // NOTE: index 'i' accounts the number of spans of length 'i + 1'.
     int64 normal_length[kMaxPages];
     int64 returned_length[kMaxPages];
   };
@@ -265,7 +267,7 @@
     Span        returned;
   };
 
-  // Sets of spans with length >= kMaxPages.
+  // Sets of spans with length > kMaxPages.
   //
   // Rather than using a linked list, we use sets here for efficient
   // best-fit search.
@@ -273,6 +275,8 @@
   SpanSet large_returned_;
 
   // Array mapping from span length to a doubly linked list of free spans
+  //
+  // NOTE: index 'i' stores spans of length 'i + 1'.
   SpanList free_[kMaxPages];
 
   // Statistics on system, free, and unmapped bytes
diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc
index be0a6ee..1f22dfb 100644
--- a/src/tcmalloc.cc
+++ b/src/tcmalloc.cc
@@ -496,9 +496,9 @@
     out->printf("------------------------------------------------\n");
     uint64_t total_normal = 0;
     uint64_t total_returned = 0;
-    for (int s = 0; s < kMaxPages; s++) {
-      const int n_length = small.normal_length[s];
-      const int r_length = small.returned_length[s];
+    for (int s = 1; s <= kMaxPages; s++) {
+      const int n_length = small.normal_length[s - 1];
+      const int r_length = small.returned_length[s - 1];
       if (n_length + r_length > 0) {
         uint64_t n_pages = s * n_length;
         uint64_t r_pages = s * r_length;
@@ -517,8 +517,9 @@
 
     total_normal += large.normal_pages;
     total_returned += large.returned_pages;
-    out->printf(">255   large * %6u spans ~ %6.1f MiB; %6.1f MiB cum"
+    out->printf(">128   large * %6u spans ~ %6.1f MiB; %6.1f MiB cum"
                 "; unmapped: %6.1f MiB; %6.1f MiB cum\n",
+                kMaxPages,
                 static_cast<unsigned int>(large.spans),
                 PagesToMiB(large.normal_pages + large.returned_pages),
                 PagesToMiB(total_normal + total_returned),
@@ -990,17 +991,17 @@
     v->push_back(span_info);
 
     // small spans
-    for (int s = 1; s < kMaxPages; s++) {
+    for (int s = 1; s <= kMaxPages; s++) {
       MallocExtension::FreeListInfo i;
       i.max_object_size = (s << kPageShift);
       i.min_object_size = ((s - 1) << kPageShift);
 
       i.type = kPageHeapType;
-      i.total_bytes_free = (s << kPageShift) * small.normal_length[s];
+      i.total_bytes_free = (s << kPageShift) * small.normal_length[s - 1];
       v->push_back(i);
 
       i.type = kPageHeapUnmappedType;
-      i.total_bytes_free = (s << kPageShift) * small.returned_length[s];
+      i.total_bytes_free = (s << kPageShift) * small.returned_length[s - 1];
       v->push_back(i);
     }
   }