drop page heap lock when returning memory back to kernel

Fixes issue #754.
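
The page heap previously held pageheap_lock across the
TCMalloc_SystemRelease syscall. The patch below instead takes the span
off its free list and marks it Span::IN_USE first, so that concurrent
paths reaching it through the pagemap (e.g. MergeIntoFreeList) leave it
alone, and only then drops the lock for the duration of the syscall.
A condensed sketch of that pattern (all names are from the patch below;
this is not standalone code):

    span->location = Span::IN_USE;      // hide span from concurrent merges
    Static::pageheap_lock()->Unlock();  // don't hold the lock across the syscall
    bool rv = TCMalloc_SystemRelease(
        reinterpret_cast<void*>(span->start << kPageShift),
        static_cast<size_t>(span->length << kPageShift));
    Static::pageheap_lock()->Lock();
    span->location = rv ? Span::ON_RETURNED_FREELIST   // pages returned
                        : Span::ON_NORMAL_FREELIST;    // release failed

Because PageHeap now unlocks and re-locks pageheap_lock internally, the
unit tests are updated to initialize Static and to hold the lock around
their PageHeap calls.
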
diff --git a/src/page_heap.cc b/src/page_heap.cc
index 44ad654..85849cd 100644
--- a/src/page_heap.cc
+++ b/src/page_heap.cc
@@ -241,12 +241,22 @@
   stats_.total_commit_bytes += (span->length << kPageShift);
 }
 
-bool PageHeap::DecommitSpan(Span* span) {
+bool PageHeap::TryDecommitWithoutLock(Span* span) {
   ++stats_.decommit_count;
+  ASSERT(span->location == Span::ON_NORMAL_FREELIST);
+  span->location = Span::IN_USE;
 
+  Static::pageheap_lock()->Unlock();
   bool rv = TCMalloc_SystemRelease(reinterpret_cast<void*>(span->start << kPageShift),
                                    static_cast<size_t>(span->length << kPageShift));
+  Static::pageheap_lock()->Lock();
+
+  // We're not really on any free list at this point, but let's
+  // use the location field to record whether the pages were returned.
+  span->location = Span::ON_NORMAL_FREELIST;
+
   if (rv) {
+    span->location = Span::ON_RETURNED_FREELIST;
     stats_.committed_bytes -= span->length << kPageShift;
     stats_.total_decommit_bytes += (span->length << kPageShift);
   }
@@ -320,15 +330,19 @@
   if (other == NULL) {
     return other;
   }
-  // if we're in aggressive decommit mode and span is decommitted,
-  // then we try to decommit adjacent span.
+
+  // If we're in aggressive decommit mode and the span is decommitted,
+  // then we try to decommit the adjacent span. ReleaseSpan also removes
+  // it from its free list as part of that.
   if (aggressive_decommit_ && other->location == Span::ON_NORMAL_FREELIST
       && span->location == Span::ON_RETURNED_FREELIST) {
-    bool worked = DecommitSpan(other);
-    if (!worked) {
-      return NULL;
+    if (ReleaseSpan(other) != 0) {
+      return other;
     }
-  } else if (other->location != span->location) {
+    return NULL;
+  }
+
+  if (other->location != span->location) {
     return NULL;
   }
 
@@ -360,7 +374,7 @@
   const Length n = span->length;
 
   if (aggressive_decommit_ && span->location == Span::ON_NORMAL_FREELIST) {
-    if (DecommitSpan(span)) {
+    if (TryDecommitWithoutLock(span)) {
       span->location = Span::ON_RETURNED_FREELIST;
     }
   }
@@ -467,18 +481,35 @@
   }
 }
 
-Length PageHeap::ReleaseSpan(Span* s) {
-  ASSERT(s->location == Span::ON_NORMAL_FREELIST);
+Length PageHeap::ReleaseSpan(Span* span) {
+  // We drop the heavily contended pageheap_lock around a call to a
+  // potentially very slow syscall that releases pages. Such syscalls
+  // can be slow even with "advanced" mechanisms such as MADV_FREE and
+  // its better equivalents, because they have to walk the actual page
+  // tables, and we sometimes deal with large spans, which can take a
+  // long time to walk. In addition, Linux grabs the per-address-space
+  // mmap_sem lock, which can be extremely contended at times. So it is
+  // best to avoid holding one contended lock while waiting for
+  // another.
+  //
+  // Note that we set the span's location to IN_USE, because the span
+  // could otherwise be found via the pagemap (e.g. by
+  // MergeIntoFreeList) while we're not holding the lock. Removing it
+  // from its free list and marking it unmergeable guarantees safety
+  // during the lock-dropping release.
+  ASSERT(span->location == Span::ON_NORMAL_FREELIST);
+  RemoveFromFreeList(span);
 
-  if (DecommitSpan(s)) {
-    RemoveFromFreeList(s);
-    const Length n = s->length;
-    s->location = Span::ON_RETURNED_FREELIST;
-    MergeIntoFreeList(s);  // Coalesces if possible.
-    return n;
+  Length n = span->length;
+  if (TryDecommitWithoutLock(span)) {
+    span->location = Span::ON_RETURNED_FREELIST;
+  } else {
+    n = 0;  // Mark that we failed to return the pages.
+    span->location = Span::ON_NORMAL_FREELIST;
   }
 
-  return 0;
+  MergeIntoFreeList(span);  // Coalesces if possible.
+  return n;
 }
 
 Length PageHeap::ReleaseAtLeastNPages(Length num_pages) {
diff --git a/src/page_heap.h b/src/page_heap.h
index bf50394..b234a7b 100644
--- a/src/page_heap.h
+++ b/src/page_heap.h
@@ -314,7 +314,7 @@
   void CommitSpan(Span* span);
 
   // Decommit the span.
-  bool DecommitSpan(Span* span);
+  bool TryDecommitWithoutLock(Span* span);
 
   // Prepends span to appropriate free list, and adjusts stats.
   void PrependToFreeList(Span* span);
@@ -326,12 +326,12 @@
   // IncrementalScavenge(n) is called whenever n pages are freed.
   void IncrementalScavenge(Length n);
 
-  // Attempts to decommit 's' and move it to the returned freelist.
+  // Attempts to decommit 'span' and move it to the returned freelist.
   //
   // Returns the length of the Span or zero if release failed.
   //
-  // REQUIRES: 's' must be on the NORMAL freelist.
-  Length ReleaseSpan(Span *s);
+  // REQUIRES: 'span' must be on the NORMAL freelist.
+  Length ReleaseSpan(Span *span);
 
   // Checks if we are allowed to take more memory from the system.
   // If limit is reached and allowRelease is true, tries to release
diff --git a/src/tests/page_heap_test.cc b/src/tests/page_heap_test.cc
index 3caacc0..b99a6ad 100644
--- a/src/tests/page_heap_test.cc
+++ b/src/tests/page_heap_test.cc
@@ -11,15 +11,19 @@
 
 #include <memory>
 
+#include "base/logging.h"
+#include "base/spinlock.h"
+#include "common.h"
 #include "page_heap.h"
 #include "system-alloc.h"
-#include "base/logging.h"
-#include "common.h"
+#include "static_vars.h"
 
 DECLARE_int64(tcmalloc_heap_limit_mb);
 
 namespace {
 
+using tcmalloc::Static;
+
 // The system will only release memory if the block size is equal to or
 // higher than the system page size.
 static bool HaveSystemRelease =
@@ -44,6 +48,7 @@
 
 static void TestPageHeap_Stats() {
   std::unique_ptr<tcmalloc::PageHeap> ph(new tcmalloc::PageHeap());
+  SpinLockHolder h(Static::pageheap_lock());
 
   // Empty page heap
   CheckStats(ph.get(), 0, 0, 0);
@@ -79,6 +84,8 @@
   // Make a separate PageHeap from the main test so the test can start without
   // any pages in the lists.
   std::unique_ptr<tcmalloc::PageHeap> ph(new tcmalloc::PageHeap());
+  SpinLockHolder h(Static::pageheap_lock());
+
   tcmalloc::Span *spans[kNumberMaxPagesSpans * 2];
   for (int i = 0; i < kNumberMaxPagesSpans; ++i) {
     spans[i] = ph->New(kMaxPages);
@@ -100,6 +107,7 @@
   AllocateAllPageTables();
 
   std::unique_ptr<tcmalloc::PageHeap> ph(new tcmalloc::PageHeap());
+  SpinLockHolder h(Static::pageheap_lock());
 
   CHECK_EQ(kMaxPages, 1 << (20 - kPageShift));
 
@@ -188,6 +196,8 @@
 }  // namespace
 
 int main(int argc, char **argv) {
+  Static::InitStaticVars();
+
   TestPageHeap_Stats();
   TestPageHeap_Limit();
   printf("PASS\n");