[base] Inline LockImpl::Lock() and LockImpl::Try().

LockImpl::Unlock() is inlined, but not LockImpl::Lock(). Separately,
Lock() calls Try() before locking, to increase uncontended
performance. This code is duplicated between POSIX and Windows. This
commit:
- Merges the two paths
- Inlines Lock() and Try(), partly for symmetry, and partly for
  performance

On a Linux Xeon "Haswell" workstation (E5-2690v4), this improves
uncontended acquire/release pairs by 8%. These numbers are collected
from a non-PGO build, and the impact on a PGO one is likely smaller.

This was assessed by running the performance test 50 times with:
$ out/Release-desktop/base_perftests \
    --gtest_filter="LockPerfTest.Simple" --gtest_repeat=50 \
    | grep RESULT | sed -e 's/.*= //;s/ .*//'

Results are summarized below (numbers are runs/s, higher is better):
trunk:   Mean = 4.534e+07  Standard Deviation = 6.234e+05
This CL: Mean = 4.900e+07  Standard Deviation = 6.681e+05

That is, this patch makes it 8.08% faster, and given the standard
deviation, the difference is likely significant.

Bug: 1061437
Change-Id: Ibfb3f1f07598c22f1c9446c02e16a883bf328d20
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2386738
Reviewed-by: Wez <wez@chromium.org>
Reviewed-by: Bruce Dawson <brucedawson@chromium.org>
Commit-Queue: Benoit L <lizeb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#804963}
diff --git a/base/synchronization/lock_impl.h b/base/synchronization/lock_impl.h
index c79db40..cfe53db2 100644
--- a/base/synchronization/lock_impl.h
+++ b/base/synchronization/lock_impl.h
@@ -52,10 +52,10 @@
 
   // If the lock is not held, take it and return true.  If the lock is already
   // held by something else, immediately return false.
-  bool Try();
+  inline bool Try();
 
   // Take the lock, blocking until it is available if necessary.
-  void Lock();
+  inline void Lock();
 
   // Release the lock.  This must only be called by the lock's holder: after
   // a successful call to Try, or a call to Lock.
@@ -71,16 +71,47 @@
   static bool PriorityInheritanceAvailable();
 #endif
 
+  void LockInternalWithTracking();
   NativeHandle native_handle_;
 
   DISALLOW_COPY_AND_ASSIGN(LockImpl);
 };
 
+void LockImpl::Lock() {
+  // The ScopedLockAcquireActivity in LockInternalWithTracking() (not inlined
+  // here because of circular includes) is relatively expensive and so its
+  // actions can become significant due to the very large number of locks that
+  // tend to be used throughout the build. It is also not needed unless the lock
+  // is contended.
+  //
+  // To avoid this cost in the vast majority of the calls, simply "try" the lock
+  // first and only do the (tracked) blocking call if that fails. |Try()| is
+  // cheap on platforms with futex-type locks, as it doesn't call into the
+  // kernel.
+  if (LIKELY(Try()))
+    return;
+
+  LockInternalWithTracking();
+}
+
 #if defined(OS_WIN)
+bool LockImpl::Try() {
+  return !!::TryAcquireSRWLockExclusive(
+      reinterpret_cast<PSRWLOCK>(&native_handle_));
+}
+
 void LockImpl::Unlock() {
   ::ReleaseSRWLockExclusive(reinterpret_cast<PSRWLOCK>(&native_handle_));
 }
+
 #elif defined(OS_POSIX) || defined(OS_FUCHSIA)
+
+bool LockImpl::Try() {
+  int rv = pthread_mutex_trylock(&native_handle_);
+  DCHECK(rv == 0 || rv == EBUSY) << ". " << strerror(rv);
+  return rv == 0;
+}
+
 void LockImpl::Unlock() {
   int rv = pthread_mutex_unlock(&native_handle_);
   DCHECK_EQ(rv, 0) << ". " << strerror(rv);
diff --git a/base/synchronization/lock_impl_posix.cc b/base/synchronization/lock_impl_posix.cc
index 24dc1072..0793661d 100644
--- a/base/synchronization/lock_impl_posix.cc
+++ b/base/synchronization/lock_impl_posix.cc
@@ -80,24 +80,7 @@
   DCHECK_EQ(rv, 0) << ". " << SystemErrorCodeToString(rv);
 }
 
-bool LockImpl::Try() {
-  int rv = pthread_mutex_trylock(&native_handle_);
-  DCHECK(rv == 0 || rv == EBUSY) << ". " << SystemErrorCodeToString(rv);
-  return rv == 0;
-}
-
-void LockImpl::Lock() {
-  // The ScopedLockAcquireActivity below is relatively expensive and so its
-  // actions can become significant due to the very large number of locks that
-  // tend to be used throughout the build. It is also not needed unless the lock
-  // is contended.
-  //
-  // To avoid this cost in the vast majority of the calls, simply "try" the lock
-  // first and only do the (tracked) blocking call if that fails. |Try()| is
-  // cheap on platforms with futex(), as it doesn't call into the kernel.
-  if (Try())
-    return;
-
+void LockImpl::LockInternalWithTracking() {
   base::debug::ScopedLockAcquireActivity lock_activity(this);
   int rv = pthread_mutex_lock(&native_handle_);
   DCHECK_EQ(rv, 0) << ". " << SystemErrorCodeToString(rv);
diff --git a/base/synchronization/lock_impl_win.cc b/base/synchronization/lock_impl_win.cc
index 6a219d4..a040eb2 100644
--- a/base/synchronization/lock_impl_win.cc
+++ b/base/synchronization/lock_impl_win.cc
@@ -15,23 +15,7 @@
 
 LockImpl::~LockImpl() = default;
 
-bool LockImpl::Try() {
-  return !!::TryAcquireSRWLockExclusive(
-      reinterpret_cast<PSRWLOCK>(&native_handle_));
-}
-
-void LockImpl::Lock() {
-  // The ScopedLockAcquireActivity below is relatively expensive and so its
-  // actions can become significant due to the very large number of locks that
-  // tend to be used throughout the build. It is also not needed unless the lock
-  // is contended.
-  //
-  // To avoid this cost in the vast majority of the calls, simply "try" the lock
-  // first and only do the (tracked) blocking call if that fails. |Try()| is
-  // cheap, as it doesn't call into the kernel.
-  if (Try())
-    return;
-
+void LockImpl::LockInternalWithTracking() {
   base::debug::ScopedLockAcquireActivity lock_activity(this);
   ::AcquireSRWLockExclusive(reinterpret_cast<PSRWLOCK>(&native_handle_));
 }
diff --git a/base/win/windows_types.h b/base/win/windows_types.h
index 702531c..9b8c421e 100644
--- a/base/win/windows_types.h
+++ b/base/win/windows_types.h
@@ -215,9 +215,10 @@
 #define WINAPI __stdcall
 #define CALLBACK __stdcall
 
-// Needed for optimal lock performance.
+// Needed for LockImpl.
 WINBASEAPI _Releases_exclusive_lock_(*SRWLock) VOID WINAPI
     ReleaseSRWLockExclusive(_Inout_ PSRWLOCK SRWLock);
+WINBASEAPI BOOLEAN WINAPI TryAcquireSRWLockExclusive(_Inout_ PSRWLOCK SRWLock);
 
 // Needed to support protobuf's GetMessage macro magic.
 WINUSERAPI BOOL WINAPI GetMessageW(_Out_ LPMSG lpMsg,