Improve Chrome's DLL prefetcher to save memory

Change the in-proc prefetcher to load the code into Image pages rather than
MapFile pages to save CPU and improve memory usage, and do nothing on OS builds
that enable OS PreFetch of larger files. Note that this requires the
PrefetchVirtualMemory API, so we'll still read the file as data/MapFile on Win7.

Bug: 547794
Change-Id: I0e9ec9615131daa065a8d0ccbbb897de059524c8
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1534983
Commit-Queue: Joe Laughlin <joel@microsoft.com>
Reviewed-by: Greg Thompson <grt@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: Bruce Dawson <brucedawson@chromium.org>
Cr-Commit-Position: refs/heads/master@{#656187}
diff --git a/base/files/memory_mapped_file.cc b/base/files/memory_mapped_file.cc
index e7f344d0..6c8a0d9 100644
--- a/base/files/memory_mapped_file.cc
+++ b/base/files/memory_mapped_file.cc
@@ -46,6 +46,13 @@
     case READ_WRITE_EXTEND:
       // Can't open with "extend" because no maximum size is known.
       NOTREACHED();
+      break;
+#if defined(OS_WIN)
+    case READ_CODE_IMAGE:
+      flags |= File::FLAG_OPEN | File::FLAG_READ | File::FLAG_EXCLUSIVE_WRITE |
+               File::FLAG_EXECUTE;
+      break;
+#endif
   }
   file_.Initialize(file_name, flags);
 
@@ -90,6 +97,13 @@
         return false;
       }
       break;
+#if defined(OS_WIN)
+    case READ_CODE_IMAGE:
+      // Can't open with "READ_CODE_IMAGE", not supported outside Windows
+      // or with a |region|.
+      NOTREACHED();
+      break;
+#endif
   }
 
   if (IsValid())
diff --git a/base/files/memory_mapped_file.h b/base/files/memory_mapped_file.h
index 1f73bcc..8a8c320 100644
--- a/base/files/memory_mapped_file.h
+++ b/base/files/memory_mapped_file.h
@@ -44,6 +44,14 @@
     // needed. Note, however, that the maximum size will still be reserved
     // in the process address space.
     READ_WRITE_EXTEND,
+
+#if defined(OS_WIN)
+    // This provides read access, but maps the file as executable code. It is
+    // used for prefetching DLLs into RAM to avoid inefficient hard-fault
+    // patterns, such as during process startup. The accessing thread could be
+    // paused while data from the file is read into memory (if needed).
+    READ_CODE_IMAGE,
+#endif
   };
 
   // The default constructor sets all members to invalid/null values.
@@ -84,10 +92,10 @@
     return Initialize(std::move(file), READ_ONLY);
   }
 
-  // As above, but works with a region of an already-opened file. All forms of
-  // |access| are allowed. If READ_WRITE_EXTEND is specified then |region|
-  // provides the maximum size of the file. If the memory mapping fails, it
-  // return false.
+  // As above, but works with a region of an already-opened file. |access|
+  // must not be READ_CODE_IMAGE. If READ_WRITE_EXTEND is specified then
+  // |region| provides the maximum size of the file. If the memory mapping
+  // fails, it returns false.
   WARN_UNUSED_RESULT bool Initialize(File file,
                                      const Region& region,
                                      Access access);
@@ -115,6 +123,12 @@
                                            size_t* aligned_size,
                                            int32_t* offset);
 
+#if defined(OS_WIN)
+  // Maps the executable file to memory and sets |data_| to that memory
+  // address. Returns true on success.
+  bool MapImageToMemory(Access access);
+#endif
+
   // Map the file to memory, set data_ to that memory address. Return true on
   // success, false on any kind of failure. This is a helper for Initialize().
   bool MapFileRegionToMemory(const Region& region, Access access);
diff --git a/base/files/memory_mapped_file_win.cc b/base/files/memory_mapped_file_win.cc
index dbd39468..9735aeb4 100644
--- a/base/files/memory_mapped_file_win.cc
+++ b/base/files/memory_mapped_file_win.cc
@@ -20,11 +20,44 @@
 MemoryMappedFile::MemoryMappedFile() : data_(NULL), length_(0) {
 }
 
+bool MemoryMappedFile::MapImageToMemory(Access access) {
+  ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK);
+
+  // Maps |file_| as a read/execute image section (SEC_IMAGE). The arguments
+  // to ::CreateFile(), ::CreateFileMapping(), and ::MapViewOfFile() must agree
+  // on access rights and mapping type, or some will fail in non-obvious ways.
+  // |access| is not consulted here; the protection flags below are fixed.
+  if (!file_.IsValid())
+    return false;
+
+  file_mapping_.Set(::CreateFileMapping(file_.GetPlatformFile(), nullptr,
+                                        PAGE_EXECUTE_READ | SEC_IMAGE, 0, 0,
+                                        NULL));
+  if (!file_mapping_.IsValid())
+    return false;
+
+  data_ = static_cast<uint8_t*>(
+      ::MapViewOfFile(file_mapping_.Get(),
+                      FILE_MAP_READ | FILE_MAP_EXECUTE | SEC_IMAGE, 0, 0, 0));
+  if (!data_)
+    return false;
+
+  // Some callers need the size: record the file's length, if it fits size_t.
+  int64_t file_len = file_.GetLength();
+  if (!IsValueInRangeForNumericType<size_t>(file_len))
+    return false;
+
+  length_ = static_cast<size_t>(file_len);
+  return true;
+}
+
 bool MemoryMappedFile::MapFileRegionToMemory(
     const MemoryMappedFile::Region& region,
     Access access) {
   ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK);
 
+  DCHECK(access != READ_CODE_IMAGE || region == Region::kWholeFile);
+
   if (!file_.IsValid())
     return false;
 
@@ -41,6 +74,8 @@
       flags |= PAGE_READWRITE;
       size.QuadPart = region.size;
       break;
+    case READ_CODE_IMAGE:
+      return MapImageToMemory(access);
   }
 
   file_mapping_.Set(::CreateFileMapping(file_.GetPlatformFile(), NULL, flags,
@@ -68,8 +103,8 @@
     // which contains |region| and then add up the |data_offset| displacement.
     int64_t aligned_start = 0;
     size_t ignored = 0U;
-    CalculateVMAlignedBoundaries(
-        region.offset, region.size, &aligned_start, &ignored, &data_offset);
+    CalculateVMAlignedBoundaries(region.offset, region.size, &aligned_start,
+                                 &ignored, &data_offset);
     int64_t full_map_size = region.size + data_offset;
 
     // Ensure that the casts below in the MapViewOfFile call are sane.
diff --git a/chrome/app/file_pre_reader_win.cc b/chrome/app/file_pre_reader_win.cc
index c62c8c0d..38f5439 100644
--- a/chrome/app/file_pre_reader_win.cc
+++ b/chrome/app/file_pre_reader_win.cc
@@ -6,24 +6,66 @@
 
 #include <windows.h>
 
+#include <memoryapi.h>  // NOLINT(build/include_order)
+
 #include "base/files/file.h"
+#include "base/files/memory_mapped_file.h"
+#include "base/win/windows_version.h"
 
 void PreReadFile(const base::FilePath& file_path) {
-  base::File file(file_path, base::File::FLAG_OPEN | base::File::FLAG_READ |
-                                 base::File::FLAG_SEQUENTIAL_SCAN);
-  if (!file.IsValid())
-    return;
+  // Pre-reads |file_path|. On Win10 RS6 and higher, with the increased OS
+  // prefetch limit, in-process prefetch isn't needed (NOTE(review): no RS6
+  // check is visible in this function - confirm where it is enforced). On
+  // Win8/Server 2012 through Win10 RS5, use ::PrefetchVirtualMemory(): reading
+  // the file as data instead would double-map every executed code page as
+  // Image pages too, costing RAM more than CPU. On Win7, just read it as data.
 
-  // This could be replaced with ::PrefetchVirtualMemory once we drop support
-  // for Win7. The performance of ::PrefetchVirtualMemory is roughly equivalent
-  // to these buffered reads.
-  const DWORD kStepSize = 1024 * 1024;
-  char* buffer = reinterpret_cast<char*>(
-      ::VirtualAlloc(nullptr, kStepSize, MEM_COMMIT, PAGE_READWRITE));
-  if (!buffer)
-    return;
+  if (base::win::GetVersion() == base::win::Version::WIN7) {
+    // On Win7 read in the file as data since the OS doesn't have
+    // the support for better options.
 
-  while (file.ReadAtCurrentPos(buffer, kStepSize) > 0) {}
+    constexpr DWORD kStepSize = 1024 * 1024;
 
-  ::VirtualFree(buffer, 0, MEM_RELEASE);
+    base::File file(file_path, base::File::FLAG_OPEN | base::File::FLAG_READ |
+                                   base::File::FLAG_SEQUENTIAL_SCAN);
+    if (!file.IsValid())
+      return;
+
+    char* buffer = reinterpret_cast<char*>(
+        ::VirtualAlloc(nullptr, kStepSize, MEM_COMMIT, PAGE_READWRITE));
+    if (!buffer)
+      return;
+
+    while (file.ReadAtCurrentPos(buffer, kStepSize) > 0) {
+    }
+
+    ::VirtualFree(buffer, 0, MEM_RELEASE);
+  } else {
+    // NB: Creating the file mapping before the ::LoadLibrary() of the file is
+    // more efficient memory wise, but we must be sure no other threads try to
+    // ::LoadLibrary() the file while we are mapping and prefetching it, or
+    // the process will get a private copy of the DLL via COW.
+
+    base::MemoryMappedFile mapped_file;
+    if (mapped_file.Initialize(file_path,
+                               base::MemoryMappedFile::READ_CODE_IMAGE)) {
+      // RefSet data indicates we touch only the first half of the DLL
+      // so prefetch approximately the first half.
+
+      _WIN32_MEMORY_RANGE_ENTRY address_range = {mapped_file.data(),
+                                                 mapped_file.length() / 2};
+
+      // ::PrefetchVirtualMemory() isn't on Win7, so resolve it dynamically.
+      HMODULE kernel32_library = GetModuleHandleA("kernel32.dll");
+
+      auto prefetch_virtual_memory =
+          reinterpret_cast<decltype(&::PrefetchVirtualMemory)>(
+              GetProcAddress(kernel32_library, "PrefetchVirtualMemory"));
+
+      // NB: PrefetchVirtualMemory requires the file to be opened with
+      // only read access or it will fail.
+      // NOTE(review): |prefetch_virtual_memory| is not null-checked here.
+      (*prefetch_virtual_memory)(GetCurrentProcess(), 1, &address_range, 0);
+    }
+  }
 }