blob: f3c1c53bb1d81edd40c8af70ab6e68a2dbd50deb [file] [log] [blame]
// Copyright (c) 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_ALLOCATOR_PARTITION_ALLOCATOR_PAGE_ALLOCATOR_INTERNALS_POSIX_H_
#define BASE_ALLOCATOR_PARTITION_ALLOCATOR_PAGE_ALLOCATOR_INTERNALS_POSIX_H_
#include <errno.h>
#include <sys/mman.h>
#include "base/allocator/partition_allocator/partition_alloc_check.h"
#include "base/check_op.h"
#include "base/notreached.h"
#include "base/posix/eintr_wrapper.h"
#include "build/build_config.h"
#if defined(OS_APPLE)
#include "base/mac/foundation_util.h"
#include "base/mac/mac_util.h"
#include "base/mac/scoped_cftyperef.h"
#include <Availability.h>
#include <Security/Security.h>
#include <mach/mach.h>
#endif
#if defined(OS_ANDROID)
#include <sys/prctl.h>
#endif
#if defined(OS_LINUX) || defined(OS_CHROMEOS)
#include <sys/resource.h>
#include <algorithm>
#endif
#include "base/allocator/partition_allocator/page_allocator.h"
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
#if defined(OS_APPLE)
// SecTaskGetCodeSignStatus is marked as unavailable on macOS, although it’s
// available on iOS and other Apple operating systems. It is, in fact, present
// on the system since macOS 10.12.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wavailability"
uint32_t SecTaskGetCodeSignStatus(SecTaskRef task)
#if __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_12
// When redeclaring something previously declared as unavailable, the
// weak_import attribute won’t be applied unless manually set.
__attribute__((weak_import))
#endif // DT < 10.12
API_AVAILABLE(macos(10.12));
#pragma clang diagnostic pop
#endif // OS_APPLE
namespace base {
namespace {
#if defined(OS_ANDROID)
const char* PageTagToName(PageTag tag) {
// Important: All the names should be string literals. As per prctl.h in
// //third_party/android_ndk the kernel keeps a pointer to the name instead
// of copying it.
//
// Having the name in .rodata ensures that the pointer remains valid as
// long as the mapping is alive.
switch (tag) {
case PageTag::kBlinkGC:
return "blink_gc";
case PageTag::kPartitionAlloc:
return "partition_alloc";
case PageTag::kChromium:
return "chromium";
case PageTag::kV8:
return "v8";
default:
PA_DCHECK(false);
return "";
}
}
#endif // defined(OS_ANDROID)
#if defined(OS_APPLE)
// Tests whether the version of macOS supports the MAP_JIT flag and if the
// current process is signed with the hardened runtime and the allow-jit
// entitlement, returning whether MAP_JIT should be used to allocate regions
// that will contain JIT-compiled executable code.
bool UseMapJit() {
if (!mac::IsAtLeastOS10_14()) {
// MAP_JIT existed before macOS 10.14, but had somewhat different semantics.
// Only one MAP_JIT region was permitted per process, but calling code here
// will very likely require more than one such region. Since MAP_JIT is not
// strictly necessary to write code to a region and then execute it on these
// older OSes, don’t use it at all.
return false;
}
// Until determining that the hardened runtime is enabled, early returns will
// return true, so that MAP_JIT will be used. This is important on arm64,
// which only allows pages to be simultaneously writable and executable when
// in a region allocated with MAP_JIT, regardless of code signing options. On
// arm64, an attempt to set a non-MAP_JIT page as simultaneously writable and
// executable fails with EPERM. Although this is not enforced on x86_64,
// MAP_JIT is harmless in that case.
ScopedCFTypeRef<SecTaskRef> task(SecTaskCreateFromSelf(kCFAllocatorDefault));
if (!task) {
return true;
}
uint32_t flags = SecTaskGetCodeSignStatus(task);
if (!(flags & kSecCodeSignatureRuntime)) {
// The hardened runtime is not enabled. Note that kSecCodeSignatureRuntime
// == CS_RUNTIME.
return true;
}
// The hardened runtime is enabled. From this point on, early returns must
// return false, indicating that MAP_JIT is not to be used. It’s an error
// (EINVAL) to use MAP_JIT with the hardened runtime unless the JIT
// entitlement is specified.
ScopedCFTypeRef<CFTypeRef> jit_entitlement(SecTaskCopyValueForEntitlement(
task.get(), CFSTR("com.apple.security.cs.allow-jit"), nullptr));
if (!jit_entitlement)
return false;
return mac::CFCast<CFBooleanRef>(jit_entitlement.get()) == kCFBooleanTrue;
}
#endif // defined(OS_APPLE)
} // namespace
// |mmap| uses a nearby address if the hint address is blocked.
constexpr bool kHintIsAdvisory = true;
std::atomic<int32_t> s_allocPageErrorCode{0};
int GetAccessFlags(PageAccessibilityConfiguration accessibility) {
switch (accessibility) {
case PageRead:
return PROT_READ;
case PageReadWrite:
return PROT_READ | PROT_WRITE;
case PageReadExecute:
return PROT_READ | PROT_EXEC;
case PageReadWriteExecute:
return PROT_READ | PROT_WRITE | PROT_EXEC;
default:
NOTREACHED();
FALLTHROUGH;
case PageInaccessible:
return PROT_NONE;
}
}
void* SystemAllocPagesInternal(void* hint,
size_t length,
PageAccessibilityConfiguration accessibility,
PageTag page_tag) {
#if defined(OS_APPLE)
// Use a custom tag to make it easier to distinguish Partition Alloc regions
// in vmmap(1). Tags between 240-255 are supported.
PA_DCHECK(PageTag::kFirst <= page_tag);
PA_DCHECK(PageTag::kLast >= page_tag);
int fd = VM_MAKE_TAG(static_cast<int>(page_tag));
#else
int fd = -1;
#endif
int access_flag = GetAccessFlags(accessibility);
int map_flags = MAP_ANONYMOUS | MAP_PRIVATE;
#if defined(OS_APPLE)
// On macOS 10.14 and higher, executables that are code signed with the
// "runtime" option cannot execute writable memory by default. They can opt
// into this capability by specifying the "com.apple.security.cs.allow-jit"
// code signing entitlement and allocating the region with the MAP_JIT flag.
static const bool kUseMapJit = UseMapJit();
if (page_tag == PageTag::kV8 && kUseMapJit) {
map_flags |= MAP_JIT;
}
#endif
void* ret = mmap(hint, length, access_flag, map_flags, fd, 0);
if (ret == MAP_FAILED) {
s_allocPageErrorCode = errno;
ret = nullptr;
}
#if defined(OS_ANDROID)
// On Android, anonymous mappings can have a name attached to them. This is
// useful for debugging, and double-checking memory attribution.
if (ret) {
// No error checking on purpose, testing only.
prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ret, length,
PageTagToName(page_tag));
}
#endif
return ret;
}
bool TrySetSystemPagesAccessInternal(
void* address,
size_t length,
PageAccessibilityConfiguration accessibility) {
return 0 ==
HANDLE_EINTR(mprotect(address, length, GetAccessFlags(accessibility)));
}
void SetSystemPagesAccessInternal(
void* address,
size_t length,
PageAccessibilityConfiguration accessibility) {
PA_PCHECK(0 == HANDLE_EINTR(
mprotect(address, length, GetAccessFlags(accessibility))));
}
void FreePagesInternal(void* address, size_t length) {
PA_PCHECK(0 == munmap(address, length));
}
void* TrimMappingInternal(void* base,
size_t base_length,
size_t trim_length,
PageAccessibilityConfiguration accessibility,
size_t pre_slack,
size_t post_slack) {
void* ret = base;
// We can resize the allocation run. Release unneeded memory before and after
// the aligned range.
if (pre_slack) {
FreePages(base, pre_slack);
ret = reinterpret_cast<char*>(base) + pre_slack;
}
if (post_slack) {
FreePages(reinterpret_cast<char*>(ret) + trim_length, post_slack);
}
return ret;
}
void DecommitSystemPagesInternal(
void* address,
size_t length,
PageAccessibilityDisposition accessibility_disposition) {
// In POSIX, there is no decommit concept. Discarding is an effective way of
// implementing the Windows semantics where the OS is allowed to not swap the
// pages in the region.
DiscardSystemPages(address, length);
// Make pages inaccessible, unless the caller requested to keep permissions.
//
// Note, there is a small window between these calls when the pages can be
// incorrectly touched and brought back to memory. Not ideal, but doing those
// operaions in the opposite order resulted in PMF regression on Mac (see
// crbug.com/1153021).
if (accessibility_disposition == PageUpdatePermissions) {
SetSystemPagesAccess(address, length, PageInaccessible);
}
}
void RecommitSystemPagesInternal(
void* address,
size_t length,
PageAccessibilityConfiguration accessibility,
PageAccessibilityDisposition accessibility_disposition) {
// On POSIX systems, the caller needs to simply read the memory to recommit
// it. However, if decommit changed the permissions, recommit has to change
// them back.
if (accessibility_disposition == PageUpdatePermissions) {
SetSystemPagesAccess(address, length, accessibility);
}
#if defined(OS_APPLE)
// On macOS, to update accounting, we need to make another syscall. For more
// details, see https://crbug.com/823915.
madvise(address, length, MADV_FREE_REUSE);
#endif
}
void DiscardSystemPagesInternal(void* address, size_t length) {
#if defined(OS_APPLE)
int ret = madvise(address, length, MADV_FREE_REUSABLE);
if (ret) {
// MADV_FREE_REUSABLE sometimes fails, so fall back to MADV_DONTNEED.
ret = madvise(address, length, MADV_DONTNEED);
}
PA_PCHECK(ret == 0);
#else
// We have experimented with other flags, but with suboptimal results.
//
// MADV_FREE (Linux): Makes our memory measurements less predictable;
// performance benefits unclear.
//
// Therefore, we just do the simple thing: MADV_DONTNEED.
PA_PCHECK(0 == madvise(address, length, MADV_DONTNEED));
#endif
}
} // namespace base
#endif // BASE_ALLOCATOR_PARTITION_ALLOCATOR_PAGE_ALLOCATOR_INTERNALS_POSIX_H_