| // Copyright 2020 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "chromeos/ash/components/memory/userspace_swap/userfaultfd.h" |
| |
| #include <fcntl.h> |
| #include <sys/ioctl.h> |
| #include <sys/mman.h> |
| #include <sys/syscall.h> |
| #include <unistd.h> |
| |
| #if defined(__NR_userfaultfd) |
| #define HAS_USERFAULTFD |
| #include <linux/userfaultfd.h> |
| #endif |
| |
| #include "base/compiler_specific.h" |
| #include "base/files/file_descriptor_watcher_posix.h" |
| #include "base/files/scoped_file.h" |
| #include "base/functional/bind.h" |
| #include "base/logging.h" |
| #include "base/memory/ptr_util.h" |
| #include "base/posix/eintr_wrapper.h" |
| #include "base/task/sequenced_task_runner.h" |
| #include "base/task/thread_pool.h" |
| #include "base/threading/platform_thread.h" |
| #include "base/threading/scoped_blocking_call.h" |
| #include "chromeos/ash/components/memory/aligned_memory.h" |
| |
| namespace ash { |
| namespace memory { |
| namespace userspace_swap { |
| |
| UserfaultFD::~UserfaultFD() { |
| // We need to make sure we stop receiving events before we shutdown. |
| CloseAndStopWaitingForEvents(); |
| } |
| |
| UserfaultFD::UserfaultFD(base::ScopedFD fd) : fd_(std::move(fd)) {} |
| |
| // static |
| bool UserfaultFD::KernelSupportsUserfaultFD() { |
| #if defined(HAS_USERFAULTFD) |
| static bool supported = []() -> bool { |
| // Invoke the syscall with invalid arguments. If it's not supported the |
| // kernel will return ENOSYS, if it's supported we will get invalid |
| // arguments EINVAL. Doing this will never actually create a userfaultfd. |
| int ret = syscall(__NR_userfaultfd, ~(O_CLOEXEC | O_NONBLOCK)); |
| CHECK(ret == -1) << "Syscall succeeded unexpectedly"; |
| DPCHECK(errno == EINVAL || errno == ENOSYS) |
| << "Syscall returned an unexpected errno"; |
| return errno == EINVAL; |
| }(); |
| |
| return supported; |
| #else // defined(HAS_USERFAULTFD) |
| // We didn't even build chrome with support for it in this case. |
| errno = ENOSYS; |
| return false; |
| #endif |
| } |
| |
| bool UserfaultFD::RegisterRange(RegisterMode mode, |
| uintptr_t range_start, |
| uint64_t len) { |
| #if defined(HAS_USERFAULTFD) |
| CHECK(IsPageAligned(range_start)); |
| CHECK(IsPageAligned(len)); |
| |
| uffdio_register reg = {}; |
| reg.range.start = range_start; |
| reg.range.len = len; |
| |
| reg.mode = 0; |
| if (mode & kRegisterMissing) |
| reg.mode |= UFFDIO_REGISTER_MODE_MISSING; |
| |
| if (HANDLE_EINTR(ioctl(fd_.get(), UFFDIO_REGISTER, ®)) == -1) { |
| return false; |
| } |
| |
| // Make sure we're getting back at least the features we require, these |
| // features were all introduced with userfaultfd so they should remain |
| // supported indefinitely. |
| constexpr uint64_t kRequiredFeatures = |
| (static_cast<uint64_t>(1) << _UFFDIO_WAKE) | |
| (static_cast<uint64_t>(1) << _UFFDIO_COPY) | |
| (static_cast<uint64_t>(1) << _UFFDIO_ZEROPAGE); |
| CHECK((reg.ioctls & kRequiredFeatures) == kRequiredFeatures); |
| |
| return true; |
| #else // defined(HAS_USERFAULTFD) |
| errno = ENOSYS; |
| return false; |
| #endif |
| } |
| |
| bool UserfaultFD::UnregisterRange(uintptr_t range_start, uint64_t len) { |
| #if defined(HAS_USERFAULTFD) |
| CHECK(IsPageAligned(range_start)); |
| CHECK(IsPageAligned(len)); |
| |
| uffdio_range range = {}; |
| range.start = range_start; |
| range.len = len; |
| |
| if (HANDLE_EINTR(ioctl(fd_.get(), UFFDIO_UNREGISTER, &range)) < 0) { |
| return false; |
| } |
| |
| return true; |
| #else // defined(HAS_USERFAULTFD) |
| errno = ENOSYS; |
| return false; |
| #endif |
| } |
| |
| bool UserfaultFD::CopyToRange(uintptr_t dest_range_start, |
| uint64_t len, |
| uintptr_t src_range_start, |
| int64_t* copied) { |
| #if defined(HAS_USERFAULTFD) |
| // NOTE: The source doesn't need to be page aligned. |
| CHECK(IsPageAligned(dest_range_start)); |
| CHECK(IsPageAligned(len)); |
| CHECK(copied); |
| |
| uffdio_copy fault_copy = {}; |
| fault_copy.dst = dest_range_start; |
| fault_copy.len = len; |
| fault_copy.mode = 0; // WAKE |
| fault_copy.src = src_range_start; |
| fault_copy.copy = 0; |
| *copied = 0; |
| |
| int res = HANDLE_EINTR(ioctl(fd_.get(), UFFDIO_COPY, &fault_copy)); |
| *copied = fault_copy.copy; |
| |
| return res >= 0; |
| #else // defined(HAS_USERFAULTFD) |
| errno = ENOSYS; |
| return false; |
| #endif |
| } |
| |
| bool UserfaultFD::ZeroRange(uintptr_t range_start, |
| uint64_t len, |
| int64_t* zeroed) { |
| #if defined(HAS_USERFAULTFD) |
| CHECK(IsPageAligned(range_start)); |
| CHECK(IsPageAligned(len)); |
| CHECK(zeroed); |
| |
| uffdio_zeropage zp = {}; |
| zp.range.start = range_start; |
| zp.range.len = len; |
| zp.mode = 0; // WAKE |
| zp.zeropage = 0; |
| *zeroed = 0; |
| |
| int res = HANDLE_EINTR(ioctl(fd_.get(), UFFDIO_ZEROPAGE, &zp)); |
| *zeroed = zp.zeropage; |
| |
| return res >= 0; |
| #else // defined(HAS_USERFAULTFD) |
| errno = ENOSYS; |
| return false; |
| #endif |
| } |
| |
| bool UserfaultFD::WakeRange(uintptr_t range_start, uint64_t len) { |
| #if defined(HAS_USERFAULTFD) |
| CHECK(IsPageAligned(range_start)); |
| CHECK(IsPageAligned(len)); |
| |
| uffdio_range range = {}; |
| range.start = range_start; |
| range.len = len; |
| |
| if (HANDLE_EINTR(ioctl(fd_.get(), UFFDIO_WAKE, &range)) < 0) { |
| return false; |
| } |
| |
| return true; |
| #else // defined(HAS_USERFAULTFD) |
| errno = ENOSYS; |
| return false; |
| #endif |
| } |
| |
| // Static |
| std::unique_ptr<UserfaultFD> UserfaultFD::WrapFD(base::ScopedFD fd) { |
| #if defined(HAS_USERFAULTFD) |
| // Using new to access non-public constructor rather than make_unique. |
| return base::WrapUnique(new UserfaultFD(std::move(fd))); |
| #else // defined(HAS_USERFAULTFD) |
| errno = ENOSYS; |
| return nullptr; |
| #endif |
| } |
| |
| // Static |
| std::unique_ptr<UserfaultFD> UserfaultFD::Create(Features features) { |
| #if defined(HAS_USERFAULTFD) |
| base::ScopedFD fd(syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK)); |
| if (!fd.is_valid()) { |
| // We likely received ENOSYS in this situation for no kernel support, we |
| // don't need to do anything special although the caller can still check |
| // errno. Although we do log an error since the caller should have checked |
| // that it's supported before attempting to create a userfaultfd. |
| PLOG(ERROR) << "Unable to create userfaultfd"; |
| return nullptr; |
| } |
| |
| uffdio_api uffdio_api = {}; |
| uffdio_api.api = UFFD_API; |
| |
| if (features & kFeatureRemap) |
| uffdio_api.features |= UFFD_FEATURE_EVENT_REMAP; |
| |
| if (features & kFeatureUnmap) |
| uffdio_api.features |= UFFD_FEATURE_EVENT_UNMAP; |
| |
| if (features & kFeatureRemove) |
| uffdio_api.features |= UFFD_FEATURE_EVENT_REMOVE; |
| |
| if (features & kFeatureThreadID) |
| uffdio_api.features |= UFFD_FEATURE_THREAD_ID; |
| |
| if (HANDLE_EINTR(ioctl(fd.get(), UFFDIO_API, &uffdio_api)) < 0) { |
| PLOG(ERROR) << "UFFDIO_API ioctl failed"; |
| return nullptr; |
| } |
| |
| return WrapFD(std::move(fd)); |
| #else // defined(HAS_USERFAULTFD) |
| errno = ENOSYS; |
| return nullptr; |
| #endif |
| } |
| |
| bool UserfaultFD::DispatchMessage(const uffd_msg& msg) { |
| #if defined(HAS_USERFAULTFD) |
| if (msg.event == UFFD_EVENT_UNMAP) { |
| handler_->Unmapped(msg.arg.remove.start, msg.arg.remove.end); |
| } else if (msg.event == UFFD_EVENT_REMOVE) { |
| handler_->Removed(msg.arg.remove.start, msg.arg.remove.end); |
| } else if (msg.event == UFFD_EVENT_REMAP) { |
| handler_->Remapped(msg.arg.remap.from, msg.arg.remap.to, msg.arg.remap.len); |
| } else if (msg.event == UFFD_EVENT_PAGEFAULT) { |
| pending_faults_.push_back(msg); |
| } else { |
| DLOG(ERROR) << "Unknown userfaultfd event: " << msg.event; |
| } |
| |
| return DrainPendingFaults(); |
| #else |
| return true; |
| #endif |
| } |
| |
| bool UserfaultFD::DrainPendingFaults() { |
| while (!pending_faults_.empty()) { |
| const uffd_msg& pending_fault = pending_faults_.front(); |
| CHECK(pending_fault.event == UFFD_EVENT_PAGEFAULT); |
| if (!handler_->Pagefault( |
| pending_fault.arg.pagefault.address, |
| pending_fault.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE |
| ? UserfaultFDHandler::PagefaultFlags::kWriteFault |
| : UserfaultFDHandler::PagefaultFlags::kReadFault, |
| base::PlatformThreadId( |
| static_cast<base::PlatformThreadId::UnderlyingType>( |
| pending_fault.arg.pagefault.feat.ptid)))) { |
| // It'll get retried later (it wasn't popped). |
| return false; |
| } |
| |
| // And we successfully handled it, let's remove it and move along. |
| pending_faults_.pop_front(); |
| } |
| return true; |
| } |
| |
| void UserfaultFD::UserfaultFDReadable() { |
| #if defined(HAS_USERFAULTFD) |
| base::ScopedBlockingCall scoped_blocking_call(FROM_HERE, |
| base::BlockingType::WILL_BLOCK); |
| uffd_msg msg; |
| |
| // It's very important that messages are posted in the order they were read |
| // otherwise, if another thread also attempted to handle the |
| // UserfaultFDReadable event we could post the messages out of order which may |
| // result in ambiguity. We protect the read and the posts by a mutex. |
| base::ReleasableAutoLock read_locker(&read_lock_); |
| |
| do { |
| UNSAFE_TODO(memset(&msg, 0, sizeof(msg))); |
| |
| // We start by draining all messages and then we process them in order. |
| int bytes_read = HANDLE_EINTR(read(fd_.get(), &msg, sizeof(msg))); |
| |
| if (bytes_read <= 0) { |
| // We either got an EOF or an EBADF to indicate that we're done. |
| if (bytes_read == 0 || errno == EBADF) { |
| handler_->Closed(0); // EBADF will indicate closed at this point. |
| } else if (errno == EWOULDBLOCK) { |
| // No problems here. |
| |
| // But make sure before we exit this loop that if there are still queued |
| // messages that we attempt to drain them. |
| DrainPendingFaults(); |
| |
| return; |
| } else { |
| PLOG(ERROR) << "Userfaultfd encountered an expected error"; |
| handler_->Closed(errno); |
| } |
| |
| CloseAndStopWaitingForEvents(); |
| return; |
| } |
| |
| // Partial reads CANNOT happen. |
| CHECK_EQ(bytes_read, static_cast<int>(sizeof(msg))); |
| DispatchMessage(msg); |
| } while (true); |
| #endif |
| } |
| |
| bool UserfaultFD::StartWaitingForEvents( |
| std::unique_ptr<UserfaultFDHandler> handler) { |
| #if defined(HAS_USERFAULTFD) |
| |
| if (!fd_.is_valid() || !handler) { |
| return false; |
| } |
| |
| if (watcher_controller_) { |
| LOG(WARNING) << "Fault handling has already started"; |
| return true; |
| } |
| |
| handler_ = std::move(handler); |
| |
| watcher_controller_ = base::FileDescriptorWatcher::WatchReadable( |
| fd_.get(), base::BindRepeating(&UserfaultFD::UserfaultFDReadable, |
| base::Unretained(this))); |
| |
| return true; |
| #else // defined(HAS_USERFAULTFD) |
| errno = ENOSYS; |
| return false; |
| #endif |
| } |
| |
| void UserfaultFD::CloseAndStopWaitingForEvents() { |
| watcher_controller_.reset(); |
| fd_.reset(); |
| } |
| |
| base::ScopedFD UserfaultFD::ReleaseFD() { |
| return std::move(fd_); |
| } |
| |
| } // namespace userspace_swap |
| } // namespace memory |
| } // namespace ash |