blob: bdbc8832896f88212852bab1b95b0faa3152c8d3 [file]
// Copyright 2022 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#![allow(clippy::undocumented_unsafe_blocks)] // FIXME
#![allow(clippy::manual_div_ceil)] // FIXME
use core::ffi::c_void;
use std::cmp::Reverse;
use std::collections::BTreeMap;
use std::collections::BinaryHeap;
use std::convert::TryInto;
use std::sync::Arc;
use base::error;
use base::info;
use base::pagesize;
use base::AsRawDescriptor;
use base::Error;
use base::Event;
use base::MappedRegion;
use base::MmapError;
use base::Protection;
use base::RawDescriptor;
use base::Result;
use base::SafeDescriptor;
use base::SendTube;
use fnv::FnvHashMap;
use libc::EEXIST;
use libc::EFAULT;
use libc::EINVAL;
use libc::EIO;
use libc::ENODEV;
use libc::ENOENT;
use libc::ENOSPC;
use libc::ENOTSUP;
use libc::EOVERFLOW;
use sync::Mutex;
use vm_memory::GuestAddress;
use vm_memory::GuestMemory;
use winapi::shared::winerror::ERROR_BUSY;
use winapi::shared::winerror::ERROR_SUCCESS;
use winapi::um::memoryapi::OfferVirtualMemory;
use winapi::um::memoryapi::ReclaimVirtualMemory;
use winapi::um::memoryapi::VmOfferPriorityBelowNormal;
use winapi::um::winnt::RtlZeroMemory;
use super::types::*;
use super::*;
use crate::host_phys_addr_bits;
use crate::whpx::whpx_sys::*;
use crate::BalloonEvent;
use crate::ClockState;
use crate::Datamatch;
use crate::DeliveryMode;
use crate::DestinationMode;
use crate::DeviceKind;
use crate::HypervisorKind;
use crate::IoEventAddress;
use crate::LapicState;
use crate::MemCacheType;
use crate::MemSlot;
use crate::TriggerMode;
use crate::VcpuX86_64;
use crate::Vm;
use crate::VmCap;
use crate::VmX86_64;
/// A virtual machine backed by a Windows Hypervisor Platform (WHPX) partition.
///
/// Bundles the partition handle, the guest memory mapped into it, any extra memory regions added
/// after creation, and the ioevents registered for the VM.
pub struct WhpxVm {
// Hypervisor handle; handed back to callers through `get_hypervisor`.
whpx: Whpx,
// reference counted, since we need to implement try_clone or some variation.
// There is only ever 1 create/1 delete partition unlike dup/close handle variations.
vm_partition: Arc<SafePartition>,
// Guest memory whose regions are mapped into the partition when the VM is created.
guest_mem: GuestMemory,
// Regions added via `add_memory_region`, keyed by slot. Shared (not copied) between clones.
mem_regions: Arc<Mutex<BTreeMap<MemSlot, (GuestAddress, Box<dyn MappedRegion>)>>>,
/// A min heap of MemSlot numbers that were used and then removed and can now be re-used
mem_slot_gaps: Arc<Mutex<BinaryHeap<Reverse<MemSlot>>>>,
// WHPX's implementation of ioevents makes several assumptions about how crosvm uses ioevents:
// 1. All ioevents are registered during device setup, and thus can be cloned when the vm is
// cloned instead of locked in an Arc<Mutex<>>. This will make handling ioevents in each
// vcpu thread easier because no locks will need to be acquired.
// 2. All ioevents use Datamatch::AnyLength. We don't bother checking the datamatch, which
// will make this faster.
// 3. We only ever register one eventfd to each address. This simplifies our data structure.
ioevents: FnvHashMap<IoEventAddress, Event>,
// Tube to send events to control.
vm_evt_wrtube: Option<SendTube>,
}
impl WhpxVm {
/// Creates a WHPX partition and wraps it in a new `WhpxVm`.
///
/// The partition is configured (processor count, cpuid results, cpuid exit list, extended VM
/// exits, local APIC emulation mode) *before* `WHvSetupPartition` is called, and every region of
/// `guest_mem` is mapped into it afterwards.
///
/// # Arguments
///
/// * `whpx` - Hypervisor handle; a clone is stored in the returned VM.
/// * `cpu_count` - Number of virtual processors configured on the partition.
/// * `guest_mem` - Guest memory; each region is mapped read/write without dirty-page tracking.
/// * `cpuid` - Cpuid entries pre-set on the partition.
/// * `apic_emulation` - Selects xAPIC emulation when true, no local APIC emulation otherwise.
/// * `vm_evt_wrtube` - Optional tube stored in the VM for sending events to control.
///
/// # Errors
///
/// Returns `WhpxError::LocalApicEmulationNotSupported` when `apic_emulation` is requested but the
/// feature check fails, or the `WhpxError` variant matching whichever WHPX call failed.
pub fn new(
whpx: &Whpx,
cpu_count: usize,
guest_mem: GuestMemory,
cpuid: CpuId,
apic_emulation: bool,
vm_evt_wrtube: Option<SendTube>,
) -> WhpxResult<WhpxVm> {
let partition = SafePartition::new()?;
// setup partition defaults.
let mut property: WHV_PARTITION_PROPERTY = Default::default();
// NOTE(review): `as u32` silently truncates a `cpu_count` larger than u32::MAX.
property.ProcessorCount = cpu_count as u32;
// safe because we own this partition, and the partition property is allocated on the stack.
check_whpx!(unsafe {
WHvSetPartitionProperty(
partition.partition,
WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeProcessorCount,
&property as *const _ as *const c_void,
std::mem::size_of::<WHV_PARTITION_PROPERTY>() as UINT32,
)
})
.map_err(WhpxError::SetProcessorCount)?;
// Pre-set any cpuid results in cpuid.
let mut cpuid_results: Vec<WHV_X64_CPUID_RESULT> = cpuid
.cpu_id_entries
.iter()
.map(WHV_X64_CPUID_RESULT::from)
.collect();
// Leaf HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS tells linux that it's running under Hyper-V.
// Ebx/Ecx/Edx together spell the vendor string "Microsoft Hv".
cpuid_results.push(WHV_X64_CPUID_RESULT {
Function: HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
Reserved: [0u32; 3],
// HYPERV_CPUID_MIN is the minimum leaf that we need to support returning to the guest
Eax: HYPERV_CPUID_MIN,
Ebx: u32::from_le_bytes([b'M', b'i', b'c', b'r']),
Ecx: u32::from_le_bytes([b'o', b's', b'o', b'f']),
Edx: u32::from_le_bytes([b't', b' ', b'H', b'v']),
});
// HYPERV_CPUID_FEATURES leaf tells linux which Hyper-V features we support
cpuid_results.push(WHV_X64_CPUID_RESULT {
Function: HYPERV_CPUID_FEATURES,
Reserved: [0u32; 3],
// We advertise frequency MSRs, the reference TSC MSR, and the HV_ACCESS_TSC_INVARIANT
// feature, which means TSC scaling/offsetting is handled in hardware, not the guest.
Eax: HV_ACCESS_FREQUENCY_MSRS
| HV_ACCESS_TSC_INVARIANT
| HV_MSR_REFERENCE_TSC_AVAILABLE,
Ebx: 0,
Edx: HV_FEATURE_FREQUENCY_MSRS_AVAILABLE,
Ecx: 0,
});
// safe because we own this partition, and the cpuid_results vec is local to this function.
check_whpx!(unsafe {
WHvSetPartitionProperty(
partition.partition,
WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeCpuidResultList,
cpuid_results.as_ptr() as *const _ as *const c_void,
(std::mem::size_of::<WHV_X64_CPUID_RESULT>() * cpuid_results.len()) as UINT32,
)
})
.map_err(WhpxError::SetCpuidResultList)?;
// Setup exiting for cpuid leaves that we want crosvm to adjust, but that we can't pre-set.
// We can't pre-set leaves that rely on irqchip information, and we cannot pre-set leaves
// that return different results per-cpu.
let exit_list: Vec<u32> = vec![0x1, 0x4, 0xB, 0x1F, 0x15];
// safe because we own this partition, and the exit_list vec is local to this function.
check_whpx!(unsafe {
WHvSetPartitionProperty(
partition.partition,
WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeCpuidExitList,
exit_list.as_ptr() as *const _ as *const c_void,
(std::mem::size_of::<u32>() * exit_list.len()) as UINT32,
)
})
.map_err(WhpxError::SetCpuidExitList)?;
// Setup exits for CPUID instruction.
let mut property: WHV_PARTITION_PROPERTY = Default::default();
// safe because we own this partition, and the partition property is allocated on the stack.
unsafe {
property
.ExtendedVmExits
.__bindgen_anon_1
.set_X64CpuidExit(1);
// X64MsrExit essentially causes WHPX to exit to crosvm when it would normally fail an
// MSR access and inject a GP fault. Crosvm, in turn, now handles select MSR accesses
// related to Hyper-V (see the handle_msr_* functions in vcpu.rs) and injects a GP
// fault for any unhandled MSR accesses.
property.ExtendedVmExits.__bindgen_anon_1.set_X64MsrExit(1);
}
// safe because we own this partition, and the partition property is allocated on the stack.
check_whpx!(unsafe {
WHvSetPartitionProperty(
partition.partition,
WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeExtendedVmExits,
&property as *const _ as *const c_void,
std::mem::size_of::<WHV_PARTITION_PROPERTY>() as UINT32,
)
})
.map_err(WhpxError::SetExtendedVmExits)?;
// Refuse early if the caller asked for APIC emulation that the host cannot provide.
if apic_emulation && !Whpx::check_whpx_feature(WhpxFeature::LocalApicEmulation)? {
return Err(WhpxError::LocalApicEmulationNotSupported);
}
// Setup apic emulation mode
let mut property: WHV_PARTITION_PROPERTY = Default::default();
property.LocalApicEmulationMode = if apic_emulation {
// TODO(b/180966070): figure out if x2apic emulation mode is available on the host and
// enable it if it is.
WHV_X64_LOCAL_APIC_EMULATION_MODE_WHvX64LocalApicEmulationModeXApic
} else {
WHV_X64_LOCAL_APIC_EMULATION_MODE_WHvX64LocalApicEmulationModeNone
};
// safe because we own this partition, and the partition property is allocated on the stack.
check_whpx!(unsafe {
WHvSetPartitionProperty(
partition.partition,
WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeLocalApicEmulationMode,
&property as *const _ as *const c_void,
std::mem::size_of::<WHV_PARTITION_PROPERTY>() as UINT32,
)
})
.map_err(WhpxError::SetLocalApicEmulationMode)?;
// All properties above are set before the partition is set up.
// safe because we own this partition
check_whpx!(unsafe { WHvSetupPartition(partition.partition) })
.map_err(WhpxError::SetupPartition)?;
// Map every guest memory region into the freshly set up partition.
for region in guest_mem.regions() {
unsafe {
// Safe because the guest regions are guaranteed not to overlap.
set_user_memory_region(
&partition,
false, // read_only
false, // track dirty pages
region.guest_addr.offset(),
region.size as u64,
region.host_addr as *mut u8,
)
}
.map_err(WhpxError::MapGpaRange)?;
}
// No extra memory regions, slot gaps, or ioevents exist on a new VM.
Ok(WhpxVm {
whpx: whpx.clone(),
vm_partition: Arc::new(partition),
guest_mem,
mem_regions: Arc::new(Mutex::new(BTreeMap::new())),
mem_slot_gaps: Arc::new(Mutex::new(BinaryHeap::new())),
ioevents: FnvHashMap::default(),
vm_evt_wrtube,
})
}
/// Get the current state of the specified VCPU's local APIC
///
/// # Errors
///
/// Returns an error if WHPX fails to read the interrupt controller state of `vcpu_id`.
pub fn get_vcpu_lapic_state(&self, vcpu_id: usize) -> Result<LapicState> {
    // WHPX writes the state into this buffer, so it must be a mutable binding whose pointer is
    // obtained with `as_mut_ptr`. Writing through a pointer derived from a shared borrow of an
    // immutable local is undefined behavior and lets the compiler assume the zeros never change.
    let mut buffer = WhpxLapicState { regs: [0u32; 1024] };
    let mut written_size = 0u32;
    let size = std::mem::size_of::<WhpxLapicState>();

    // SAFETY: `buffer` and `written_size` are live, exclusively owned locals for the duration of
    // the call, and the size passed is the size of the buffer's type.
    check_whpx!(unsafe {
        WHvGetVirtualProcessorInterruptControllerState(
            self.vm_partition.partition,
            vcpu_id as u32,
            buffer.regs.as_mut_ptr() as *mut c_void,
            size as u32,
            &mut written_size,
        )
    })?;

    Ok(LapicState::from(&buffer))
}
/// Set the current state of the specified VCPU's local APIC
pub fn set_vcpu_lapic_state(&mut self, vcpu_id: usize, state: &LapicState) -> Result<()> {
let buffer = WhpxLapicState::from(state);
check_whpx!(unsafe {
WHvSetVirtualProcessorInterruptControllerState(
self.vm_partition.partition,
vcpu_id as u32,
buffer.regs.as_ptr() as *mut c_void,
std::mem::size_of::<WhpxLapicState>() as u32,
)
})?;
Ok(())
}
/// Request an interrupt be delivered to one or more virtualized interrupt controllers. This
/// should only be used with ApicEmulationModeXApic or ApicEmulationModeX2Apic.
///
/// # Errors
///
/// Returns `ENOTSUP` for the SMI, RemoteRead and External delivery modes, or the WHPX error if
/// the request itself fails.
pub fn request_interrupt(
    &self,
    vector: u8,
    dest_id: u8,
    dest_mode: DestinationMode,
    trigger: TriggerMode,
    delivery: DeliveryMode,
) -> Result<()> {
    // Translate the delivery mode first so unsupported modes bail out before anything is built.
    let interrupt_type = match delivery {
        DeliveryMode::Fixed => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeFixed,
        DeliveryMode::Lowest => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeLowestPriority,
        DeliveryMode::NMI => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeNmi,
        DeliveryMode::Init => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeInit,
        DeliveryMode::Startup => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeSipi,
        DeliveryMode::SMI => {
            error!("WHPX does not support requesting an SMI");
            return Err(Error::new(ENOTSUP));
        }
        DeliveryMode::RemoteRead => {
            // This is also no longer supported by intel.
            error!("Remote Read interrupts are not supported by WHPX");
            return Err(Error::new(ENOTSUP));
        }
        DeliveryMode::External => {
            error!("WHPX does not support requesting an external interrupt");
            return Err(Error::new(ENOTSUP));
        }
    };
    let destination_mode = match dest_mode {
        DestinationMode::Physical => {
            WHV_INTERRUPT_DESTINATION_MODE_WHvX64InterruptDestinationModePhysical
        }
        DestinationMode::Logical => {
            WHV_INTERRUPT_DESTINATION_MODE_WHvX64InterruptDestinationModeLogical
        }
    };
    let trigger_mode = match trigger {
        TriggerMode::Edge => WHV_INTERRUPT_TRIGGER_MODE_WHvX64InterruptTriggerModeEdge,
        TriggerMode::Level => WHV_INTERRUPT_TRIGGER_MODE_WHvX64InterruptTriggerModeLevel,
    };

    // The WHV_INTERRUPT_CONTROL does not seem to support the dest_shorthand
    let mut control = WHV_INTERRUPT_CONTROL {
        Destination: dest_id as u32,
        Vector: vector as u32,
        ..Default::default()
    };
    control.set_DestinationMode(destination_mode as u64);
    control.set_TriggerMode(trigger_mode as u64);
    control.set_Type(interrupt_type as u64);

    check_whpx!(unsafe {
        WHvRequestInterrupt(
            self.vm_partition.partition,
            &control,
            std::mem::size_of::<WHV_INTERRUPT_CONTROL>() as u32,
        )
    })
}
/// In order to fully unmap a memory range such that the host can reclaim the memory,
/// we unmap it from the hypervisor partition, and then mark crosvm's process as uninterested
/// in the memory.
///
/// This will make crosvm unable to access the memory, and allow Windows to reclaim it for other
/// uses when memory is in demand.
///
/// # Errors
///
/// Fails without touching the partition if `guest_address` does not resolve to a host address
/// in guest memory; otherwise returns the error from the WHPX unmap or from
/// `OfferVirtualMemory`.
fn handle_inflate(&mut self, guest_address: GuestAddress, size: u64) -> Result<()> {
    info!(
        "Balloon: Requested WHPX unmap of addr: {:?}, size: {:?}",
        guest_address, size
    );

    // Resolve the host address *before* unmapping. Doing it afterwards meant that a request for
    // an address outside guest memory could still unmap a range from the partition and then
    // fail, leaving that range unmapped.
    // NOTE(review): only the start address is validated here, not `guest_address + size`.
    let host_address = self
        .guest_mem
        .get_host_address(guest_address)
        .map_err(|_| Error::new(1))? as *mut c_void;

    // Safe because WHPX does proper error checking, even if an out-of-bounds address is
    // provided.
    unsafe {
        check_whpx!(WHvUnmapGpaRange(
            self.vm_partition.partition,
            guest_address.offset(),
            size,
        ))?;
    }

    // Safe because we have just successfully unmapped this range from the
    // guest partition, so we know it's unused.
    let result =
        unsafe { OfferVirtualMemory(host_address, size as usize, VmOfferPriorityBelowNormal) };

    if result != ERROR_SUCCESS {
        let err = Error::new(result);
        error!("Freeing memory failed with error: {}", err);
        return Err(err);
    }
    Ok(())
}
/// Remap memory that has previously been unmapped with #handle_inflate. Note
/// that attempts to remap pages that were not previously unmapped, or addresses that are not
/// page-aligned, will result in failure.
///
/// To do this, reclaim the memory from Windows first, then remap it into the hypervisor
/// partition. Remapped memory has no guarantee of content, and the guest should not expect
/// it to.
///
/// # Errors
///
/// Fails if `guest_address` does not resolve to a host address, if `ReclaimVirtualMemory`
/// reports anything other than success or `ERROR_BUSY`, or if mapping the range back into the
/// partition fails.
fn handle_deflate(&mut self, guest_address: GuestAddress, size: u64) -> Result<()> {
    // This is the deflate path: the range is being remapped, not unmapped.
    info!(
        "Balloon: Requested WHPX remap of addr: {:?}, size: {:?}",
        guest_address, size
    );

    let host_address = self
        .guest_mem
        .get_host_address(guest_address)
        .map_err(|_| Error::new(1))? as *const c_void;

    // Note that we aren't doing any validation here that this range was previously unmapped.
    // However, we can avoid that expensive validation by relying on Windows error checking for
    // ReclaimVirtualMemory. The call will fail if:
    // - If the range is not currently "offered"
    // - The range is outside of current guest mem (GuestMemory will fail to convert the
    //   address)
    // In short, security is guaranteed by ensuring the guest can never reclaim ranges it
    // hadn't previously forfeited (and even then, the contents will be zeroed).
    //
    // Safe because the memory ranges in question are managed by Windows, not Rust.
    // Also, ReclaimVirtualMemory has built-in error checking for bad parameters.
    let result = unsafe { ReclaimVirtualMemory(host_address, size as usize) };

    if result == ERROR_BUSY || result == ERROR_SUCCESS {
        // In either of these cases, the contents of the reclaimed memory
        // are preserved or undefined. Regardless, zero the memory
        // to ensure no unintentional memory contents are shared.
        //
        // Safe because we just reclaimed the region in question and haven't yet remapped
        // it to the guest partition, so we know it's unused.
        //
        // `RawDescriptor` is used here purely as a mutable raw pointer type for the cast.
        unsafe { RtlZeroMemory(host_address as RawDescriptor, size as usize) };
    } else {
        let err = Error::new(result);
        error!("Reclaiming memory failed with error: {}", err);
        return Err(err);
    }

    // Safe because no-overlap is guaranteed by the success of ReclaimVirtualMemory,
    // Which would fail if it was called on areas which were not unmapped.
    unsafe {
        set_user_memory_region(
            &self.vm_partition,
            false, // read_only
            false, // track dirty pages
            guest_address.offset(),
            size,
            host_address as *mut u8,
        )
    }
}
}
/// Wrapper around WHvMapGpaRange, which creates, modifies, or deletes a mapping
/// from guest physical to host user pages.
///
/// The mapping is always readable and executable. It is also writable unless `read_only` is
/// set, and dirty-page tracking is requested when `track_dirty_pages` is set.
///
/// # Safety
///
/// Safe when the guest regions are guaranteed not to overlap.
unsafe fn set_user_memory_region(
    partition: &SafePartition,
    read_only: bool,
    track_dirty_pages: bool,
    guest_addr: u64,
    memory_size: u64,
    userspace_addr: *mut u8,
) -> Result<()> {
    // Start from the permissions every mapping gets, then add the optional ones.
    let mut map_flags = WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagExecute
        | WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagRead;
    if track_dirty_pages {
        map_flags |= WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagTrackDirtyPages;
    }
    if !read_only {
        map_flags |= WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagWrite;
    }

    check_whpx!(WHvMapGpaRange(
        partition.partition,
        userspace_addr as *mut c_void,
        guest_addr,
        memory_size,
        map_flags,
    ))
}
/// Helper function to determine the size in bytes of a dirty log bitmap for the given memory region
/// size.
///
/// The bitmap holds one bit per page, so the result is the page count (rounded up) divided by
/// eight (rounded up).
///
/// # Arguments
///
/// * `size` - Number of bytes in the memory region being queried.
pub fn dirty_log_bitmap_size(size: usize) -> usize {
    // `div_ceil` rounds up without computing `size + page_size - 1`, which overflows for sizes
    // within one page of `usize::MAX`.
    size.div_ceil(pagesize()).div_ceil(8)
}
impl Vm for WhpxVm {
/// Makes a shallow clone of this `Vm`.
fn try_clone(&self) -> Result<Self> {
let mut ioevents = FnvHashMap::default();
for (addr, evt) in self.ioevents.iter() {
ioevents.insert(*addr, evt.try_clone()?);
}
Ok(WhpxVm {
whpx: self.whpx.try_clone()?,
vm_partition: self.vm_partition.clone(),
guest_mem: self.guest_mem.clone(),
mem_regions: self.mem_regions.clone(),
mem_slot_gaps: self.mem_slot_gaps.clone(),
ioevents,
vm_evt_wrtube: self
.vm_evt_wrtube
.as_ref()
.map(|t| t.try_clone().expect("could not clone vm_evt_wrtube")),
})
}
/// Not supported by this implementation: always fails with `ENOTSUP`.
fn try_clone_descriptor(&self) -> Result<SafeDescriptor> {
Err(Error::new(ENOTSUP))
}
/// Identifies the hypervisor backing this VM; always `HypervisorKind::Whpx`.
fn hypervisor_kind(&self) -> HypervisorKind {
HypervisorKind::Whpx
}
/// Reports whether this VM supports the capability `c`.
///
/// `DirtyLog` is answered by querying WHPX for dirty page tracking; a failed query is logged
/// and treated as unsupported. Every other capability has a fixed answer.
fn check_capability(&self, c: VmCap) -> bool {
    match c {
        VmCap::DirtyLog => match Whpx::check_whpx_feature(WhpxFeature::DirtyPageTracking) {
            Ok(supported) => supported,
            Err(e) => {
                error!(
                    "failed to check whpx feature {:?}: {}",
                    WhpxFeature::DirtyPageTracking,
                    e
                );
                false
            }
        },
        // whpx initializes cpuid early during VM creation.
        VmCap::EarlyInitCpuid | VmCap::ReadOnlyMemoryRegion => true,
        // there is a pvclock like thing already done w/ hyperv, but we can't get the state.
        VmCap::PvClock => false,
        VmCap::Protected | VmCap::MemNoncoherentDma => false,
        #[cfg(target_arch = "x86_64")]
        VmCap::BusLockDetect => false,
    }
}
/// Returns a reference to the guest memory held by this VM.
fn get_memory(&self) -> &GuestMemory {
&self.guest_mem
}
fn add_memory_region(
&mut self,
guest_addr: GuestAddress,
mem: Box<dyn MappedRegion>,
read_only: bool,
log_dirty_pages: bool,
_cache: MemCacheType,
) -> Result<MemSlot> {
let size = mem.size() as u64;
let end_addr = guest_addr.checked_add(size).ok_or(Error::new(EOVERFLOW))?;
if self.guest_mem.range_overlap(guest_addr, end_addr) {
return Err(Error::new(ENOSPC));
}
let mut regions = self.mem_regions.lock();
let mut gaps = self.mem_slot_gaps.lock();
let slot = match gaps.pop() {
Some(gap) => gap.0,
None => (regions.len() + self.guest_mem.num_regions() as usize) as MemSlot,
};
// Safe because we check that the given guest address is valid and has no overlaps. We also
// know that the pointer and size are correct because the MemoryMapping interface ensures
// this. We take ownership of the memory mapping so that it won't be unmapped until the slot
// is removed.
let res = unsafe {
set_user_memory_region(
&self.vm_partition,
read_only,
log_dirty_pages,
guest_addr.offset(),
size,
mem.as_ptr(),
)
};
if let Err(e) = res {
gaps.push(Reverse(slot));
return Err(e);
}
regions.insert(slot, (guest_addr, mem));
Ok(slot)
}
fn msync_memory_region(&mut self, slot: MemSlot, offset: usize, size: usize) -> Result<()> {
let mut regions = self.mem_regions.lock();
let (_, mem) = regions.get_mut(&slot).ok_or(Error::new(ENOENT))?;
mem.msync(offset, size).map_err(|err| match err {
MmapError::InvalidAddress => Error::new(EFAULT),
MmapError::NotPageAligned => Error::new(EINVAL),
MmapError::SystemCallFailed(e) => e,
_ => Error::new(EIO),
})
}
fn remove_memory_region(&mut self, slot: MemSlot) -> Result<Box<dyn MappedRegion>> {
let mut regions = self.mem_regions.lock();
if !regions.contains_key(&slot) {
return Err(Error::new(ENOENT));
}
if let Some((guest_addr, mem)) = regions.get(&slot) {
// Safe because the slot is checked against the list of memory slots.
unsafe {
check_whpx!(WHvUnmapGpaRange(
self.vm_partition.partition,
guest_addr.offset(),
mem.size() as u64,
))?;
}
self.mem_slot_gaps.lock().push(Reverse(slot));
Ok(regions.remove(&slot).unwrap().1)
} else {
Err(Error::new(ENOENT))
}
}
/// Always fails with `ENXIO`, whatever `_kind` is requested.
fn create_device(&self, _kind: DeviceKind) -> Result<SafeDescriptor> {
// Whpx does not support in-kernel devices
Err(Error::new(libc::ENXIO))
}
fn get_dirty_log(&self, slot: u32, dirty_log: &mut [u8]) -> Result<()> {
let regions = self.mem_regions.lock();
if let Some((guest_addr, mem)) = regions.get(&slot) {
// Ensures that there are as many bytes in dirty_log as there are pages in the mmap.
if dirty_log_bitmap_size(mem.size()) > dirty_log.len() {
return Err(Error::new(EINVAL));
}
let bitmap_size = if dirty_log.len() % 8 == 0 {
dirty_log.len() / 8
} else {
dirty_log.len() / 8 + 1
};
let mut bitmap = vec![0u64; bitmap_size];
check_whpx!(unsafe {
WHvQueryGpaRangeDirtyBitmap(
self.vm_partition.partition,
guest_addr.offset(),
mem.size() as u64,
bitmap.as_mut_ptr(),
(bitmap.len() * 8) as u32,
)
})?;
// safe because we have allocated a vec of u64, which we can cast to a u8 slice.
let buffer = unsafe {
std::slice::from_raw_parts(bitmap.as_ptr() as *const u8, bitmap.len() * 8)
};
dirty_log.copy_from_slice(&buffer[..dirty_log.len()]);
Ok(())
} else {
Err(Error::new(ENOENT))
}
}
fn register_ioevent(
&mut self,
evt: &Event,
addr: IoEventAddress,
datamatch: Datamatch,
) -> Result<()> {
if datamatch != Datamatch::AnyLength {
error!("WHPX currently only supports Datamatch::AnyLength");
return Err(Error::new(ENOTSUP));
}
if self.ioevents.contains_key(&addr) {
error!("WHPX does not support multiple ioevents for the same address");
return Err(Error::new(EEXIST));
}
self.ioevents.insert(addr, evt.try_clone()?);
Ok(())
}
/// Removes the event registered for `addr`, provided it is `evt`.
///
/// # Errors
///
/// `ENOTSUP` unless `datamatch` is `Datamatch::AnyLength`; `ENOENT` if nothing is registered
/// at `addr` or the registered event does not compare equal to `evt`.
fn unregister_ioevent(
    &mut self,
    evt: &Event,
    addr: IoEventAddress,
    datamatch: Datamatch,
) -> Result<()> {
    if datamatch != Datamatch::AnyLength {
        error!("WHPX only supports Datamatch::AnyLength");
        return Err(Error::new(ENOTSUP));
    }

    // evt should match the existing evt associated with addr
    let is_registered = self
        .ioevents
        .get(&addr)
        .map_or(false, |existing_evt| evt == existing_evt);
    if !is_registered {
        return Err(Error::new(ENOENT));
    }

    self.ioevents.remove(&addr);
    Ok(())
}
/// Trigger any io events based on the memory mapped IO at `addr`. If the hypervisor does
/// in-kernel IO event delivery, this is a no-op.
///
/// The written data is ignored; an address without a registered event is not an error.
fn handle_io_events(&self, addr: IoEventAddress, _data: &[u8]) -> Result<()> {
    if let Some(evt) = self.ioevents.get(&addr) {
        evt.signal()?;
    }
    Ok(())
}
/// Not supported by this implementation: always fails with `ENOTSUP`.
fn enable_hypercalls(&mut self, _nr: u64, _count: usize) -> Result<()> {
Err(Error::new(ENOTSUP))
}
/// Always fails with `ENODEV`; `check_capability` reports `VmCap::PvClock` as unsupported.
fn get_pvclock(&self) -> Result<ClockState> {
Err(Error::new(ENODEV))
}
/// Always fails with `ENODEV`; `check_capability` reports `VmCap::PvClock` as unsupported.
fn set_pvclock(&self, _state: &ClockState) -> Result<()> {
Err(Error::new(ENODEV))
}
fn add_fd_mapping(
&mut self,
slot: u32,
offset: usize,
size: usize,
fd: &dyn AsRawDescriptor,
fd_offset: u64,
prot: Protection,
) -> Result<()> {
let mut regions = self.mem_regions.lock();
let (_, region) = regions.get_mut(&slot).ok_or(Error::new(EINVAL))?;
match region.add_fd_mapping(offset, size, fd, fd_offset, prot) {
Ok(()) => Ok(()),
Err(MmapError::SystemCallFailed(e)) => Err(e),
Err(_) => Err(Error::new(EIO)),
}
}
fn remove_mapping(&mut self, slot: u32, offset: usize, size: usize) -> Result<()> {
let mut regions = self.mem_regions.lock();
let (_, region) = regions.get_mut(&slot).ok_or(Error::new(EINVAL))?;
match region.remove_mapping(offset, size) {
Ok(()) => Ok(()),
Err(MmapError::SystemCallFailed(e)) => Err(e),
Err(_) => Err(Error::new(EIO)),
}
}
/// Dispatches a balloon event: inflate unmaps the range, deflate remaps it, and reaching the
/// balloon target requires no action.
fn handle_balloon_event(&mut self, event: BalloonEvent) -> Result<()> {
    match event {
        BalloonEvent::BalloonTargetReached(_) => Ok(()),
        BalloonEvent::Inflate(range) => {
            let (addr, len) = (range.guest_address, range.size);
            self.handle_inflate(addr, len)
        }
        BalloonEvent::Deflate(range) => {
            let (addr, len) = (range.guest_address, range.size);
            self.handle_deflate(addr, len)
        }
    }
}
/// Returns the number of guest physical address bits, taken from `host_phys_addr_bits()`.
fn get_guest_phys_addr_bits(&self) -> u8 {
// Assume the guest physical address size is the same as the host.
host_phys_addr_bits()
}
}
impl VmX86_64 for WhpxVm {
/// Returns the `Whpx` hypervisor handle stored in this VM.
fn get_hypervisor(&self) -> &dyn HypervisorX86_64 {
&self.whpx
}
fn create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuX86_64>> {
Ok(Box::new(WhpxVcpu::new(
self.vm_partition.clone(),
id.try_into().unwrap(),
)?))
}
/// Sets the address of the three-page region in the VM's address space.
/// This function is only necessary for unrestricted_guest_mode=0, which we do not support for
/// WHPX.
///
/// The address is ignored and the call always succeeds.
fn set_tss_addr(&self, _addr: GuestAddress) -> Result<()> {
Ok(())
}
/// Sets the address of a one-page region in the VM's address space.
/// This function is only necessary for unrestricted_guest_mode=0, which we do not support for
/// WHPX.
///
/// The address is ignored and the call always succeeds.
fn set_identity_map_addr(&self, _addr: GuestAddress) -> Result<()> {
Ok(())
}
/// Always fails with `ENXIO`; both arguments are ignored.
fn load_protected_vm_firmware(
&mut self,
_fw_addr: GuestAddress,
_fw_max_size: u64,
) -> Result<()> {
// WHPX does not support protected VMs
Err(Error::new(libc::ENXIO))
}
}
// NOTE: WHPX tests need to be run serially; when run concurrently they fail unless each test maps
// new regions of guest memory.
#[cfg(test)]
mod tests {
use std::thread;
use std::time::Duration;
use base::EventWaitResult;
use base::MemoryMappingBuilder;
use base::SharedMemory;
use super::*;
/// Builds a `WhpxVm` for tests, enabling APIC emulation only when the host supports it.
/// Panics if WHPX cannot be instantiated or the VM cannot be created.
fn new_vm(cpu_count: usize, mem: GuestMemory) -> WhpxVm {
    let whpx = Whpx::new().expect("failed to instantiate whpx");
    let apic_emulation = Whpx::check_whpx_feature(WhpxFeature::LocalApicEmulation)
        .expect("failed to get whpx features");
    let vm = WhpxVm::new(&whpx, cpu_count, mem, CpuId::new(0), apic_emulation, None);
    vm.expect("failed to create whpx vm")
}
#[test]
fn create_vm() {
    // Nothing to test on hosts without WHPX.
    if !Whpx::is_enabled() {
        return;
    }
    let guest_mem =
        GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
    // Creation not panicking is the whole assertion.
    new_vm(1, guest_mem);
}
#[test]
fn create_vcpu() {
    // Nothing to test on hosts without WHPX.
    if !Whpx::is_enabled() {
        return;
    }
    let guest_mem =
        GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
    let vm = new_vm(1, guest_mem);
    // A single-vcpu VM must be able to create vcpu 0.
    vm.create_vcpu(0).expect("failed to create vcpu");
}
#[test]
fn try_clone() {
    // Nothing to test on hosts without WHPX.
    if !Whpx::is_enabled() {
        return;
    }
    let guest_mem =
        GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
    let vm = new_vm(1, guest_mem);
    // Keep the clone alive until the end of the test, alongside the original.
    let _vm_clone = vm.try_clone().expect("failed to clone whpx vm");
}
#[test]
fn send_vm() {
    // Nothing to test on hosts without WHPX.
    if !Whpx::is_enabled() {
        return;
    }
    let guest_mem =
        GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
    let vm = new_vm(1, guest_mem);
    // Moving the VM into another thread (and dropping it there) must compile and not panic.
    let worker = thread::spawn(move || drop(vm));
    worker.join().unwrap();
}
#[test]
fn check_vm_capability() {
    // Nothing to test on hosts without WHPX.
    if !Whpx::is_enabled() {
        return;
    }
    let guest_mem =
        GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
    let vm = new_vm(1, guest_mem);

    let dirty_log_supported = vm.check_capability(VmCap::DirtyLog);
    assert!(dirty_log_supported);
    let pvclock_supported = vm.check_capability(VmCap::PvClock);
    assert!(!pvclock_supported);
}
#[test]
fn dirty_log_size() {
    let page_size = pagesize();
    // (region size in bytes, expected bitmap size in bytes)
    let cases = [
        (0, 0),
        (page_size, 1),
        (page_size * 8, 1),
        (page_size * 8 + 1, 2),
        (page_size * 100, 13),
    ];
    for &(region_size, expected) in cases.iter() {
        assert_eq!(dirty_log_bitmap_size(region_size), expected);
    }
}
// Exercises registration and unregistration of ioevents, including every rejection path:
// duplicate addresses, datamatches other than AnyLength, and unknown events or addresses.
#[test]
fn register_ioevent() {
if !Whpx::is_enabled() {
return;
}
let cpu_count = 1;
let mem =
GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
let mut vm = new_vm(cpu_count, mem);
let evt = Event::new().expect("failed to create event");
let otherevt = Event::new().expect("failed to create event");
// The same event may be registered at two different addresses.
vm.register_ioevent(&evt, IoEventAddress::Pio(0xf4), Datamatch::AnyLength)
.unwrap();
vm.register_ioevent(&evt, IoEventAddress::Mmio(0x1000), Datamatch::AnyLength)
.unwrap();
// A second event at an already-registered address is rejected.
vm.register_ioevent(
&otherevt,
IoEventAddress::Mmio(0x1000),
Datamatch::AnyLength,
)
.expect_err("WHPX should not allow you to register two events for the same address");
// Any datamatch other than AnyLength is rejected.
vm.register_ioevent(
&otherevt,
IoEventAddress::Mmio(0x1000),
Datamatch::U8(None),
)
.expect_err(
"WHPX should not allow you to register ioevents with Datamatches other than AnyLength",
);
vm.register_ioevent(
&otherevt,
IoEventAddress::Mmio(0x1000),
Datamatch::U32(Some(0xf6)),
)
.expect_err(
"WHPX should not allow you to register ioevents with Datamatches other than AnyLength",
);
// Unregistering needs both the right event and the right address (kind and value).
vm.unregister_ioevent(&otherevt, IoEventAddress::Pio(0xf4), Datamatch::AnyLength)
.expect_err("unregistering an unknown event should fail");
vm.unregister_ioevent(&evt, IoEventAddress::Pio(0xf5), Datamatch::AnyLength)
.expect_err("unregistering an unknown PIO address should fail");
vm.unregister_ioevent(&evt, IoEventAddress::Pio(0x1000), Datamatch::AnyLength)
.expect_err("unregistering an unknown PIO address should fail");
vm.unregister_ioevent(&evt, IoEventAddress::Mmio(0xf4), Datamatch::AnyLength)
.expect_err("unregistering an unknown MMIO address should fail");
// The two original registrations can be removed.
vm.unregister_ioevent(&evt, IoEventAddress::Pio(0xf4), Datamatch::AnyLength)
.unwrap();
vm.unregister_ioevent(&evt, IoEventAddress::Mmio(0x1000), Datamatch::AnyLength)
.unwrap();
}
// Checks that `handle_io_events` signals only the event registered for the exact address
// (kind and value), and signals nothing for an unregistered address.
#[test]
fn handle_io_events() {
if !Whpx::is_enabled() {
return;
}
let cpu_count = 1;
let mem =
GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
let mut vm = new_vm(cpu_count, mem);
let evt = Event::new().expect("failed to create event");
let evt2 = Event::new().expect("failed to create event");
// Same numeric address, but one is PIO and the other MMIO.
vm.register_ioevent(&evt, IoEventAddress::Pio(0x1000), Datamatch::AnyLength)
.unwrap();
vm.register_ioevent(&evt2, IoEventAddress::Mmio(0x1000), Datamatch::AnyLength)
.unwrap();
// Check a pio address
vm.handle_io_events(IoEventAddress::Pio(0x1000), &[])
.expect("failed to handle_io_events");
// Only `evt` must have been signaled.
assert_ne!(
evt.wait_timeout(Duration::from_millis(10))
.expect("failed to read event"),
EventWaitResult::TimedOut
);
assert_eq!(
evt2.wait_timeout(Duration::from_millis(10))
.expect("failed to read event"),
EventWaitResult::TimedOut
);
// Check an mmio address
vm.handle_io_events(IoEventAddress::Mmio(0x1000), &[])
.expect("failed to handle_io_events");
// Only `evt2` must have been signaled this time.
assert_eq!(
evt.wait_timeout(Duration::from_millis(10))
.expect("failed to read event"),
EventWaitResult::TimedOut
);
assert_ne!(
evt2.wait_timeout(Duration::from_millis(10))
.expect("failed to read event"),
EventWaitResult::TimedOut
);
// Check an address that does not match any registered ioevents
vm.handle_io_events(IoEventAddress::Pio(0x1001), &[])
.expect("failed to handle_io_events");
assert_eq!(
evt.wait_timeout(Duration::from_millis(10))
.expect("failed to read event"),
EventWaitResult::TimedOut
);
assert_eq!(
evt2.wait_timeout(Duration::from_millis(10))
.expect("failed to read event"),
EventWaitResult::TimedOut
);
}
#[test]
fn add_memory_ro() {
    // Nothing to test on hosts without WHPX.
    if !Whpx::is_enabled() {
        return;
    }
    let guest_mem =
        GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
    let mut vm = new_vm(1, guest_mem);

    let region_size = 0x1000;
    let shm = SharedMemory::new("test", region_size as u64).unwrap();
    let mapping = MemoryMappingBuilder::new(region_size)
        .from_shared_memory(&shm)
        .build()
        .unwrap();

    // Map a read-only region directly after guest memory, without dirty-page logging.
    let read_only = true;
    let log_dirty_pages = false;
    vm.add_memory_region(
        GuestAddress(0x1000),
        Box::new(mapping),
        read_only,
        log_dirty_pages,
        MemCacheType::CacheCoherent,
    )
    .unwrap();
}
#[test]
fn remove_memory() {
    // Nothing to test on hosts without WHPX.
    if !Whpx::is_enabled() {
        return;
    }
    let guest_mem =
        GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
    let mut vm = new_vm(1, guest_mem);

    let region_size = 0x1000;
    let shm = SharedMemory::new("test", region_size as u64).unwrap();
    let mapping = MemoryMappingBuilder::new(region_size)
        .from_shared_memory(&shm)
        .build()
        .unwrap();
    // Remember where the mapping lives so the returned region can be identified.
    let expected_ptr = mapping.as_ptr();

    let slot = vm
        .add_memory_region(
            GuestAddress(0x1000),
            Box::new(mapping),
            false,
            false,
            MemCacheType::CacheCoherent,
        )
        .unwrap();

    // Removing the slot must hand back the very same mapping.
    let removed = vm.remove_memory_region(slot).unwrap();
    assert_eq!(removed.size(), region_size);
    assert_eq!(removed.as_ptr(), expected_ptr);
}
#[test]
fn remove_invalid_memory() {
    // Nothing to test on hosts without WHPX.
    if !Whpx::is_enabled() {
        return;
    }
    let guest_mem =
        GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
    let mut vm = new_vm(1, guest_mem);
    // No region was ever added, so slot 0 is unknown.
    let removal = vm.remove_memory_region(0);
    assert!(removal.is_err());
}
#[test]
fn overlap_memory() {
    // Nothing to test on hosts without WHPX.
    if !Whpx::is_enabled() {
        return;
    }
    let guest_mem =
        GuestMemory::new(&[(GuestAddress(0), 0x10000)]).expect("failed to create guest memory");
    let mut vm = new_vm(1, guest_mem);

    let region_size = 0x2000;
    let shm = SharedMemory::new("test", region_size as u64).unwrap();
    let mapping = MemoryMappingBuilder::new(region_size)
        .from_shared_memory(&shm)
        .build()
        .unwrap();

    // 0x2000..0x4000 lies inside the 0x10000-byte guest memory, so it must be rejected.
    let added = vm.add_memory_region(
        GuestAddress(0x2000),
        Box::new(mapping),
        false,
        false,
        MemCacheType::CacheCoherent,
    );
    assert!(added.is_err());
}
#[test]
fn sync_memory() {
    // Nothing to test on hosts without WHPX.
    if !Whpx::is_enabled() {
        return;
    }
    let guest_mem =
        GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
    let mut vm = new_vm(1, guest_mem);

    let region_size = 0x1000;
    let shm = SharedMemory::new("test", region_size as u64).unwrap();
    let mapping = MemoryMappingBuilder::new(region_size)
        .from_shared_memory(&shm)
        .build()
        .unwrap();
    let slot = vm
        .add_memory_region(
            GuestAddress(0x10000),
            Box::new(mapping),
            false,
            false,
            MemCacheType::CacheCoherent,
        )
        .unwrap();

    // In-bounds requests succeed.
    vm.msync_memory_region(slot, region_size - 1, 0).unwrap();
    vm.msync_memory_region(slot, 0, region_size).unwrap();
    // An offset at the end of the region, or an unknown slot, fails.
    let past_end = vm.msync_memory_region(slot, region_size, 0);
    assert!(past_end.is_err());
    let unknown_slot = vm.msync_memory_region(slot + 1, region_size, 0);
    assert!(unknown_slot.is_err());
}
}