Merge with upstream 2026-04-21 1/2 2e1df462c1b base: Add more safety nets for checking cpu online status 2fccff2c766 base: Consolidate vcpu properties into one unified data structure 513b08b62ac Roll recipe dependencies (trivial). 525839a1bf3 Roll recipe dependencies (trivial). 666ae802456 devices: usb: add isochronous IN transfer handler 16aebad45d6 Reland "devices: usb: add build_isochronous_transfer to trait" f407b047bee Reland "devices: usb: add methods to get max payload length" 125adf13172 devices: usb: allow triggering ring controller early 9b1952608a5 devices: usb: ensure all transfers complete before detach bf36139328b infra: Update README for luci 36a1614852d crosvm: add GuestPanic exit state for SystemEventCrash https://chromium.googlesource.com/crosvm/crosvm/+log/eec58549a5379347e51f880982e1d42f89876f02..2e1df462c1b95edef475dc8d7e210c672aca08cd BUG=b:417615888 BUG=b:499190661 BUG=498448415, 502869543 Change-Id: I0e20ac2a6dcafc91dfb9c29b13b83875ffb09b4c Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/7780039 Bot-Commit: crosvm LUCI CI <crosvm-luci-ci-builder@crosvm-infra.iam.gserviceaccount.com> Commit-Queue: Vineeth Pillai <vineethrp@google.com>
diff --git a/aarch64/src/fdt.rs b/aarch64/src/fdt.rs index 5f7c2bd..f6687cb 100644 --- a/aarch64/src/fdt.rs +++ b/aarch64/src/fdt.rs
@@ -88,9 +88,7 @@ num_vcpus: u32, cpu_mpidr_generator: &impl Fn(usize) -> Option<u64>, vcpu_clusters: Vec<CpuSet>, - vcpu_capacity: BTreeMap<usize, u32>, - dynamic_power_coefficient: BTreeMap<usize, u32>, - vcpu_frequencies: BTreeMap<usize, Vec<u32>>, + vcpu_properties: &BTreeMap<usize, arch::VcpuProperties>, ) -> Result<()> { let root_node = fdt.root_mut(); let cpus_node = root_node.subnode_mut("cpus")?; @@ -112,22 +110,24 @@ cpu_node.set_prop("reg", reg)?; cpu_node.set_prop("phandle", PHANDLE_CPU0 + vcpu_id)?; - if let Some(pwr_coefficient) = dynamic_power_coefficient.get(&(vcpu_id as usize)) { - cpu_node.set_prop("dynamic-power-coefficient", *pwr_coefficient)?; - } - if let Some(capacity) = vcpu_capacity.get(&(vcpu_id as usize)) { - cpu_node.set_prop("capacity-dmips-mhz", *capacity)?; - } - // Placed inside cpu nodes for ease of parsing for some secure firmwares(PvmFw). - if let Some(frequencies) = vcpu_frequencies.get(&(vcpu_id as usize)) { - cpu_node.set_prop("operating-points-v2", PHANDLE_OPP_DOMAIN_BASE + vcpu_id)?; - let opp_table_node = cpu_node.subnode_mut(&format!("opp_table{vcpu_id}"))?; - opp_table_node.set_prop("phandle", PHANDLE_OPP_DOMAIN_BASE + vcpu_id)?; - opp_table_node.set_prop("compatible", "operating-points-v2")?; - for freq in frequencies.iter() { - let opp_hz = (*freq) as u64 * 1000; - let opp_node = opp_table_node.subnode_mut(&format!("opp{opp_hz}"))?; - opp_node.set_prop("opp-hz", opp_hz)?; + if let Some(props) = vcpu_properties.get(&(vcpu_id as usize)) { + if let Some(pwr_coefficient) = props.dynamic_power_coefficient { + cpu_node.set_prop("dynamic-power-coefficient", pwr_coefficient)?; + } + if let Some(capacity) = props.capacity { + cpu_node.set_prop("capacity-dmips-mhz", capacity)?; + } + // Placed inside cpu nodes for ease of parsing for some secure firmwares(PvmFw). + if !props.frequencies.is_empty() { + cpu_node.set_prop("operating-points-v2", PHANDLE_OPP_DOMAIN_BASE + vcpu_id)?; + let opp_table_node = cpu_node.subnode_mut(&format!("opp_table{vcpu_id}"))?; + opp_table_node.set_prop("phandle", PHANDLE_OPP_DOMAIN_BASE + vcpu_id)?; + opp_table_node.set_prop("compatible", "operating-points-v2")?; + for freq in props.frequencies.iter() { + let opp_hz = (*freq) as u64 * 1000; + let opp_node = opp_table_node.subnode_mut(&format!("opp{opp_hz}"))?; + opp_node.set_prop("opp-hz", opp_hz)?; + } } } } @@ -637,8 +637,7 @@ num_vcpus: u32, cpu_mpidr_generator: &impl Fn(usize) -> Option<u64>, vcpu_clusters: Vec<CpuSet>, - vcpu_capacity: BTreeMap<usize, u32>, - vcpu_frequencies: BTreeMap<usize, Vec<u32>>, + vcpu_properties: BTreeMap<usize, arch::VcpuProperties>, fdt_address: GuestAddress, cmdline: &str, kernel_region: AddressRange, @@ -653,7 +652,6 @@ vmwdt_cfg: VmWdtConfig, dump_device_tree_blob: Option<PathBuf>, vm_generator: &impl Fn(&mut Fdt, &BTreeMap<&str, u32>) -> cros_fdt::Result<()>, - dynamic_power_coefficient: BTreeMap<usize, u32>, device_tree_overlays: Vec<DtbOverlay>, serial_devices: &[SerialDeviceInfo], virt_cpufreq_v2: bool, @@ -704,9 +702,7 @@ num_vcpus, cpu_mpidr_generator, vcpu_clusters, - vcpu_capacity, - dynamic_power_coefficient, - vcpu_frequencies.clone(), + &vcpu_properties, )?; create_gic_node(&mut fdt, is_gicv3, has_vgic_its, num_vcpus as u64)?; create_timer_node(&mut fdt, num_vcpus)?; @@ -730,7 +726,7 @@ create_vmwdt_node(&mut fdt, vmwdt_cfg, num_vcpus)?; create_kvm_cpufreq_node(&mut fdt)?; vm_generator(&mut fdt, &phandles)?; - if !vcpu_frequencies.is_empty() { + if vcpu_properties.values().any(|p| !p.frequencies.is_empty()) { if virt_cpufreq_v2 { create_virt_cpufreq_v2_node(&mut fdt, num_vcpus as u64)?; } else {
diff --git a/aarch64/src/lib.rs b/aarch64/src/lib.rs index a623f42..9a70e0c 100644 --- a/aarch64/src/lib.rs +++ b/aarch64/src/lib.rs
@@ -655,7 +655,7 @@ let mut use_pmu = vm.check_capability(VmCap::ArmPmuV3); use_pmu &= !no_pmu; - let vcpu_count = components.vcpu_count; + let vcpu_count = components.vcpu_properties.len(); let mut has_pvtime = true; let mut vcpus = Vec::with_capacity(vcpu_count); let mut vcpu_init = Vec::with_capacity(vcpu_count); @@ -862,19 +862,32 @@ Tube::pair().map_err(Error::CreateTube)?; let vcpufreq_shared_tube = Arc::new(Mutex::new(vcpufreq_control_tube)); #[cfg(any(target_os = "android", target_os = "linux"))] - if !components.vcpu_frequencies.is_empty() { + if components + .vcpu_properties + .values() + .any(|p| !p.frequencies.is_empty()) + { let mut freq_domain_vcpus: BTreeMap<u32, Vec<usize>> = BTreeMap::new(); let mut freq_domain_perfs: BTreeMap<u32, Arc<AtomicU32>> = BTreeMap::new(); let mut vcpu_affinities: Vec<u32> = Vec::new(); for vcpu in 0..vcpu_count { - let freq_domain = *components.vcpu_domains.get(&vcpu).unwrap_or(&(vcpu as u32)); + let props = components + .vcpu_properties + .get(&vcpu) + .expect("missing vcpu props"); + let freq_domain = props.vcpu_domain.unwrap_or(vcpu as u32); freq_domain_vcpus.entry(freq_domain).or_default().push(vcpu); - let vcpu_affinity = match components.vcpu_affinity.clone() { - Some(VcpuAffinity::Global(v)) => v, - Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&vcpu).unwrap_or_default(), + let pcpu = match &components.vcpu_affinity { + Some(VcpuAffinity::Global(s)) => { + s.iter().next().copied().expect("empty global affinity") + } + Some(VcpuAffinity::PerVcpu(m)) => m + .get(&vcpu) + .and_then(|s| s.iter().next().copied()) + .expect("missing or empty per-vcpu affinity for vcpu"), None => panic!("vcpu_affinity needs to be set for VirtCpufreq"), }; - vcpu_affinities.push(vcpu_affinity[0].try_into().unwrap()); + vcpu_affinities.push(pcpu.try_into().unwrap()); } for domain in freq_domain_vcpus.keys() { let domain_perf = Arc::new(AtomicU32::new(0)); @@ -882,16 +895,20 @@ } let largest_vcpu_affinity_idx = *vcpu_affinities.iter().max().unwrap() as usize; for (vcpu, pcpu) in vcpu_affinities.iter().enumerate() { + let props = components + .vcpu_properties + .get(&vcpu) + .expect("missing vcpu props"); let mut virtfreq_size = AARCH64_VIRTFREQ_SIZE; if components.virt_cpufreq_v2 { - let domain = *components.vcpu_domains.get(&vcpu).unwrap_or(&(vcpu as u32)); + let domain = props.vcpu_domain.unwrap_or(vcpu as u32); virtfreq_size = AARCH64_VIRTFREQ_V2_SIZE; let virt_cpufreq = Arc::new(Mutex::new(VirtCpufreqV2::new( *pcpu, - components.vcpu_frequencies.get(&vcpu).unwrap().clone(), - components.vcpu_domain_paths.get(&vcpu).cloned(), + props.frequencies.clone(), + props.vcpu_domain_path.clone(), domain, - *components.normalized_cpu_ipc_ratios.get(&vcpu).unwrap(), + props.normalized_cpu_ipc_ratio.unwrap(), largest_vcpu_affinity_idx, vcpufreq_shared_tube.clone(), freq_domain_vcpus.get(&domain).unwrap().clone(), @@ -907,14 +924,8 @@ } else { let virt_cpufreq = Arc::new(Mutex::new(VirtCpufreq::new( *pcpu, - *components.vcpu_capacity.get(&vcpu).unwrap_or(&1024), - *components - .vcpu_frequencies - .get(&vcpu) - .unwrap() - .iter() - .max() - .unwrap(), + props.capacity.unwrap_or(arch::DEFAULT_CPU_CAPACITY), + *props.frequencies.iter().max().unwrap(), ))); mmio_bus .insert( @@ -1019,8 +1030,7 @@ vcpu_count as u32, &|n| get_vcpu_mpidr_aff(&vcpus, n), components.vcpu_clusters, - components.vcpu_capacity, - components.vcpu_frequencies, + components.vcpu_properties.clone(), fdt_address, cmdline .as_str_with_max_len(AARCH64_CMDLINE_MAX_SIZE - 1) @@ -1042,7 +1052,6 @@ vmwdt_cfg, dump_device_tree_blob, &|writer, phandles| vm.create_fdt(writer, phandles), - components.dynamic_power_coefficient, device_tree_overlays, &serial_devices, components.virt_cpufreq_v2, @@ -1530,12 +1539,12 @@ map_func: G, ) -> std::result::Result<BTreeMap<usize, T>, base::Error> where - F: Fn(usize) -> std::result::Result<bool, base::Error>, + F: Fn(usize) -> bool, G: Fn(usize) -> std::result::Result<T, base::Error>, { let mut cpu_map = BTreeMap::new(); for cpu_id in 0..num_cpus { - if filter_func(cpu_id)? { + if filter_func(cpu_id) { cpu_map.insert(cpu_id, map_func(cpu_id)?); } } @@ -1639,7 +1648,7 @@ #[test] fn collect_for_each_cpu_simple() { let num_cpus = 2; - let filter_func = |_cpu_id: usize| -> std::result::Result<bool, base::Error> { Ok(true) }; + let filter_func = |_cpu_id: usize| -> bool { true }; let map_func = |cpu_id: usize| -> std::result::Result<usize, base::Error> { Ok(cpu_id) }; let result = AArch64::collect_for_each_cpu(num_cpus, filter_func, map_func).unwrap(); @@ -1651,8 +1660,7 @@ #[test] fn collect_for_each_cpu_filter() { let num_cpus = 2; - let filter_func = - |cpu_id: usize| -> std::result::Result<bool, base::Error> { Ok(cpu_id % 2 == 0) }; + let filter_func = |cpu_id: usize| -> bool { cpu_id % 2 == 0 }; let map_func = |cpu_id: usize| -> std::result::Result<usize, base::Error> { Ok(cpu_id) }; let result = AArch64::collect_for_each_cpu(num_cpus, filter_func, map_func).unwrap(); @@ -1663,7 +1671,7 @@ #[test] fn collect_for_each_cpu_map() { let num_cpus = 4; - let filter_func = |_| -> std::result::Result<bool, base::Error> { Ok(true) }; + let filter_func = |_| -> bool { true }; let map_func = |cpu_id: usize| -> std::result::Result<usize, base::Error> { Ok(cpu_id * 2) }; let result = AArch64::collect_for_each_cpu(num_cpus, filter_func, map_func).unwrap(); @@ -1674,20 +1682,9 @@ } #[test] - fn collect_for_each_cpu_filter_error() { - let num_cpus = 1; - let filter_func = - |_| -> std::result::Result<bool, base::Error> { Err(base::Error::new(libc::EINVAL)) }; - let map_func = |cpu_id: usize| -> std::result::Result<usize, base::Error> { Ok(cpu_id) }; - let result = AArch64::collect_for_each_cpu(num_cpus, filter_func, map_func); - assert!(result.is_err()); - assert_eq!(result.unwrap_err().errno(), libc::EINVAL); - } - - #[test] fn collect_for_each_cpu_map_error() { let num_cpus = 1; - let filter_func = |_| -> std::result::Result<bool, base::Error> { Ok(true) }; + let filter_func = |_| -> bool { true }; let map_func = |_| -> std::result::Result<usize, base::Error> { Err(base::Error::new(libc::EINVAL)) }; let result = AArch64::collect_for_each_cpu(num_cpus, filter_func, map_func);
diff --git a/arch/src/lib.rs b/arch/src/lib.rs index 226c61a..23e976f 100644 --- a/arch/src/lib.rs +++ b/arch/src/lib.rs
@@ -398,6 +398,84 @@ pub mem: Option<MemoryRegionConfig>, } +pub const DEFAULT_CPU_CAPACITY: u32 = 1024; + +#[sorted] +#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)] +pub struct VcpuProperties { + pub capacity: Option<u32>, + pub dynamic_power_coefficient: Option<u32>, + pub frequencies: Vec<u32>, + #[cfg(all( + target_arch = "aarch64", + any(target_os = "android", target_os = "linux") + ))] + pub normalized_cpu_ipc_ratio: Option<u32>, + #[cfg(all( + target_arch = "aarch64", + any(target_os = "android", target_os = "linux") + ))] + pub vcpu_domain: Option<u32>, + #[cfg(all( + target_arch = "aarch64", + any(target_os = "android", target_os = "linux") + ))] + pub vcpu_domain_path: Option<PathBuf>, +} + +/// Derives base VCPU properties from various config fields. +pub fn derive_vcpu_properties( + vcpu_count: usize, + vcpu_capacity: &std::collections::BTreeMap<usize, u32>, + dynamic_power_coefficient: &std::collections::BTreeMap<usize, u32>, + vcpu_frequencies: &std::collections::BTreeMap<usize, Vec<u32>>, + #[cfg(all( + target_arch = "aarch64", + any(target_os = "android", target_os = "linux") + ))] + normalized_cpu_ipc_ratio: &std::collections::BTreeMap<usize, u32>, + #[cfg(all( + target_arch = "aarch64", + any(target_os = "android", target_os = "linux") + ))] + vcpu_domain: &std::collections::BTreeMap<usize, u32>, + #[cfg(all( + target_arch = "aarch64", + any(target_os = "android", target_os = "linux") + ))] + vcpu_domain_path: &std::collections::BTreeMap<usize, std::path::PathBuf>, +) -> std::collections::BTreeMap<usize, VcpuProperties> { + let mut vcpu_properties = std::collections::BTreeMap::new(); + for vcpu_id in 0..vcpu_count { + let vcpu_prop_capacity = vcpu_capacity.get(&vcpu_id).copied(); + + vcpu_properties.insert( + vcpu_id, + VcpuProperties { + capacity: vcpu_prop_capacity, + frequencies: vcpu_frequencies.get(&vcpu_id).cloned().unwrap_or_default(), + dynamic_power_coefficient: dynamic_power_coefficient.get(&vcpu_id).copied(), + #[cfg(all( + target_arch = "aarch64", + any(target_os = "android", target_os = "linux") + ))] + normalized_cpu_ipc_ratio: normalized_cpu_ipc_ratio.get(&vcpu_id).copied(), + #[cfg(all( + target_arch = "aarch64", + any(target_os = "android", target_os = "linux") + ))] + vcpu_domain: vcpu_domain.get(&vcpu_id).copied(), + #[cfg(all( + target_arch = "aarch64", + any(target_os = "android", target_os = "linux") + ))] + vcpu_domain_path: vcpu_domain_path.get(&vcpu_id).cloned(), + }, + ); + } + vcpu_properties +} + /// Holds the pieces needed to build a VM. Passed to `build_vm` in the `LinuxArch` trait below to /// create a `RunnableLinuxVm`. #[sorted] @@ -413,7 +491,6 @@ pub delay_rt: bool, pub dev_pm: Option<DevicePowerManagerConfig>, - pub dynamic_power_coefficient: BTreeMap<usize, u32>, pub extra_kernel_params: Vec<String>, #[cfg(target_arch = "x86_64")] pub force_s2idle: bool, @@ -428,11 +505,7 @@ pub no_i8042: bool, pub no_rtc: bool, pub no_smt: bool, - #[cfg(all( - target_arch = "aarch64", - any(target_os = "android", target_os = "linux") - ))] - pub normalized_cpu_ipc_ratios: BTreeMap<usize, u32>, + pub pci_config: PciConfig, pub pflash_block_size: u32, pub pflash_image: Option<File>, @@ -448,28 +521,9 @@ pub sve_config: SveConfig, pub swiotlb: Option<u64>, pub vcpu_affinity: Option<VcpuAffinity>, - /// Map of vCPU ID->corresponding pCPU's capacity. - pub vcpu_capacity: BTreeMap<usize, u32>, /// List of vCPU clusters, mapped from pCPU clusters. pub vcpu_clusters: Vec<CpuSet>, - pub vcpu_count: usize, - #[cfg(all( - target_arch = "aarch64", - any(target_os = "android", target_os = "linux") - ))] - pub vcpu_domain_paths: BTreeMap<usize, PathBuf>, - #[cfg(all( - target_arch = "aarch64", - any(target_os = "android", target_os = "linux") - ))] - pub vcpu_domains: BTreeMap<usize, u32>, - #[cfg(all( - target_arch = "aarch64", - any(target_os = "android", target_os = "linux") - ))] - /// Maps vcpu -> the corresponding pcpu's frequency, based on the vcpu:pcpu mapping in - /// vcpu_affinity - pub vcpu_frequencies: BTreeMap<usize, Vec<u32>>, + pub vcpu_properties: BTreeMap<usize, VcpuProperties>, #[cfg(any(target_os = "android", target_os = "linux"))] pub vfio_platform_pm: bool, #[cfg(all(
diff --git a/base/src/lib.rs b/base/src/lib.rs index b0aff95..b0002bb 100644 --- a/base/src/lib.rs +++ b/base/src/lib.rs
@@ -232,6 +232,7 @@ Reset, DeviceCrashed, Crash, + GuestPanic, Panic(u8), WatchdogReset, }
diff --git a/base/src/sys/linux/mod.rs b/base/src/sys/linux/mod.rs index 36d74de..4f10ebf 100644 --- a/base/src/sys/linux/mod.rs +++ b/base/src/sys/linux/mod.rs
@@ -724,23 +724,65 @@ parse_sysfs_cpu_info(cpu_id, "cpufreq/cpuinfo_max_freq") } -/// Returns a bool if the CPU is online, or an error if there was an issue reading the system -/// properties. -pub fn is_cpu_online(cpu_id: usize) -> Result<bool> { - let result = parse_sysfs_cpu_info(cpu_id, "online"); - match result { - Err(e) => { - if e.errno() == libc::ENOENT { - // Some systems don't have a file for CPU 0 if the system considers CPU 0 to be - // always-online. Or if CONFIG_HOTPLUG_CPU=n, then the "online" property/file will - // never be created in drivers/base/cpu.c. - Ok(true) - } else { - Err(e) +/// Parses a string of comma separated CPU ranges, e.g. "0-2,4,6-8" into a BTreeSet of CPU IDs. +fn parse_online_cpu_range(content: &str) -> std::collections::BTreeSet<usize> { + let mut cpus = std::collections::BTreeSet::new(); + for part in content.trim().split(',') { + let part = part.trim(); + if part.is_empty() { + continue; + } + if let Some((start_str, end_str)) = part.split_once('-') { + if let (Ok(start), Ok(end)) = (start_str.parse::<usize>(), end_str.parse::<usize>()) { + for i in start..=end { + cpus.insert(i); + } + } + } else if let Ok(cpu) = part.parse::<usize>() { + cpus.insert(cpu); + } + } + cpus +} + +/// Returns a bool if the CPU is online. The online status is cached on the first call. +pub fn is_cpu_online(cpu_id: usize) -> bool { + static ONLINE_CPUS: OnceLock<std::collections::BTreeSet<usize>> = OnceLock::new(); + + let online_cpus = ONLINE_CPUS.get_or_init(|| { + let mut cpus = std::collections::BTreeSet::new(); + let path = Path::new(CPU_DIR).join("online"); + match std::fs::read_to_string(&path) { + Ok(content) => { + cpus = parse_online_cpu_range(&content); + } + Err(_) => { + // If we hit an error trying to access cpuX/online files, assume the CPU is online. + // This prevents permission/EACCES errors on individual files from crashing crosvm. + if let Ok(total_cores) = crate::number_of_logical_cores() { + for id in 0..total_cores { + match parse_sysfs_cpu_info(id, "online") { + Ok(1) => { + cpus.insert(id); + } + Ok(_) => {} + Err(e) => { + // Assume online on error to avoid crashes. + warn!( + "Assuming CPU {} is online because we couldn't read the sys file: {}", + id, e + ); + cpus.insert(id); + } + } + } + } } } - Ok(online) => Ok(online == 1), - } + cpus + }); + + online_cpus.contains(&cpu_id) } #[repr(C)] @@ -816,6 +858,39 @@ } #[test] + fn test_parse_online_cpu_range() { + let set = parse_online_cpu_range("0-3,5-7"); + assert_eq!(set.len(), 7); + assert!(set.contains(&0)); + assert!(set.contains(&1)); + assert!(set.contains(&2)); + assert!(set.contains(&3)); + assert!(!set.contains(&4)); + assert!(set.contains(&5)); + assert!(set.contains(&6)); + assert!(set.contains(&7)); + + let set = parse_online_cpu_range("0"); + assert_eq!(set.len(), 1); + assert!(set.contains(&0)); + + let set = parse_online_cpu_range("0,2,4"); + assert_eq!(set.len(), 3); + assert!(set.contains(&0)); + assert!(set.contains(&2)); + assert!(set.contains(&4)); + + let set = parse_online_cpu_range(" 0-1, 3 "); + assert_eq!(set.len(), 3); + assert!(set.contains(&0)); + assert!(set.contains(&1)); + assert!(set.contains(&3)); + + let set = parse_online_cpu_range(""); + assert!(set.is_empty()); + } + + #[test] fn pipe_size_and_fill() { let (_rx, mut tx) = new_pipe_full().expect("Failed to pipe");
diff --git a/base/src/sys/windows/system_info.rs b/base/src/sys/windows/system_info.rs index bf2c7cf..fd0629a 100644 --- a/base/src/sys/windows/system_info.rs +++ b/base/src/sys/windows/system_info.rs
@@ -73,9 +73,8 @@ todo!(); } -/// Returns a bool if the CPU is online, or an error if there was an issue reading the system -/// properties. -pub fn is_cpu_online(_cpu_id: usize) -> Result<bool> { +/// Returns a bool if the CPU is online. +pub fn is_cpu_online(_cpu_id: usize) -> bool { // See SYSTEM_INFO.number_of_online_cores for more context. - Ok(true) + true }
diff --git a/devices/src/usb/backend/device.rs b/devices/src/usb/backend/device.rs index 187d069..f5b5b72 100644 --- a/devices/src/usb/backend/device.rs +++ b/devices/src/usb/backend/device.rs
@@ -132,6 +132,23 @@ ) } + fn build_isochronous_transfer( + &mut self, + ep_addr: u8, + transfer_buffer: TransferBuffer, + packet_size: u32, + ) -> Result<BackendTransferType> { + multi_dispatch!( + self, + BackendDeviceType, + HostDevice FidoDevice, + build_isochronous_transfer, + ep_addr, + transfer_buffer, + packet_size + ) + } + fn get_control_transfer_state(&mut self) -> Arc<RwLock<ControlTransferState>> { multi_dispatch!( self, @@ -795,19 +812,26 @@ /// by this function must be consumed by `submit_backend_transfer()`. fn request_transfer_buffer(&mut self, size: usize) -> TransferBuffer; - /// Requests the backend to build a backend-specific bulk transfer request + /// Requests the backend to build a backend-specific bulk transfer request. fn build_bulk_transfer( &mut self, ep_addr: u8, transfer_buffer: TransferBuffer, stream_id: Option<u16>, ) -> Result<BackendTransferType>; - /// Requests the backend to build a backend-specific interrupt transfer request + /// Requests the backend to build a backend-specific interrupt transfer request. fn build_interrupt_transfer( &mut self, ep_addr: u8, transfer_buffer: TransferBuffer, ) -> Result<BackendTransferType>; + /// Requests the backend to build a backend-specific isochronous transfer request. + fn build_isochronous_transfer( + &mut self, + ep_addr: u8, + transfer_buffer: TransferBuffer, + packet_size: u32, + ) -> Result<BackendTransferType>; /// Returns the `ControlTransferState` for the given backend device. fn get_control_transfer_state(&mut self) -> Arc<RwLock<ControlTransferState>>;
diff --git a/devices/src/usb/backend/device_provider.rs b/devices/src/usb/backend/device_provider.rs index 7d36682..d102493 100644 --- a/devices/src/usb/backend/device_provider.rs +++ b/devices/src/usb/backend/device_provider.rs
@@ -186,7 +186,6 @@ let (host_device, event_handler) = match attach_host_backend_device( usb_file, self.event_loop.clone(), - self.job_queue.clone(), DeviceState::new(self.fail_handle.clone(), self.job_queue.clone()), ) { Ok((host_device, event_handler)) => (host_device, event_handler), @@ -227,7 +226,6 @@ let (fido_device, event_handler) = match attach_security_key( hidraw, self.event_loop.clone(), - self.job_queue.clone(), DeviceState::new(self.fail_handle.clone(), self.job_queue.clone()), ) { Ok((fido_device, event_handler)) => (fido_device, event_handler),
diff --git a/devices/src/usb/backend/endpoint.rs b/devices/src/usb/backend/endpoint.rs index 0464204..9e851d4 100644 --- a/devices/src/usb/backend/endpoint.rs +++ b/devices/src/usb/backend/endpoint.rs
@@ -98,7 +98,9 @@ } }; let buffer = match transfer_type { - XhciTransferType::Normal => transfer.create_buffer().map_err(Error::CreateBuffer)?, + XhciTransferType::Normal | XhciTransferType::Isochronous => { + transfer.create_buffer().map_err(Error::CreateBuffer)? + } XhciTransferType::Noop => { return transfer .on_transfer_complete(&TransferStatus::Completed, 0) @@ -119,6 +121,9 @@ EndpointType::Interrupt => { self.handle_interrupt_transfer(device, transfer, buffer)?; } + EndpointType::Isochronous => { + self.handle_isochronous_transfer(device, transfer, buffer)?; + } _ => { return transfer .on_transfer_complete(&TransferStatus::Error, 0) @@ -181,6 +186,28 @@ self.do_handle_transfer(device, xhci_transfer, usb_transfer, buffer) } + fn handle_isochronous_transfer( + &self, + device: &mut BackendDeviceType, + xhci_transfer: XhciTransfer, + buffer: ScatterGatherBuffer, + ) -> Result<()> { + // We do not support OUT transfer yet. + if self.direction == EndpointDirection::HostToDevice { + return xhci_transfer + .on_transfer_complete(&TransferStatus::Error, 0) + .map_err(Error::TransferComplete); + } + + let max_payload = xhci_transfer + .get_max_payload() + .map_err(Error::TransferGetMaxPayload)?; + let transfer_buffer = self.get_transfer_buffer(&buffer, device)?; + let usb_transfer = + device.build_isochronous_transfer(self.ep_addr(), transfer_buffer, max_payload)?; + self.do_handle_transfer(device, xhci_transfer, usb_transfer, buffer) + } + fn do_handle_transfer( &self, device: &mut BackendDeviceType,
diff --git a/devices/src/usb/backend/error.rs b/devices/src/usb/backend/error.rs index a161346..c774753 100644 --- a/devices/src/usb/backend/error.rs +++ b/devices/src/usb/backend/error.rs
@@ -84,6 +84,8 @@ StartAsyncJobQueue(UtilsError), #[error("xhci transfer completed: {0}")] TransferComplete(XhciTransferError), + #[error("failed to get max payload size for xhci transfer: {0}")] + TransferGetMaxPayload(XhciTransferError), #[error("failed to cancel transfer: {0}")] TransferHandle(UsbUtilError), #[error("transfer has already completed when being cancelled")]
diff --git a/devices/src/usb/backend/fido_backend/fido_passthrough.rs b/devices/src/usb/backend/fido_backend/fido_passthrough.rs index 2e2398f..9d8d86d 100644 --- a/devices/src/usb/backend/fido_backend/fido_passthrough.rs +++ b/devices/src/usb/backend/fido_backend/fido_passthrough.rs
@@ -415,6 +415,16 @@ ))) } + fn build_isochronous_transfer( + &mut self, + _ep_addr: u8, + _transfer_buffer: TransferBuffer, + _packet_size: u32, + ) -> BackendResult<BackendTransferType> { + // Fido devices don't support isochronous transfer requests + Err(BackendError::MalformedBackendTransfer) + } + fn get_control_transfer_state(&mut self) -> Arc<RwLock<ControlTransferState>> { self.control_transfer_state.clone() }
diff --git a/devices/src/usb/backend/fido_backend/fido_provider.rs b/devices/src/usb/backend/fido_backend/fido_provider.rs index 38ab669..1833a5f 100644 --- a/devices/src/usb/backend/fido_backend/fido_provider.rs +++ b/devices/src/usb/backend/fido_backend/fido_provider.rs
@@ -14,7 +14,6 @@ use crate::usb::backend::fido_backend::fido_device::FidoDevice; use crate::usb::backend::fido_backend::fido_passthrough::FidoPassthroughDevice; use crate::usb::backend::utils::UsbUtilEventHandler; -use crate::utils::AsyncJobQueue; use crate::utils::EventLoop; /// Utility function to attach a security key device to the backend provider. It initializes a @@ -22,7 +21,6 @@ pub(crate) fn attach_security_key( hidraw: File, event_loop: Arc<EventLoop>, - job_queue: Arc<AsyncJobQueue>, device_state: DeviceState, ) -> Result<(Arc<Mutex<BackendDeviceType>>, Arc<UsbUtilEventHandler>)> { let device = @@ -35,7 +33,7 @@ .map_err(Error::CreateFidoBackendDevice)?; let device_impl = BackendDeviceType::FidoDevice(passthrough_device); let arc_mutex_device = Arc::new(Mutex::new(device_impl)); - let event_handler = UsbUtilEventHandler::new(arc_mutex_device.clone(), event_loop, job_queue); + let event_handler = UsbUtilEventHandler::new(arc_mutex_device.clone(), event_loop); Ok((arc_mutex_device, event_handler)) }
diff --git a/devices/src/usb/backend/host_backend/host_backend_device_provider.rs b/devices/src/usb/backend/host_backend/host_backend_device_provider.rs index 62ddac0..b824b0a 100644 --- a/devices/src/usb/backend/host_backend/host_backend_device_provider.rs +++ b/devices/src/usb/backend/host_backend/host_backend_device_provider.rs
@@ -14,7 +14,6 @@ use crate::usb::backend::error::Result; use crate::usb::backend::host_backend::host_device::HostDevice; use crate::usb::backend::utils::UsbUtilEventHandler; -use crate::utils::AsyncJobQueue; use crate::utils::EventLoop; /// Attaches a host device to the backend. This creates a host USB device object by opening a @@ -22,14 +21,13 @@ pub(crate) fn attach_host_backend_device( usb_file: File, event_loop: Arc<EventLoop>, - job_queue: Arc<AsyncJobQueue>, device_state: DeviceState, ) -> Result<(Arc<Mutex<BackendDeviceType>>, Arc<UsbUtilEventHandler>)> { let device = Device::new(usb_file).map_err(Error::CreateHostUsbDevice)?; let host_device = HostDevice::new(Arc::new(Mutex::new(device)), device_state)?; let device_impl = BackendDeviceType::HostDevice(host_device); let arc_mutex_device = Arc::new(Mutex::new(device_impl)); - let event_handler = UsbUtilEventHandler::new(arc_mutex_device.clone(), event_loop, job_queue); + let event_handler = UsbUtilEventHandler::new(arc_mutex_device.clone(), event_loop); Ok((arc_mutex_device, event_handler)) }
diff --git a/devices/src/usb/backend/host_backend/host_device.rs b/devices/src/usb/backend/host_backend/host_device.rs index 13ba119..6b90c5f 100644 --- a/devices/src/usb/backend/host_backend/host_device.rs +++ b/devices/src/usb/backend/host_backend/host_device.rs
@@ -223,6 +223,18 @@ )) } + fn build_isochronous_transfer( + &mut self, + ep_addr: u8, + transfer_buffer: TransferBuffer, + packet_size: u32, + ) -> Result<BackendTransferType> { + Ok(BackendTransferType::HostDevice( + Transfer::new_isochronous(ep_addr, transfer_buffer, packet_size) + .map_err(Error::CreateTransfer)?, + )) + } + fn get_control_transfer_state(&mut self) -> Arc<RwLock<ControlTransferState>> { self.control_transfer_state.clone() }
diff --git a/devices/src/usb/backend/utils.rs b/devices/src/usb/backend/utils.rs index c7ad3fb..d35ad99 100644 --- a/devices/src/usb/backend/utils.rs +++ b/devices/src/usb/backend/utils.rs
@@ -17,7 +17,6 @@ use crate::usb::backend::error::Result; use crate::usb::xhci::usb_hub::UsbPort; use crate::usb::xhci::xhci_transfer::XhciTransferState; -use crate::utils::AsyncJobQueue; use crate::utils::EventHandler; use crate::utils::EventLoop; @@ -68,7 +67,6 @@ pub(crate) struct UsbUtilEventHandler { pub device: Arc<Mutex<BackendDeviceType>>, pub event_loop: Arc<EventLoop>, - pub job_queue: Arc<AsyncJobQueue>, pub port: Mutex<Weak<UsbPort>>, pub self_ref: Mutex<Option<Arc<UsbUtilEventHandler>>>, } @@ -98,7 +96,7 @@ !port.is_attached() }); - if is_detached && self.device.lock().can_finalize() { + if is_detached { self.signal_detach(); } @@ -113,12 +111,10 @@ pub(crate) fn new( device: Arc<Mutex<BackendDeviceType>>, event_loop: Arc<EventLoop>, - job_queue: Arc<AsyncJobQueue>, ) -> Arc<Self> { Arc::new(Self { device, event_loop, - job_queue, port: Mutex::new(Weak::new()), self_ref: Mutex::new(None), }) @@ -143,27 +139,55 @@ /// Signal this handler to perform the host resource cleanup. pub fn signal_detach(&self) { - let self_ref = self.self_ref.lock(); - if let Some(self_arc) = &*self_ref { - let self_clone = self_arc.clone(); - if let Err(e) = self.job_queue.queue_job(move || { - if self_clone.device.lock().can_finalize() { - if let Err(e) = self_clone.finalize_detach() { - error!("failed to finalize USB detachment: {}", e); + // self_ref acts as our atomic flag. Only the first caller (i.e., manual detach vs unplug) + // will succeed. + let self_arc = match self.self_ref.lock().take() { + Some(arc) => arc, + None => return, // Detach process is already running. + }; + + std::thread::spawn(move || { + let mut retries = 0; + loop { + let can_finalize = { + let mut device = self_arc.device.lock(); + if let BackendDeviceType::HostDevice(host_device) = &mut *device { + let _ = host_device.device.lock().poll_transfers(); } + device.can_finalize() + }; + + if can_finalize { + break; } - }) { - error!("failed to queue USB detach job: {}", e); + + // When a device is unplugged from the host, the in-flight URBs will be aborted and + // queued back to the completion list. However, there's a small window where the + // completion list is empty because the actual hardware has not responded to the + // host kernel. Since there's no way for the user space to wait for the host kernel + // to queue them back, we keep polling here. + std::thread::sleep(std::time::Duration::from_millis(100)); + retries += 1; + + // Use a generous 30-second timeout. The guest's StopEndpointCommand typically + // times out in 5 seconds. We want to wait much longer to guarantee the host kernel + // has time to finish aborting the URBs. In theory, we shouldn't hit this timeout, + // because the host driver would also time out around the same time and reset the + // HC, making all the in-flight URBs reap-able. + if retries > 300 { + error!("Timeout waiting for host device to release all URBs."); + break; + } } - } + + if let Err(e) = self_arc.finalize_detach() { + error!("failed to finalize USB detachment: {}", e); + } + }); } /// Clean up the resources and remove itself from the event loop. pub(crate) fn finalize_detach(&self) -> Result<()> { - if self.self_ref.lock().take().is_none() { - return Ok(()); - } - let mut device = self.device.lock(); if let BackendDeviceType::HostDevice(host_device) = &mut *device { host_device.device.lock().drop_dma_buffer();
diff --git a/devices/src/usb/xhci/device_slot.rs b/devices/src/usb/xhci/device_slot.rs index e8c829f..be0be8d 100644 --- a/devices/src/usb/xhci/device_slot.rs +++ b/devices/src/usb/xhci/device_slot.rs
@@ -1104,6 +1104,20 @@ } } } + + pub fn get_max_esit_payload(&self, endpoint_id: u8) -> Result<u32> { + if !valid_endpoint_id(endpoint_id) { + return Err(Error::BadEndpointId(endpoint_id)); + } + + let index = endpoint_id - 1; + let device_context = self.get_device_context()?; + let endpoint_context = device_context.endpoint_context[index as usize]; + + let lo = endpoint_context.get_max_esit_payload_lo() as u32; + let hi = endpoint_context.get_max_esit_payload_hi() as u32; + Ok(hi << 16 | lo) + } } #[cfg(test)]
diff --git a/devices/src/usb/xhci/ring_buffer_controller.rs b/devices/src/usb/xhci/ring_buffer_controller.rs index c978876..d7141e2 100644 --- a/devices/src/usb/xhci/ring_buffer_controller.rs +++ b/devices/src/usb/xhci/ring_buffer_controller.rs
@@ -52,7 +52,7 @@ fn handle_transfer_descriptor( &self, descriptor: TransferDescriptor, - complete_event: Event, + trigger_event: Event, ) -> anyhow::Result<()>; /// Cancel transfers. This is used to stop the ring activity as soon as possible when we @@ -240,13 +240,13 @@ fn handle_transfer_descriptor( &self, descriptor: TransferDescriptor, - complete_event: Event, + trigger_event: Event, ) -> anyhow::Result<()> { for atrb in &descriptor { assert_eq!(atrb.trb.get_trb_type().unwrap(), TrbType::Normal); self.sender.send(atrb.trb.get_parameter() as i32).unwrap(); } - complete_event.signal().unwrap(); + trigger_event.signal().unwrap(); Ok(()) } } @@ -260,13 +260,13 @@ fn handle_transfer_descriptor( &self, descriptor: TransferDescriptor, - complete_event: Event, + trigger_event: Event, ) -> anyhow::Result<()> { let mut locked = self.processing.lock(); for a in locked.iter() { self.sender.send(*a).unwrap(); } - complete_event.signal().unwrap(); + trigger_event.signal().unwrap(); *locked = descriptor.iter().map(|atrb| atrb.gpa).collect(); Ok(()) }
diff --git a/devices/src/usb/xhci/transfer_ring_controller.rs b/devices/src/usb/xhci/transfer_ring_controller.rs index 218932e..6b3e648 100644 --- a/devices/src/usb/xhci/transfer_ring_controller.rs +++ b/devices/src/usb/xhci/transfer_ring_controller.rs
@@ -47,7 +47,7 @@ fn handle_transfer_descriptor( &self, descriptor: TransferDescriptor, - completion_event: Event, + trigger_event: Event, ) -> anyhow::Result<()> { let xhci_transfer = self.transfer_manager.create_transfer( self.mem.clone(), @@ -56,7 +56,7 @@ self.slot_id, self.endpoint_id, descriptor, - completion_event, + trigger_event, self.stream_id, ); xhci_transfer
diff --git a/devices/src/usb/xhci/xhci_transfer.rs b/devices/src/usb/xhci/xhci_transfer.rs index a825bbb..e69da2c 100644 --- a/devices/src/usb/xhci/xhci_transfer.rs +++ b/devices/src/usb/xhci/xhci_transfer.rs
@@ -56,6 +56,8 @@ CreateBuffer(BufferError), #[error("cannot detach from port: {0}")] DetachPort(HubError), + #[error("failed to get max payload length for ep: {0}")] + GetMaxPayload(u8), #[error("failed to halt the endpoint: {0}")] HaltEndpoint(u8), #[error("failed to read guest memory: {0}")] @@ -200,7 +202,7 @@ slot_id: u8, endpoint_id: u8, transfer_descriptor: TransferDescriptor, - completion_event: Event, + trigger_event: Event, stream_id: Option<u16>, ) -> XhciTransfer { let transfer_dir = { @@ -218,11 +220,11 @@ mem, port, interrupter, - transfer_completion_event: completion_event, slot_id, endpoint_id, transfer_dir, transfer_descriptor, + trigger_event, device_slot: self.device_slot.clone(), stream_id, }; @@ -286,7 +288,7 @@ endpoint_id: u8, transfer_dir: TransferDirection, transfer_descriptor: TransferDescriptor, - transfer_completion_event: Event, + trigger_event: Event, device_slot: Weak<DeviceSlot>, stream_id: Option<u16>, } @@ -383,12 +385,24 @@ self.stream_id } + /// get max payload length for Isochronous transfer. + pub fn get_max_payload(&self) -> Result<u32> { + let Some(device_slot) = self.device_slot.upgrade() else { + return Err(Error::GetMaxPayload(self.endpoint_id)); + }; + device_slot + .get_max_esit_payload(self.endpoint_id) + .map_err(|_| Error::GetMaxPayload(self.endpoint_id)) + } + fn process_td_results( &self, status: &TransferStatus, bytes_transferred: u32, ) -> Result<Vec<TransferAction>> { let mut actions = Vec::new(); + // This can be processed first, because xHCI specification does not guarantee the order + // between the context update and the transfer completion event. if *status == TransferStatus::Stalled { warn!("xhci: endpoint is stalled. set state to Halted"); actions.push(TransferAction::HaltEndpoint); @@ -506,16 +520,10 @@ TransferStatus::NoDevice => { info!("xhci: device disconnected, detaching from port"); // Actual port detachment is handled by the UsbUtilEventHandler. - return self - .transfer_completion_event - .signal() - .map_err(Error::WriteCompletionEvent); + return self.send_late_trigger(); } TransferStatus::Cancelled => { - return self - .transfer_completion_event - .signal() - .map_err(Error::WriteCompletionEvent); + return self.send_late_trigger(); } TransferStatus::Completed => {} TransferStatus::Stalled => { @@ -567,9 +575,7 @@ // it to the end so that its error, if it ever occurs, won't cause the above transfer // events to be omitted. if !halted { - self.transfer_completion_event - .signal() - .map_err(Error::WriteCompletionEvent) + self.send_late_trigger() } else { Ok(()) } @@ -578,26 +584,28 @@ /// Send this transfer to backend if it's a valid transfer. pub fn send_to_backend_if_valid(self) -> Result<()> { if self.validate_transfer()? { - // Backend should invoke on transfer complete when transfer is completed. + // Backend should call on_transfer_complete() when the transfer is completed to generate + // events. The ring controller is also triggered there for bulk & interrupt transfers. + // For an isochronous transfer, the ring controller is triggered here without waiting + // for the completion. let port = self.port.clone(); let mut backend = port.backend_device(); match &mut *backend { - Some(backend) => backend - .lock() - .submit_xhci_transfer(self) - .map_err(|_| Error::SubmitTransfer)?, + Some(backend) => { + self.send_early_trigger()?; + backend + .lock() + .submit_xhci_transfer(self) + .map_err(|_| Error::SubmitTransfer)? + } None => { error!("backend is already disconnected"); - self.transfer_completion_event - .signal() - .map_err(Error::WriteCompletionEvent)?; + self.send_trigger()?; } } } else { error!("invalid td on transfer ring"); - self.transfer_completion_event - .signal() - .map_err(Error::WriteCompletionEvent)?; + self.send_trigger()?; } Ok(()) } @@ -624,6 +632,32 @@ } Ok(valid) } + + // ISOC transfer posts many small descriptors at once. We give an early signal to the ring + // controller so that we can keep up with the timing requirements. + fn needs_early_trigger(&self) -> bool { + matches!(self.get_transfer_type(), Ok(XhciTransferType::Isochronous)) + } + + fn send_early_trigger(&self) -> Result<()> { + if self.needs_early_trigger() { + self.send_trigger()?; + } + Ok(()) + } + + fn send_late_trigger(&self) -> Result<()> { + if !self.needs_early_trigger() { + self.send_trigger()?; + } + Ok(()) + } + + fn send_trigger(&self) -> Result<()> { + self.trigger_event + .signal() + .map_err(Error::WriteCompletionEvent) + } } fn trb_is_valid(atrb: &AddressedTrb) -> bool { @@ -714,7 +748,7 @@ Event::new().unwrap(), &xhci_regs, ))), - transfer_completion_event: Event::new().unwrap(), + trigger_event: Event::new().unwrap(), slot_id: 1, endpoint_id: 2, transfer_dir: TransferDirection::Out,
diff --git a/infra/README.md b/infra/README.md index 8ec6cb7..d4c7559 100644 --- a/infra/README.md +++ b/infra/README.md
@@ -13,7 +13,7 @@ A few links to relevant documentation needed to write recipes: - [Recipe Engine](https://chromium.googlesource.com/infra/luci/recipes-py.git/+/HEAD/README.recipes.md) -- [Depot Tools Recipes](https://chromium.googlesource.com/chromium/tools/depot_tools.git/+/HEAD/recipes/README.recipes.md)) +- [Depot Tools Recipes](https://chromium.googlesource.com/chromium/tools/depot_tools.git/+/HEAD/recipes/README.recipes.md) - [ChromiumOS Recipes](https://chromium.googlesource.com/chromiumos/infra/recipes.git/+/HEAD/README.recipes.md) Luci also provides a @@ -30,7 +30,7 @@ Recipes must have 100% code coverage to have tests pass. Tests can be run with: -``` +```shell cd infra && ./recipes.py test run ``` @@ -40,7 +40,7 @@ To regenerate the expectation files, run: -``` +```shell cd infra && ./recipes.py test train ``` @@ -51,7 +51,7 @@ We try to build our recipes to work well locally, so for example build_linux.py can be invoked in the recipe engine via: -``` +```shell cd infra && ./recipes.py run build_linux ``` @@ -64,10 +64,13 @@ ### Testing recipes on a bot (Googlers only) -Note: See internal [crosvm/infra](http://go/crosvm/infra) documentation on access control. +Note: The following led commands require crosvm-acl-luci-admin ACL group membership. See internal +[crosvm/infra](http://go/crosvm/infra) for more access information. -Some things cannot be tested locally and need to be run on one of our build bots. This can be done -with the [led](http://go/luci-how-to-led) tool. +A local run cannot faithfully reproduce the environment of the build bots. Constraints such as the +OS, installed packages, and available hardware are not the same. Therefore, some things cannot be +tested locally and need to be verified on one of our build bots. This can be done with the +[led](http://go/luci-how-to-led) tool. Commonly used led commands are: @@ -84,9 +87,9 @@ To test a local recipe change, you can launch a post-submit build using `led`. First `git commit` your recipe changes locally, then combine the led commands to: -``` +```shell led get-builder luci.crosvm.ci:linux_x86_64 - | led-edit-recipe-bundle + | led edit-recipe-bundle | led launch ``` @@ -105,10 +108,10 @@ So to test, first `git commit` and `./tools/cl upload` your local changes. Then build a job definition to run: -``` +```shell led get-builder luci.crosvm.try:linux_x86_64 - | led-edit-recipe-bundle - | led-edit-cr-cl $GERRIT_URL + | led edit-recipe-bundle + | led edit-cr-cl $GERRIT_URL | led launch ``` @@ -117,15 +120,27 @@ #### Testing a new recipe -This is a little tricker, but you can change the recipe name that is launched by `led`. So you can: +A new recipe can be tested by hijacking an existing builder to run the new recipe. -``` -led get-builder luci.crosvm.ci:linux_x86_64 > job.json +A job can be exported to a json file, edited, and then launched: + +```shell +led get-builder luci.crosvm.ci:linux_x86_64 + | led edit-recipe-bundle > job.json +vim job.json # edit the job definition to change the recipe used +# run it on swarming with job.json as input. +# Note that if the command is piped, like +# `led edit-gerrit-cl <CL patch> | led launch`, then job.json should be input of +# `led edit-gerrit-cl`, instead of `led launch`, so the command will be +# `led edit-gerrit-cl <CL patch> < job.json | led launch` +led launch < job.json ``` -Then edit the `job.json` file to use the newly added recipe, and launch the job using the local -version of recipes. +Alternatively, `led edit` can be used to override the recipe name and parameters: -``` -cat job.json | led-edit-recipe-bundle | led launch +```shell +led get-builder luci.crosvm.ci:linux_x86_64 + | led edit -r new_recipe -p new_recipe_parameters=value + | led edit-recipe-bundle + | led launch ```
diff --git a/infra/README.recipes.md b/infra/README.recipes.md index 659f948..99875b3 100644 --- a/infra/README.recipes.md +++ b/infra/README.recipes.md
@@ -199,11 +199,11 @@ — **def [RunSteps](/infra/recipes/uprev_baguette_image.py#37)(api: RecipeApi, properties: UprevBaguetteImageProperties):** -[depot_tools/recipe_modules/bot_update]: https://chromium.googlesource.com/chromium/tools/depot_tools.git/+/0ea4801192281fff1675855b6c03fd71811133e2/recipes/README.recipes.md#recipe_modules-bot_update -[depot_tools/recipe_modules/depot_tools]: https://chromium.googlesource.com/chromium/tools/depot_tools.git/+/0ea4801192281fff1675855b6c03fd71811133e2/recipes/README.recipes.md#recipe_modules-depot_tools -[depot_tools/recipe_modules/gclient]: https://chromium.googlesource.com/chromium/tools/depot_tools.git/+/0ea4801192281fff1675855b6c03fd71811133e2/recipes/README.recipes.md#recipe_modules-gclient -[depot_tools/recipe_modules/git]: https://chromium.googlesource.com/chromium/tools/depot_tools.git/+/0ea4801192281fff1675855b6c03fd71811133e2/recipes/README.recipes.md#recipe_modules-git -[depot_tools/recipe_modules/gsutil]: https://chromium.googlesource.com/chromium/tools/depot_tools.git/+/0ea4801192281fff1675855b6c03fd71811133e2/recipes/README.recipes.md#recipe_modules-gsutil +[depot_tools/recipe_modules/bot_update]: https://chromium.googlesource.com/chromium/tools/depot_tools.git/+/442cbd6d584d2c992ca9bcc19ecbd2235bea772e/recipes/README.recipes.md#recipe_modules-bot_update +[depot_tools/recipe_modules/depot_tools]: https://chromium.googlesource.com/chromium/tools/depot_tools.git/+/442cbd6d584d2c992ca9bcc19ecbd2235bea772e/recipes/README.recipes.md#recipe_modules-depot_tools +[depot_tools/recipe_modules/gclient]: https://chromium.googlesource.com/chromium/tools/depot_tools.git/+/442cbd6d584d2c992ca9bcc19ecbd2235bea772e/recipes/README.recipes.md#recipe_modules-gclient +[depot_tools/recipe_modules/git]: https://chromium.googlesource.com/chromium/tools/depot_tools.git/+/442cbd6d584d2c992ca9bcc19ecbd2235bea772e/recipes/README.recipes.md#recipe_modules-git +[depot_tools/recipe_modules/gsutil]: https://chromium.googlesource.com/chromium/tools/depot_tools.git/+/442cbd6d584d2c992ca9bcc19ecbd2235bea772e/recipes/README.recipes.md#recipe_modules-gsutil [recipe_engine/recipe_modules/buildbucket]: https://chromium.googlesource.com/infra/luci/recipes-py.git/+/363e865839c69e49ce2f6b6857bed6a66eae0dca/README.recipes.md#recipe_modules-buildbucket [recipe_engine/recipe_modules/cipd]: https://chromium.googlesource.com/infra/luci/recipes-py.git/+/363e865839c69e49ce2f6b6857bed6a66eae0dca/README.recipes.md#recipe_modules-cipd [recipe_engine/recipe_modules/context]: https://chromium.googlesource.com/infra/luci/recipes-py.git/+/363e865839c69e49ce2f6b6857bed6a66eae0dca/README.recipes.md#recipe_modules-context
diff --git a/infra/config/recipes.cfg b/infra/config/recipes.cfg index 43d32e9..d6117b6 100644 --- a/infra/config/recipes.cfg +++ b/infra/config/recipes.cfg
@@ -18,7 +18,7 @@ "deps": { "depot_tools": { "branch": "refs/heads/main", - "revision": "0ea4801192281fff1675855b6c03fd71811133e2", + "revision": "442cbd6d584d2c992ca9bcc19ecbd2235bea772e", "url": "https://chromium.googlesource.com/chromium/tools/depot_tools.git" }, "recipe_engine": {
diff --git a/riscv64/src/lib.rs b/riscv64/src/lib.rs index 66cc9c1..4058a1e 100644 --- a/riscv64/src/lib.rs +++ b/riscv64/src/lib.rs
@@ -364,7 +364,7 @@ }; // Creates vcpus early as the irqchip needs them created to attach interrupts. - let vcpu_count = components.vcpu_count; + let vcpu_count = components.vcpu_properties.len(); let mut vcpus = Vec::with_capacity(vcpu_count); for vcpu_id in 0..vcpu_count { let vcpu: Vcpu = *vm @@ -419,7 +419,7 @@ pci_cfg, &pci_ranges, dev_resources, - components.vcpu_count as u32, + components.vcpu_properties.len() as u32, fdt_offset, aia_num_ids, aia_num_sources, @@ -439,7 +439,7 @@ Ok(RunnableLinuxVm { vm, - vcpu_count: components.vcpu_count, + vcpu_count: components.vcpu_properties.len(), vcpus: Some(vcpus), vcpu_init, vcpu_affinity: components.vcpu_affinity,
diff --git a/src/crosvm/config.rs b/src/crosvm/config.rs index 235cba6..07eda43 100644 --- a/src/crosvm/config.rs +++ b/src/crosvm/config.rs
@@ -1248,12 +1248,12 @@ fn default_vcpu_affinity_map( vcpu_count: usize, max_cores: usize, - is_cpu_online: impl Fn(usize) -> base::Result<bool>, + is_cpu_online: impl Fn(usize) -> bool, ) -> BTreeMap<usize, CpuSet> { let mut affinity_map = BTreeMap::new(); let mut vcpu_id = 0; for cpu_id in 0..max_cores { - if is_cpu_online(cpu_id).expect("Couldn't check if cpu is online") { + if is_cpu_online(cpu_id) { affinity_map.insert(vcpu_id, CpuSet::new([cpu_id])); vcpu_id += 1; } @@ -2409,7 +2409,7 @@ #[test] fn test_default_vcpu_affinity_map() { // Simple 1:1 mapping of vcpu:cpu. - let affinity = default_vcpu_affinity_map(4, 4, |_| Ok(true)); + let affinity = default_vcpu_affinity_map(4, 4, |_| true); assert_eq!(affinity.len(), 4); assert_eq!(affinity.get(&0), Some(&CpuSet::new([0]))); assert_eq!(affinity.get(&1), Some(&CpuSet::new([1]))); @@ -2417,7 +2417,7 @@ assert_eq!(affinity.get(&3), Some(&CpuSet::new([3]))); // cpu 1 is offline, so skip it when assigning vcpu's. - let affinity = default_vcpu_affinity_map(3, 4, |id| Ok(id != 1)); + let affinity = default_vcpu_affinity_map(3, 4, |id| id != 1); assert_eq!(affinity.len(), 3); assert_eq!(affinity.get(&0), Some(&CpuSet::new([0]))); assert_eq!(affinity.get(&1), Some(&CpuSet::new([2])));
diff --git a/src/crosvm/sys/linux.rs b/src/crosvm/sys/linux.rs index def3e5b..8a3214e 100644 --- a/src/crosvm/sys/linux.rs +++ b/src/crosvm/sys/linux.rs
@@ -70,6 +70,7 @@ use arch::VmArch; use arch::VmComponents; use arch::VmImage; +use arch::DEFAULT_CPU_CAPACITY; use argh::FromArgs; use base::ReadNotifier; #[cfg(feature = "balloon")] @@ -1310,7 +1311,10 @@ let mut mapped_capacity = BTreeMap::new(); for vcpu_id in 0..vcpu_count { let pcpu_id = get_representative_pcpu(vcpu_id, vcpu_affinity); - let capacity = host_capacity.get(&pcpu_id).copied().unwrap_or(1024); + let capacity = host_capacity + .get(&pcpu_id) + .copied() + .unwrap_or(DEFAULT_CPU_CAPACITY); mapped_capacity.insert(vcpu_id, capacity); } Ok(mapped_capacity) @@ -1408,9 +1412,9 @@ (None, 0) }; - #[cfg(target_arch = "aarch64")] // Maps vcpu -> the corresponding vcpu's frequency. - let mut vcpu_frequencies = BTreeMap::new(); + #[allow(unused_mut)] + let mut vcpu_frequencies: BTreeMap<usize, Vec<u32>> = BTreeMap::new(); #[cfg(target_arch = "aarch64")] let mut normalized_cpu_ipc_ratios = BTreeMap::new(); @@ -1502,7 +1506,12 @@ ) }), host_max_freq, - |vcpu_id| cpu_ipc_ratio.get(&vcpu_id).copied().unwrap_or(1024), + |vcpu_id| { + cpu_ipc_ratio + .get(&vcpu_id) + .copied() + .unwrap_or(DEFAULT_CPU_CAPACITY) + }, )?; if !cfg.cpu_freq_domains.is_empty() { @@ -1552,6 +1561,29 @@ } } + let vcpu_count = cfg.vcpu_count.unwrap_or(1); + let vcpu_properties = arch::derive_vcpu_properties( + vcpu_count, + &vcpu_capacity, + &cfg.dynamic_power_coefficient, + &vcpu_frequencies, + #[cfg(all( + target_arch = "aarch64", + any(target_os = "android", target_os = "linux") + ))] + &normalized_cpu_ipc_ratios, + #[cfg(all( + target_arch = "aarch64", + any(target_os = "android", target_os = "linux") + ))] + &vcpu_domains, + #[cfg(all( + target_arch = "aarch64", + any(target_os = "android", target_os = "linux") + ))] + &vcpu_domain_paths, + ); + Ok(VmComponents { #[cfg(target_arch = "x86_64")] ac_adapter: cfg.ac_adapter, @@ -1565,20 +1597,11 @@ swiotlb, fw_cfg_enable, bootorder_fw_cfg_blob: Vec::new(), - vcpu_count: cfg.vcpu_count.unwrap_or(1), + vcpu_properties, vcpu_affinity: cfg.vcpu_affinity.clone(), - #[cfg(target_arch = "aarch64")] - vcpu_domains, - #[cfg(target_arch = "aarch64")] - vcpu_domain_paths, - #[cfg(target_arch = "aarch64")] - vcpu_frequencies, fw_cfg_parameters: cfg.fw_cfg_parameters.clone(), vcpu_clusters, - vcpu_capacity, dev_pm: cfg.dev_pm, - #[cfg(target_arch = "aarch64")] - normalized_cpu_ipc_ratios, no_smt: cfg.no_smt, hugepages: cfg.hugepages, hv_cfg: hypervisor::Config { @@ -1623,7 +1646,6 @@ force_s2idle: cfg.force_s2idle, pvm_fw: pvm_fw_image, pci_config: cfg.pci_config, - dynamic_power_coefficient: cfg.dynamic_power_coefficient.clone(), boot_cpu: cfg.boot_cpu, vfio_platform_pm: cfg.vfio_platform_pm, #[cfg(target_arch = "aarch64")] @@ -1826,7 +1848,7 @@ IrqChipKind::Userspace => bail!("Geniezone does not support userspace irqchip mode"), IrqChipKind::Kernel { allow_vgic_its: _ } => { ioapic_host_tube = None; - GeniezoneKernelIrqChip::new(vm_clone, components.vcpu_count) + GeniezoneKernelIrqChip::new(vm_clone, components.vcpu_properties.len()) .context("failed to create IRQ chip")? } }; @@ -1886,7 +1908,7 @@ IrqChipKind::Userspace => bail!("Halla does not support userspace irqchip mode"), IrqChipKind::Kernel { allow_vgic_its: _ } => { ioapic_host_tube = None; - HallaKernelIrqChip::new(vm_clone, components.vcpu_count) + HallaKernelIrqChip::new(vm_clone, components.vcpu_properties.len()) .context("failed to create IRQ chip")? } }; @@ -1978,7 +2000,7 @@ KvmIrqChip::Split( KvmSplitIrqChip::new( vm_clone, - components.vcpu_count, + components.vcpu_properties.len(), ioapic_device_tube, Some(24), ) @@ -1994,7 +2016,7 @@ KvmIrqChip::Kernel( KvmKernelIrqChip::new( vm_clone, - components.vcpu_count, + components.vcpu_properties.len(), #[cfg(target_arch = "aarch64")] allow_vgic_its, ) @@ -2478,7 +2500,16 @@ .collect::<Result<Vec<DtbOverlay>>>()?; #[cfg(target_arch = "aarch64")] - let vcpu_domain_paths = components.vcpu_domain_paths.clone(); + let vcpu_domain_paths: BTreeMap<usize, PathBuf> = components + .vcpu_properties + .iter() + .filter_map(|(id, props)| { + props + .vcpu_domain_path + .as_ref() + .map(|path| (*id, path.clone())) + }) + .collect(); let mut linux = Arch::build_vm::<V, Vcpu>( components, @@ -4017,9 +4048,9 @@ }; let (to_vcpu_channel, from_main_channel) = mpsc::channel(); - let vcpu_affinity = match linux.vcpu_affinity.clone() { - Some(VcpuAffinity::Global(v)) => v, - Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&cpu_id).unwrap_or_default(), + let vcpu_affinity = match &linux.vcpu_affinity { + Some(VcpuAffinity::Global(v)) => v.clone(), + Some(VcpuAffinity::PerVcpu(m)) => m.get(&cpu_id).cloned().unwrap_or_default(), None => Default::default(), }; @@ -4288,6 +4319,10 @@ info!("vcpu crashed"); exit_state = ExitState::Crash; } + VmEventType::GuestPanic => { + info!("guest panic event"); + exit_state = ExitState::GuestPanic; + } VmEventType::DeviceCrashed => { info!("device crashed"); exit_state = ExitState::Crash; @@ -5619,7 +5654,12 @@ ) }), host_max_freq, - |cpu_id| cpu_ipc_ratio.get(&cpu_id).copied().unwrap_or(1024), + |cpu_id| { + cpu_ipc_ratio + .get(&cpu_id) + .copied() + .unwrap_or(DEFAULT_CPU_CAPACITY) + }, ) .expect("normalize_cpu_ipc_ratios failed");
diff --git a/src/crosvm/sys/linux/vcpu.rs b/src/crosvm/sys/linux/vcpu.rs index c93c7d2..5d53521 100644 --- a/src/crosvm/sys/linux/vcpu.rs +++ b/src/crosvm/sys/linux/vcpu.rs
@@ -457,7 +457,7 @@ } Ok(VcpuExit::SystemEventCrash) => { info!("system crash event on vcpu {}", cpu_id); - return ExitState::Stop; + return ExitState::GuestPanic; } Ok(VcpuExit::Debug) => { #[cfg(feature = "gdb")] @@ -643,8 +643,7 @@ ExitState::Stop => VmEventType::Exit, ExitState::Reset => VmEventType::Reset, ExitState::Crash => VmEventType::Crash, - // vcpu_loop doesn't exit with GuestPanic. - ExitState::GuestPanic => unreachable!(), + ExitState::GuestPanic => VmEventType::GuestPanic, ExitState::WatchdogReset => VmEventType::WatchdogReset, }; if let Err(e) = vm_evt_wrtube.send::<VmEventType>(&final_event_data) {
diff --git a/src/sys/windows.rs b/src/sys/windows.rs index a0a4c57..0a9b661 100644 --- a/src/sys/windows.rs +++ b/src/sys/windows.rs
@@ -988,6 +988,10 @@ error!("got pvpanic event. this event is not expected on Windows."); None } + VmEventType::GuestPanic => { + info!("guest panic event"); + Some(ExitState::GuestPanic) + } VmEventType::WatchdogReset => { info!("vcpu stall detected"); Some(ExitState::WatchdogReset) @@ -2110,12 +2114,16 @@ .checked_mul(1024 * 1024) .ok_or_else(|| anyhow!("requested memory size too large"))?, swiotlb, - vcpu_count: cfg.vcpu_count.unwrap_or(1), fw_cfg_enable: false, bootorder_fw_cfg_blob: Vec::new(), vcpu_affinity: cfg.vcpu_affinity.clone(), vcpu_clusters: cfg.cpu_clusters.clone(), - vcpu_capacity: cfg.cpu_capacity.clone(), + vcpu_properties: arch::derive_vcpu_properties( + cfg.vcpu_count.unwrap_or(1), + &cfg.cpu_capacity, + &cfg.dynamic_power_coefficient, + &Default::default(), + ), dev_pm: None, no_smt: cfg.no_smt, hugepages: cfg.hugepages, @@ -2161,7 +2169,6 @@ #[cfg(target_arch = "x86_64")] smbios: cfg.smbios.clone(), smccc_trng: cfg.smccc_trng, - dynamic_power_coefficient: cfg.dynamic_power_coefficient.clone(), #[cfg(target_arch = "x86_64")] break_linux_pci_config_io: cfg.break_linux_pci_config_io, boot_cpu: cfg.boot_cpu, @@ -2385,8 +2392,10 @@ let vm = create_haxm_vm(haxm, guest_mem, &cfg.kernel_log_file)?; let (ioapic_host_tube, ioapic_device_tube) = Tube::pair().exit_context(Exit::CreateTube, "failed to create tube")?; - let irq_chip = - create_userspace_irq_chip::<HaxmVcpu>(components.vcpu_count, ioapic_device_tube)?; + let irq_chip = create_userspace_irq_chip::<HaxmVcpu>( + components.vcpu_properties.len(), + ioapic_device_tube, + )?; run_vm::<HaxmVcpu, HaxmVm>( cfg, components, @@ -2423,7 +2432,7 @@ let vm = create_whpx_vm( whpx, guest_mem, - components.vcpu_count, + components.vcpu_properties.len(), no_smt, apic_emulation_supported && irq_chip == IrqChipKind::Split, cfg.force_calibrated_tsc_leaf, @@ -2445,7 +2454,7 @@ } IrqChipKind::Userspace => { WindowsIrqChip::Userspace(create_userspace_irq_chip::<WhpxVcpu>( - components.vcpu_count, + components.vcpu_properties.len(), ioapic_device_tube, )?) } @@ -2472,14 +2481,14 @@ IrqChipKind::Split => unimplemented!("Split irqchip mode not supported by GVM"), IrqChipKind::Kernel => { ioapic_host_tube = None; - WindowsIrqChip::Gvm(create_gvm_irq_chip(&vm, components.vcpu_count)?) + WindowsIrqChip::Gvm(create_gvm_irq_chip(&vm, components.vcpu_properties.len())?) } IrqChipKind::Userspace => { let (host_tube, ioapic_device_tube) = Tube::pair().exit_context(Exit::CreateTube, "failed to create tube")?; ioapic_host_tube = Some(host_tube); WindowsIrqChip::Userspace(create_userspace_irq_chip::<GvmVcpu>( - components.vcpu_count, + components.vcpu_properties.len(), ioapic_device_tube, )?) } @@ -2596,7 +2605,8 @@ .context("failed to calculate init balloon size")?; let tsc_state = devices::tsc::tsc_state().exit_code(Exit::TscCalibrationFailed)?; - let tsc_sync_mitigations = get_tsc_sync_mitigations(&tsc_state, components.vcpu_count); + let tsc_sync_mitigations = + get_tsc_sync_mitigations(&tsc_state, components.vcpu_properties.len()); if tsc_state.core_grouping.size() > 1 { // Host TSCs are not in sync, log a metric about it.
diff --git a/usb_util/src/device.rs b/usb_util/src/device.rs index 99339c8..d94cd62 100644 --- a/usb_util/src/device.rs +++ b/usb_util/src/device.rs
@@ -407,8 +407,7 @@ /// Check if the device is ready to be detached, i.e., if we have reaped all the transfers /// we've submitted to the host. Returns true when ready. pub fn ready_to_detach(&self) -> bool { - self.is_detaching() - && (self.is_device_lost() || self.is_unrecoverable() || self.no_in_flight_transfer()) + self.is_detaching() && (self.is_unrecoverable() || self.no_in_flight_transfer()) } fn is_unrecoverable(&self) -> bool { @@ -727,9 +726,45 @@ } /// Create an isochronous transfer. - pub fn new_isochronous(endpoint: u8, buffer: TransferBuffer) -> Result<Transfer> { - // TODO(dverkamp): allow user to specify iso descriptors - Self::new(usb_sys::USBDEVFS_URB_TYPE_ISO, endpoint, buffer, &[]) + pub fn new_isochronous( + endpoint: u8, + buffer: TransferBuffer, + packet_size: u32, + ) -> Result<Transfer> { + let buffer_size: u32 = buffer + .size() + .ok_or(Error::InvalidBuffer)? + .try_into() + .map_err(Error::InvalidBufferLength)?; + // Isochronous transfers divide the buffer into multiple packets. + if buffer_size == 0 || packet_size == 0 { + error!("invalid ISOC parameters: buffer_size={buffer_size}, packet_size={packet_size}"); + return Err(Error::InvalidIsochronousParameters); + } + let count = buffer_size.div_ceil(packet_size); + + let mut iso_packets = vec![ + usb_sys::usbdevfs_iso_packet_desc { + length: packet_size, + actual_length: 0, + status: 0, + }; + count as usize + ]; + let last_entry = iso_packets + .last_mut() + .expect("there should be at least one entry for ISOC packet"); + last_entry.length = buffer_size - packet_size * (count - 1); + + let mut transfer = Self::new( + usb_sys::USBDEVFS_URB_TYPE_ISO, + endpoint, + buffer, + &iso_packets, + )?; + transfer.urb_mut().number_of_packets_or_stream_id = count; + transfer.urb_mut().flags = usb_sys::USBDEVFS_URB_ISO_ASAP; + Ok(transfer) } /// Get the status of a completed transfer.
diff --git a/usb_util/src/error.rs b/usb_util/src/error.rs index 5b35ea1..ab09842 100644 --- a/usb_util/src/error.rs +++ b/usb_util/src/error.rs
@@ -26,6 +26,8 @@ InvalidBuffer, #[error("invalid transfer buffer length: {0}")] InvalidBufferLength(num::TryFromIntError), + #[error("isochronous transfer requires non-zero buffer and packet size")] + InvalidIsochronousParameters, #[error("USB ioctl 0x{0:x} failed: {1}")] IoctlFailed(IoctlNr, base::Error), #[error("USB mmap failed: {0}")]
diff --git a/x86_64/src/lib.rs b/x86_64/src/lib.rs index e7f3850..b4bf11e 100644 --- a/x86_64/src/lib.rs +++ b/x86_64/src/lib.rs
@@ -1069,7 +1069,7 @@ { let mem = vm.get_memory().clone(); - let vcpu_count = components.vcpu_count; + let vcpu_count = components.vcpu_properties.len(); vm.set_identity_map_addr(identity_map_addr_start()) .map_err(Error::SetIdentityMapAddr)?;